Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
// a user interface for it, add an INPUT_SCHEMA.json file to your actor.
// For more information, see https://apify.com/docs/actor/input-schema
// Read the actor input through Apify.getInput() and dump it to the console.
const input = await Apify.getInput();
console.log('Input:');
console.dir(input);
// Do something useful here...
// Save output
// The output record echoes the received input next to a fixed greeting.
const output = {
receivedInput: input,
message: 'Hello sir!',
};
console.log('Output:');
console.dir(output);
// Store the record under the 'OUTPUT' key.
await Apify.setValue('OUTPUT', output);
});
// NOTE(review): this is the interior of a larger handler; page, input, requestQueue,
// state and the flags tested below are defined outside the visible code.
// Run setMinMaxPrice only when settingMinMaxPrice is set and settingPropertyType is not.
if(settingMinMaxPrice && !settingPropertyType){
await setMinMaxPrice(page, input, requestQueue);
}
// If filtering is enabled, enqueue necessary pages.
if(input.useFilters && !filtered){
console.log('enqueuing filtered pages...');
// Enqueue links matching '.filterelement'; the callback derives a string from each
// link's textContent with '_0' appended.
// presumably that string identifies the enqueued request — confirm against enqueueLinks
await enqueueLinks(page, requestQueue, '.filterelement', null, 'page', fixUrl('&', input), async link => {
const lText = await getAttribute(link, 'textContent');
return lText + '_' + 0;
});
}
if (enqueuingReady && input.simple) { // If simple output is enough, extract the data.
console.log('extracting data...');
// Keep a copy of the page's HTML in the 'page.html' record.
await Apify.setValue('page.html', await page.content(), {contentType: 'text/html'});
// Inject jQuery, then run listPageFunction inside the page with the actor input.
await Apify.utils.puppeteer.injectJQuery(page);
const result = await page.evaluate(listPageFunction, input);
console.log('Found ' + result.length + ' results');
if (result.length > 0) {
const toBeAdded = [];
for (const item of result) {
// Every item's URL is rewritten, whether or not the item ends up being pushed.
item.url = addUrlParameters(item.url, input);
// De-duplicate by item.name: only items not yet recorded in state.crawled are kept.
if (!state.crawled[item.name]) {
toBeAdded.push(item);
state.crawled[item.name] = true;
}
}
// The state is persisted under 'STATE' only while `migrating` is truthy.
if (migrating) { await Apify.setValue('STATE', state); }
if (toBeAdded.length > 0) { await Apify.pushData(toBeAdded); }
}
// NOTE(review): the body of the following branch is not visible here.
} else if (enqueuingReady) { // If not, enqueue the detail pages to be extracted.
exports.saveSnapshot = async (page) => {
// Throttle snapshots.
const now = Date.now();
if (now - lastSnapshotTimestamp < SNAPSHOT.TIMEOUT_SECS * 1000) {
log.warning(`Aborting saveSnapshot(). It can only be invoked once in ${SNAPSHOT.TIMEOUT_SECS} secs to prevent database overloading.`);
return;
}
lastSnapshotTimestamp = now;
const htmlP = page.content();
const screenshotP = page.screenshot();
const [html, screenshot] = await Promise.all([htmlP, screenshotP]);
await Promise.all([
Apify.setValue(SNAPSHOT.KEYS.HTML, html, { contentType: 'text/html' }),
Apify.setValue(SNAPSHOT.KEYS.SCREENSHOT, screenshot, { contentType: 'image/png' }),
]);
};
// Saves `body` under `key` through Apify.setValue() and registers the in-flight
// promise in pendingSetValues under a freshly generated uuid.
//
// Parameters (passed as a single object):
//   key         - record key handed to Apify.setValue()
//   body        - value to store
//   contentType - optional; forwarded as the `contentType` option when truthy
//   recursion   - retry counter, defaults to 0; incremented on every retry
//
// NOTE(review): the end of this function is not visible here.
export const setValue = async ({ key, body, contentType, recursion = 0 }) => {
const uuid = uuidv4();
const opts = contentType ? { contentType } : undefined;
// NOTE(review): opts already carries contentType whenever it is truthy, so this
// assignment does not change anything.
if (contentType) opts.contentType = contentType;
logInfo(`Saving value ${key}`);
const promise = Apify
.setValue(key, body, opts)
// On success, drop this call's entry from pendingSetValues.
.then(() => {
delete pendingSetValues[uuid];
})
// TODO: this might be removed as we added backoff for network errors ...
// On failure, log the error and retry after 15 seconds until `recursion`
// exceeds SET_VALUE_MAX_REPEATS.
// NOTE(review): the entry stored under this call's uuid is deleted only in the
// success handler above; each retry runs setValue() again and gets a new uuid.
.catch((err) => {
if (recursion > SET_VALUE_MAX_REPEATS) {
logError(`Cannot set value ${key} in iteration ${recursion}, giving up`, err);
return;
}
logError(`Cannot set value ${key} in iteration ${recursion}, trying once again`, err);
setTimeout(() => setValue({ key, body, contentType, recursion: recursion + 1 }), 15 * 1000);
});
pendingSetValues[uuid] = promise;
async writeOutput() {
const allData = this.finishedData.length ? [...this.finishedData, this.fields] : this.fields;
const data = JSON.stringify(allData, null, 2);
try {
await Apify.setValue('OUTPUT', data, { contentType: 'application/json' });
if (this.fields.crawler) this.fields.outputFinished = true;
} catch (error) {
console.error('could not save output');
console.error(error);
}
}
async setValue(...args) {
return Apify.setValue(...args);
}
// NOTE(review): the header of the enclosing function is not visible here; `now`, `$`
// and `page` come from the part that is cut off.
// Throttle: when less than SNAPSHOT.TIMEOUT_SECS seconds separate `now` from the last
// recorded snapshot time, warn and return without saving anything.
if (now - lastSnapshotTimestamp < SNAPSHOT.TIMEOUT_SECS * 1000) {
log.warning('Aborting saveSnapshot(). It can only be invoked once '
+ `in ${SNAPSHOT.TIMEOUT_SECS} secs to prevent database overloading.`);
return;
}
// The snapshot time is recorded before anything is saved.
lastSnapshotTimestamp = now;
// When `$` is truthy, store the markup returned by $.html().
// presumably `$` is a Cheerio handle — confirm against the caller
if ($) {
await Apify.setValue(SNAPSHOT.KEYS.HTML, $.html(), { contentType: 'text/html' });
}
// When `page` is truthy, fetch its HTML and a screenshot concurrently, then store both.
// NOTE(review): if `$` and `page` are both truthy, the SNAPSHOT.KEYS.HTML record is
// written twice and the page's HTML is the one written last.
if (page) {
const htmlP = page.content();
const screenshotP = page.screenshot();
const [html, screenshot] = await Promise.all([htmlP, screenshotP]);
await Promise.all([
Apify.setValue(SNAPSHOT.KEYS.HTML, html, { contentType: 'text/html' }),
Apify.setValue(SNAPSHOT.KEYS.SCREENSHOT, screenshot, { contentType: 'image/png' }),
]);
}
};
// NOTE(review): this fragment starts inside a conditional whose header is not visible;
// `now`, `$` and `page` are defined in the part that is cut off.
// Warn and return without saving anything.
log.warning('Aborting saveSnapshot(). It can only be invoked once '
+ `in ${SNAPSHOT.TIMEOUT_SECS} secs to prevent database overloading.`);
return;
}
// The snapshot time is recorded before anything is saved.
lastSnapshotTimestamp = now;
// When `$` is truthy, store the markup returned by $.html().
// presumably `$` is a Cheerio handle — confirm against the caller
if ($) {
await Apify.setValue(SNAPSHOT.KEYS.HTML, $.html(), { contentType: 'text/html' });
}
// When `page` is truthy, fetch its HTML and a screenshot concurrently, then store both.
// NOTE(review): if `$` and `page` are both truthy, the SNAPSHOT.KEYS.HTML record is
// written twice and the page's HTML is the one written last.
if (page) {
const htmlP = page.content();
const screenshotP = page.screenshot();
const [html, screenshot] = await Promise.all([htmlP, screenshotP]);
await Promise.all([
Apify.setValue(SNAPSHOT.KEYS.HTML, html, { contentType: 'text/html' }),
Apify.setValue(SNAPSHOT.KEYS.SCREENSHOT, screenshot, { contentType: 'image/png' }),
]);
}
};