Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
Apify.main(async () => {
// Get input of the actor (here only for demonstration purposes).
// If you'd like to have your input checked and have Apify display
// a user interface for it, add INPUT_SCHEMA.json file to your actor.
// For more information, see https://apify.com/docs/actor/input-schema
const input = await Apify.getInput();
console.log('Input:');
console.dir(input);
if (!input || !input.sources) throw new Error('Input must be a JSON object with the "sources" field!');
const requestList = await Apify.openRequestList('my-request-list', input.sources);
// Create a basic crawler that will use request-promise to download
// web pages from a given list of URLs
const basicCrawler = new Apify.BasicCrawler({
requestList,
handleRequestFunction: async ({ request }) => {
await Apify.pushData({
request,
finishedAt: new Date(),
html: await rp(request.url),
'#debug': Apify.utils.createRequestDebugInfo(request),
});
},
handleFailedRequestFunction: async ({ request }) => {
await Apify.pushData({
async _initializeAsync() {
// RequestList
const startUrls = this.input.startUrls.map((req) => {
req.useExtendedUniqueKey = true;
req.keepUrlFragment = this.input.keepUrlFragments;
return req;
});
this.requestList = await Apify.openRequestList('PUPPETEER_SCRAPER', startUrls);
// RequestQueue if selected
if (this.input.useRequestQueue) this.requestQueue = await Apify.openRequestQueue();
// Dataset
this.dataset = await Apify.openDataset();
const { itemsCount } = await this.dataset.getInfo();
this.pagesOutputted = itemsCount || 0;
// KeyValueStore
this.keyValueStore = await Apify.openKeyValueStore();
}
async _initializeAsync() {
// RequestList
const startUrls = this.input.startUrls.map((req) => {
req.useExtendedUniqueKey = true;
req.keepUrlFragment = this.input.keepUrlFragments;
return req;
});
this.requestList = await Apify.openRequestList('WEB_SCRAPER', startUrls);
// RequestQueue if selected
if (this.input.useRequestQueue) this.requestQueue = await Apify.openRequestQueue();
// Dataset
this.dataset = await Apify.openDataset();
const { itemsCount } = await this.dataset.getInfo();
this.pagesOutputted = itemsCount || 0;
// KeyValueStore
this.keyValueStore = await Apify.openKeyValueStore();
}
async _initializeAsync() {
// RequestList
const startUrls = this.input.startUrls.map((req) => {
req.useExtendedUniqueKey = true;
req.keepUrlFragment = this.input.keepUrlFragments;
return req;
});
this.requestList = await Apify.openRequestList('CHEERIO_SCRAPER', startUrls);
// RequestQueue if selected
if (this.input.useRequestQueue) this.requestQueue = await Apify.openRequestQueue();
// Dataset
this.dataset = await Apify.openDataset();
const { itemsCount } = await this.dataset.getInfo();
this.pagesOutputted = itemsCount || 0;
// KeyValueStore
this.keyValueStore = await Apify.openKeyValueStore();
}