// Secure your code as it's written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.
// Actor: opens a single URL in headless Chrome and saves the page title.
Apify.main(async () => {
    // Get input of the actor (here only for demonstration purposes).
    // If you'd like to have your input checked and have Apify display
    // a user interface for it, add INPUT_SCHEMA.json file to your actor.
    // For more information, see https://apify.com/docs/actor/input-schema
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Fail fast on malformed input — the rest of the actor needs input.url.
    if (!input || !input.url) throw new Error('Input must be a JSON object with the "url" field!');

    console.log('Launching Puppeteer...');
    const browser = await Apify.launchPuppeteer();

    console.log(`Opening page ${input.url}...`);
    const page = await browser.newPage();
    await page.goto(input.url);
    const title = await page.title();
    console.log(`Title of the page "${input.url}" is "${title}".`);

    // Persist the result to the default key-value store under "OUTPUT".
    console.log('Saving output...');
    await Apify.setValue('OUTPUT', { title });

    // Close the browser so the actor process can exit cleanly.
    await browser.close();
    console.log('Done.');
});
// Actor: takes a screenshot of every URL listed in the input "sources"
// and stores each image in the default key-value store.
Apify.main(async () => {
    // Read the actor input configuration containing the URLs for the screenshot.
    // By convention, the input is present in the actor's default key-value store under the "INPUT" key.
    const input = await Apify.getInput();
    if (!input) throw new Error('Have you passed the correct INPUT ?');

    const { sources } = input;

    const requestList = new Apify.RequestList({ sources });
    await requestList.initialize();

    const crawler = new Apify.PuppeteerCrawler({
        requestList,
        handlePageFunction: async ({ page, request }) => {
            console.log(`Processing ${request.url}...`);

            // This is a Puppeteer function that takes a screenshot of the page and returns its buffer.
            const screenshotBuffer = await page.screenshot();

            // The record key may only include the following characters: a-zA-Z0-9!-_.'()
            // so replace everything else in the URL with dashes.
            const key = request.url.replace(/[^a-zA-Z0-9!\-_.'()]/g, '-');

            // Save the screenshot; contentType must be set explicitly for binary data.
            await Apify.setValue(key, screenshotBuffer, { contentType: 'image/png' });
        },
    });

    await crawler.run();
    console.log('Crawler finished.');
});
// Actor: minimal example — echoes the received input back as OUTPUT.
Apify.main(async () => {
    // Fetch the actor input. To have the input validated and rendered as a
    // form in the Apify UI, ship an INPUT_SCHEMA.json with the actor;
    // see https://apify.com/docs/actor/input-schema
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Do something useful here...

    // Assemble the result record and persist it to the default
    // key-value store under the conventional "OUTPUT" key.
    const record = { receivedInput: input, message: 'Hello sir!' };
    console.log('Output:');
    console.dir(record);
    await Apify.setValue('OUTPUT', record);
});
// Actor: recursively crawls iana.org with Puppeteer, storing page titles
// and enqueueing newly discovered links that match the pseudo-URL pattern.
Apify.main(async () => {
    // Get input of the actor (here only for demonstration purposes).
    // If you'd like to have your input checked and have Apify display
    // a user interface for it, add INPUT_SCHEMA.json file to your actor.
    // For more information, see https://apify.com/docs/actor/input-schema
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Open a request queue and add a start URL to it
    const requestQueue = await Apify.openRequestQueue();
    await requestQueue.addRequest({ url: 'https://www.iana.org/' });

    // Define a pattern of URLs that the crawler should visit
    const pseudoUrls = [new Apify.PseudoUrl('https://www.iana.org/[.*]')];

    // Create a crawler that will use headless Chrome / Puppeteer to extract data
    // from pages and recursively add links to newly-found pages
    const crawler = new Apify.PuppeteerCrawler({
        requestQueue,
        // This function is called for every page the crawler visits
        handlePageFunction: async ({ request, page }) => {
            console.log(`Processing ${request.url}...`);

            // Store the page URL and title in the default dataset.
            const title = await page.title();
            await Apify.pushData({ url: request.url, title });

            // Enqueue links on the page that match the pseudo-URL pattern.
            await Apify.utils.enqueueLinks({ page, selector: 'a', pseudoUrls, requestQueue });
        },
        // Safety cap so a runaway crawl cannot loop forever.
        maxRequestsPerCrawl: 100,
    });

    await crawler.run();
    console.log('Crawler finished.');
});
// Actor: downloads raw HTML for each URL in input.sources using BasicCrawler
// and the `rp` (request-promise) helper, pushing results to the dataset.
Apify.main(async () => {
    // Get input of the actor (here only for demonstration purposes).
    // If you'd like to have your input checked and have Apify display
    // a user interface for it, add INPUT_SCHEMA.json file to your actor.
    // For more information, see https://apify.com/docs/actor/input-schema
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Fail fast on malformed input — the crawler needs input.sources.
    if (!input || !input.sources) throw new Error('Input must be a JSON object with the "sources" field!');

    const requestList = await Apify.openRequestList('my-request-list', input.sources);

    // Create a basic crawler that will use request-promise to download
    // web pages from a given list of URLs
    const basicCrawler = new Apify.BasicCrawler({
        requestList,
        handleRequestFunction: async ({ request }) => {
            // Store the request metadata together with the fetched HTML.
            await Apify.pushData({
                request,
                finishedAt: new Date(),
                html: await rp(request.url),
            });
        },
    });

    await basicCrawler.run();
    console.log('Crawler finished.');
});
// Actor entry point for the Web Scraper: validates INPUT, builds the
// crawler from the configuration, and runs the scrape to completion.
Apify.main(async () => {
    log.debug('Reading INPUT.');
    const input = await Apify.getInput();
    if (!input) throw new Error('INPUT cannot be empty!');

    // Translate the raw INPUT into a configured crawler instance.
    log.info('Configuring Web Scraper.');
    const crawler = await new CrawlerSetup(input).createCrawler();

    log.info('Configuration completed. Starting the scrape.');
    await crawler.run();
    log.info('Web Scraper finished.');
});
// Actor entry point for the Cheerio Scraper: reads and validates INPUT,
// then delegates crawler construction to CrawlerSetup and runs it.
Apify.main(async () => {
    log.debug('Reading INPUT.');
    const input = await Apify.getInput();
    if (!input) throw new Error('INPUT cannot be empty!');

    // CrawlerSetup turns the declarative INPUT into a runnable crawler.
    log.info('Configuring Cheerio Scraper.');
    const scraperSetup = new CrawlerSetup(input);
    const crawler = await scraperSetup.createCrawler();

    log.info('Configuration completed. Starting the scrape.');
    await crawler.run();
    log.info('Cheerio Scraper finished.');
});
// Actor entry point for the Puppeteer Scraper: INPUT is mandatory;
// the configured crawler is built by CrawlerSetup and then executed.
Apify.main(async () => {
    log.debug('Reading INPUT.');
    const actorInput = await Apify.getInput();
    if (!actorInput) throw new Error('INPUT cannot be empty!');

    // Build the crawler from the scraper configuration.
    log.info('Configuring Puppeteer Scraper.');
    const crawler = await new CrawlerSetup(actorInput).createCrawler();

    log.info('Configuration completed. Starting the scrape.');
    await crawler.run();
    log.info('Puppeteer Scraper finished.');
});
// Actor: trivial smoke-test — logs input.test and writes a fixed OUTPUT.
Apify.main(async () => {
    const input = await Apify.getInput();
    // Fail fast with a clear message instead of a TypeError on `input.test`
    // when no INPUT was provided — consistent with the other actors here.
    if (!input) throw new Error('INPUT cannot be empty!');

    console.log(`My test input: ${input.test}`);
    await Apify.setValue('OUTPUT', { foo: 'bar' });
    console.log('Done.');
});