const Apify = require('apify');

Apify.main(async () => {
// Launch web browser.
const browser = await Apify.launchPuppeteer();
// Load http://goldengatebridge75.org/news/webcam.html and get an IFRAME with the webcam stream
console.log('Opening web page...');
const page = await browser.newPage();
await page.goto('http://goldengatebridge75.org/news/webcam.html');
const iframe = (await page.frames()).pop();
// Get webcam image element handle.
const imageElementHandle = await iframe.$('.VideoColm img');
// Give the webcam image some time to load.
console.log('Waiting for page to load...');
await Apify.utils.sleep(3000);
// Get a screenshot of that image.
const imageBuffer = await imageElementHandle.screenshot();
console.log('Screenshot captured.');
// Save the screenshot as the actor's output. By convention, similarly to "INPUT",
// the actor's output is stored in the default key-value store under the "OUTPUT" key.
await Apify.setValue('OUTPUT', imageBuffer, { contentType: 'image/jpeg' });
// Close the browser and finish the actor run.
await browser.close();
console.log('Actor finished.');
});
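// A minimal sketch (not part of the example above) of how the saved OUTPUT record
// could be read back from the default key-value store later, e.g. to verify that
// the screenshot was stored:
const Apify = require('apify');

Apify.main(async () => {
    // Returns a Buffer, because the record was stored with the 'image/jpeg' content type.
    const screenshot = await Apify.getValue('OUTPUT');
    console.log(`The stored screenshot has ${screenshot.length} bytes.`);
});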
// Null and undefined do not prevent the payload
// from being saved to the dataset. It will just contain
// the relevant metadata.
let result = pageFunctionResult || {};
// Validate the result.
const type = typeof result;
if (type !== 'object') {
throw new Error(`Page function must return Object | Object[], but it returned ${type}.`);
}
// Metadata needs to be appended to each item
// to match results with dataset "lines".
if (!Array.isArray(result)) result = [result];
const meta = {
'#error': isError,
'#debug': Apify.utils.createRequestDebugInfo(request, response),
};
return result.map(item => Object.assign({}, item, meta));
};
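// For illustration only (the values below are hypothetical, not from the code above):
// if the page function returned { title: 'Example Domain' }, the wrapper above would
// produce a single dataset item shaped roughly like this:
// {
//     title: 'Example Domain',
//     '#error': false,
//     '#debug': { requestId: '...', url: '...', method: 'GET', ... },
// }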
/**
* This example demonstrates how to use [`CheerioCrawler`](../api/cheeriocrawler)
* to crawl a list of URLs from an external file,
 * load each URL using a plain HTTP request, parse the HTML using [cheerio](https://www.npmjs.com/package/cheerio)
* and extract some data from it: the page title and all H1 tags.
*
* To run this example on the Apify Platform, select the `Node.js 10 on Alpine Linux (apify/actor-node-basic)` base image
* on the source tab of your actor configuration.
*/
const Apify = require('apify');
// Apify.utils contains various utilities, e.g. for logging.
// Here we turn off the logging of unimportant messages.
const { log } = Apify.utils;
log.setLevel(log.LEVELS.WARNING);
// A link to a list of Fortune 500 companies' websites available on GitHub.
const CSV_LINK = 'https://gist.githubusercontent.com/hrbrmstr/ae574201af3de035c684/raw/f1000.csv';
// Apify.main() function wraps the crawler logic (it is optional).
Apify.main(async () => {
// Create an instance of the RequestList class that contains a list of URLs to crawl.
// Here we download and parse the list of URLs from an external file.
const requestList = new Apify.RequestList({
sources: [{ requestsFromUrl: CSV_LINK }],
});
await requestList.initialize();
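// An aside (not in the original example): besides `requestsFromUrl`, the `sources`
// array can also mix in explicitly listed URLs, e.g.:
// const requestList = new Apify.RequestList({
//     sources: [
//         { url: 'https://example.com' },
//         { requestsFromUrl: CSV_LINK },
//     ],
// });
// await requestList.initialize();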
// Create an instance of the CheerioCrawler class - a crawler
// that automatically loads the URLs and parses their HTML using the cheerio library.
const crawler = new Apify.CheerioCrawler({
// Let the crawler use the list of URLs downloaded above.
requestList,
handlePageFunction: async ({ request, $ }) => {
// Extract the page title and all H1 tags using cheerio.
const title = $('title').text();
const h1texts = [];
$('h1').each((index, el) => { h1texts.push({ text: $(el).text() }); });
console.log(`Title of ${request.url}: ${title}`);
// Store the results to the default dataset, together with debugging metadata.
await Apify.pushData({
url: request.url,
title,
h1texts,
'#debug': Apify.utils.createRequestDebugInfo(request),
});
},
handleFailedRequestFunction: async ({ request }) => {
console.log(`Request ${request.url} failed too many times`);
await Apify.pushData({
'#debug': Apify.utils.createRequestDebugInfo(request),
});
},
});
// Run the crawler and wait for it to finish.
await crawler.run();
console.log('Crawler finished.');
});
async _handleLinks($, request) {
if (!(this.input.linkSelector && this.requestQueue)) return;
const currentDepth = request.userData[META_KEY].depth;
const hasReachedMaxDepth = this.input.maxCrawlingDepth && currentDepth >= this.input.maxCrawlingDepth;
if (hasReachedMaxDepth) {
log.debug(`Request ${request.url} reached the maximum crawling depth of ${currentDepth}.`);
return;
}
await Apify.utils.enqueueLinks({
$,
selector: this.input.linkSelector,
pseudoUrls: this.input.pseudoUrls,
requestQueue: this.requestQueue,
baseUrl: request.loadedUrl,
transformRequestFunction: (requestOptions) => {
requestOptions.userData = {
[META_KEY]: {
parentRequestId: request.id || request.uniqueKey,
depth: currentDepth + 1,
},
};
requestOptions.useExtendedUniqueKey = true;
requestOptions.keepUrlFragment = this.input.keepUrlFragments;
return requestOptions;
},
});
}
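// A minimal sketch (a hypothetical helper, not taken from the source above) of how the
// depth metadata read by _handleLinks() could be initialized on start requests, assuming
// META_KEY is the same constant used above:
// const ensureDepthMetadata = (request) => {
//     if (!request.userData[META_KEY]) {
//         request.userData[META_KEY] = { parentRequestId: null, depth: 0 };
//     }
//     return request;
// };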
handleRequestFunction: async ({ request }) => {
await Apify.pushData({
request,
finishedAt: new Date(),
// 'rp' is assumed to be require('request-promise'), used here for a plain HTTP GET.
html: await rp(request.url),
'#debug': Apify.utils.createRequestDebugInfo(request),
});
},
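// For context, a minimal sketch (an assumption, not the snippet's actual surroundings)
// of the kind of crawler a handleRequestFunction like the one above plugs into; `rp`
// stands for require('request-promise') and the start URL is only illustrative:
const Apify = require('apify');
const rp = require('request-promise');

Apify.main(async () => {
    const requestList = new Apify.RequestList({
        sources: [{ url: 'https://example.com' }],
    });
    await requestList.initialize();

    const crawler = new Apify.BasicCrawler({
        requestList,
        handleRequestFunction: async ({ request }) => {
            // Fetch the raw HTML and store it to the default dataset.
            await Apify.pushData({
                url: request.url,
                finishedAt: new Date(),
                html: await rp(request.url),
            });
        },
    });

    await crawler.run();
});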