Secure your code as it's written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.
page.on('request', onRequest);
console.log(` * goto: ${urlToVisit}`);
const status = await page.goto(urlToVisit, {
timeout: 120000,
waitUntil: 'networkidle2',
});
const pageUrl = page.url();
if (pageUrl !== urlToVisit) {
console.log(` > ${page.url()}`);
}
// We do not collect URLs unless we are on the home-page
if (status.ok && url === undefined) {
const domainOfPage = getDomain(pageUrl);
const urlsOnPage = await page.evaluate(() => [...document.querySelectorAll('a')].map(a => a.href).filter(Boolean));
const sameDomainUrls = urlsOnPage.filter(
href => href
&& (href.startsWith('https://')
|| href.startsWith('http://')
|| href.startsWith('ws://')
|| href.startsWith('wss://'))
&& getDomain(href) === domainOfPage,
);
return [...new Set(sameDomainUrls)];
}
} catch (ex) {
console.log(`Could not fetch: ${urlToVisit}`, ex);
} finally {
await page.removeAllListeners('request');
await page.close();
const { cpt, sourceUrl, url } = request;
if (cpt === 'document' || url.length > 200) {
return;
}
const t0 = Date.now();
const { exception, filter } = engine.match({
cpt: cpt.toLowerCase(),
sourceUrl,
url,
});
const total = Date.now() - t0;
const sourceDomain = getDomain(sourceUrl);
if (total > 5) {
console.log('SLOW', total, cpt, sourceUrl, url.slice(0, 25), '...');
slowRequests.push(request);
}
if (filter !== undefined && !networkFilters.has(filter.rawLine)) {
networkFilters.add(filter.rawLine);
// console.log('> f', removeExtraHostnames(filter));
addFilter(sourceDomain, removeExtraHostnames(filter));
}
if (exception !== undefined && !networkFilters.has(exception.rawLine)) {
networkFilters.add(exception.rawLine);
// console.log('> e', removeExtraHostnames(exception));
addFilter(sourceDomain, removeExtraHostnames(exception));
href => href
&& (href.startsWith('https://')
|| href.startsWith('http://')
|| href.startsWith('ws://')
|| href.startsWith('wss://'))
&& getDomain(href) === domainOfPage,
);