Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
// We're getting the title, rank and URL of each post on Hacker News.
// NOTE(review): the header of this function is outside the visible chunk; `$posts` and
// `data` are defined there. Presumably `$posts` is the list of matched post elements and
// `data` is an initially empty array - confirm against the full file.
$posts.forEach(($post) => {
// One record per post element. The '.title a' link supplies both the text and the URL.
data.push({
title: $post.querySelector('.title a').innerText,
rank: $post.querySelector('.rank').innerText,
href: $post.querySelector('.title a').href,
});
});
// The accumulated records are the function's result.
return data;
};
// Evaluate pageFunction against the elements matching '.athing' and keep its return value.
const data = await page.$$eval('.athing', pageFunction);
// Store the results to the default dataset.
await Apify.pushData(data);
// Find a link to the next page and enqueue it if it exists.
const infos = await Apify.utils.enqueueLinks({
page,
requestQueue,
selector: '.morelink',
});
// An empty result from enqueueLinks is treated as "no next page".
if (infos.length === 0) console.log(`${request.url} is the last page!`);
},
// List-page extraction: evaluate listPageFunction in the page, then push only the items
// that have not been recorded in state.crawled yet.
// NOTE(review): the enclosing branch header is outside the visible chunk.
console.log('extracting data...');
// Keep a copy of the page HTML in the key-value store under 'page.html'.
await Apify.setValue('page.html', await page.content(), {contentType: 'text/html'});
// jQuery is injected before evaluating - presumably listPageFunction relies on it; confirm.
await Apify.utils.puppeteer.injectJQuery(page);
const result = await page.evaluate(listPageFunction, input);
console.log('Found ' + result.length + ' results');
if (result.length > 0) {
const toBeAdded = [];
for (const item of result) {
// The URL is rewritten for every item, including ones that end up being skipped below.
item.url = addUrlParameters(item.url, input);
// De-duplicate by item.name: only names not yet marked in state.crawled are kept.
if (!state.crawled[item.name]) {
toBeAdded.push(item);
state.crawled[item.name] = true;
}
}
// The state is written to the 'STATE' key only while `migrating` is truthy.
if (migrating) { await Apify.setValue('STATE', state); }
if (toBeAdded.length > 0) { await Apify.pushData(toBeAdded); }
}
} else if (enqueuingReady) { // If not, enqueue the detail pages to be extracted.
console.log('enqueuing detail pages...');
// The two commented-out lines below are a disabled enqueueLinks-based variant of this branch.
//await enqueueLinks(page, requestQueue, '.hotel_name_link', null, 'detail',
// fixUrl('&', input), (link) => getAttribute(link, 'textContent'));
const urlMod = fixUrl('&', input);
const keyMod = (link) => getAttribute(link, 'textContent');
// Read the pagination info text and collect every run of digits found in it.
const prItem = await page.$('.bui-pagination__info');
// NOTE(review): String.prototype.match returns null when nothing matches, which would make
// pageRange[0] below throw - confirm the element always contains a number.
const pageRange = (await getAttribute(prItem, 'textContent')).match(/\d+/g);
// The first number in the pagination text is taken as the first item index.
// NOTE(review): parseInt is called without an explicit radix.
const firstItem = parseInt(pageRange[0]);
const links = await page.$$('.hotel_name_link');
for (let iLink = 0; iLink < links.length; iLink++) {
const link = links[iLink];
const href = await getAttribute(link, 'href');
// Links without an href are skipped.
if (href) {
await requestQueue.addRequest(new Apify.Request({
// Submit the form and wait for full load of next page
console.log('Submit search form');
// The navigation wait and the click are started together and awaited as a pair.
await Promise.all([
page.waitForNavigation(),
page.click('#adv_code_search button[type="submit"]')
]);
// Obtain and print list of search results
// Each matched anchor is reduced to a plain { url, name } object.
const results = await page.$$eval('div.codesearch-results ul.repo-list li h3 a', nodes => nodes.map(node => ({
url: node.href,
name: node.innerText
})));
console.log('Results:', results);
// Store data in default dataset
await Apify.pushData(results);
// NOTE(review): the construct closed on the next line opens outside the visible chunk.
});
// Detail-page handling followed by the start of list-page handling.
// NOTE(review): the branch header that selects the detail path is outside the visible chunk.
const pageUrl = await page.url();
// Without user-supplied start URLs, a URL lacking 'label' makes the handler retire the
// browser and stop. (The list branch below labels the same kind of check as a proxy check.)
if (!input.startUrls && pageUrl.indexOf('label') < 0) {
await retireBrowser();
return;
}
// Exit if core data is not present or the rating is too low.
// A missing aggregateRating does not cause an exit; a rating equal to the minimum does.
if (!ld || (ld.aggregateRating && ld.aggregateRating.ratingValue <= (input.minScore || 0))) {
return;
}
// Extract the data.
console.log('extracting detail...');
const detail = await extractDetail(page, ld, input, request.userData);
console.log('detail extracted');
await Apify.pushData(detail);
return;
} else { // Handle hotel list page.
const filtered = await isFiltered(page);
// Each "setting*" flag is true when the option is requested in the input but the page
// does not report it as applied yet.
const settingFilters = input.useFilters && !filtered;
const settingMinMaxPrice = input.minMaxPrice != 'none' && !await isMinMaxPriceSet(page, input);
const settingPropertyType = input.propertyType != 'none' && !await isPropertyTypeSet(page, input);
// Enqueuing is ready only when none of the three settings is still pending.
const enqueuingReady = !(settingFilters || settingMinMaxPrice || settingPropertyType);
// Check if the page was open through working proxy.
const pageUrl = await page.url();
// NOTE(review): `sortBy` is defined outside the visible chunk.
if (!input.startUrls && pageUrl.indexOf(sortBy) < 0) {
await retireBrowser();
return;
}
} else {
// Fallback of a condition that is outside the visible chunk: record a status row
// carrying the keyword taken from the request's userData.
await Apify.pushData({
status: 'No sellers for this keyword.',
keyword: request.userData.keyword,
});
}
} else if (type === 'RESULT') {
// With input.maxResults set, the item is pushed only while checkSaveCount returns
// exactly true; any other return value ends the whole process.
if (input.maxResults) {
if (await checkSaveCount(datasetId, input.maxResults) === true) {
await Apify.pushData(item);
} else {
console.log('Finished');
// Terminates the Node.js process immediately with exit code 0.
process.exit(0);
}
} else {
// No limit configured: always push.
await Apify.pushData(item);
}
}
}
async _handleResult(request, response, pageFunctionResult, isError) {
const start = process.hrtime();
const payload = tools.createDatasetPayload(request, response, pageFunctionResult, isError);
await Apify.pushData(payload);
this.pagesOutputted++;
tools.logPerformance(request, 'handleResult EXECUTION', start);
}
async _handleResult(request, response, pageFunctionResult, isError) {
const payload = tools.createDatasetPayload(request, response, pageFunctionResult, isError);
await Apify.pushData(payload);
this.pagesOutputted++;
}
}