// Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
// NOTE(review): fragment — the enclosing function opens before this chunk;
// `page`, `input`, `requestQueue`, `getAttribute` and `addUrlParameters`
// come from that unseen scope.
// Enqueues one 'page'-labelled request per pagination page of a results listing.
console.log('enqueuing pagination pages...');
// Pagination links, excluding the currently active page.
const pageSelector = '.bui-pagination__list a:not([aria-current])';
// Headers expected to contain the total result count.
const countSelector = '.sorth1, .sr_header h1, .sr_header h2';
try{
await page.waitForSelector(pageSelector, {timeout: 60000});
const pageElem = await page.$(pageSelector);
// Use one existing pagination link as a URL template for all pages.
const pageUrl = await getAttribute(pageElem, 'href');
await page.waitForSelector(countSelector);
const countElem = await page.$(countSelector);
// Strip thousands separators (dots, commas, whitespace), then take the
// first run of digits as the total result count.
const countData = (await getAttribute(countElem, 'textContent')).replace(/\.|,|\s/g, '').match(/\d+/);
if(countData){
// 20 results per page — presumably the site's fixed page size; TODO confirm.
const count = Math.ceil(parseInt(countData[0])/20);
console.log('pagination pages: ' + count);
for(let i = 0; i < count; i++){
// Rewrite the template link's rows/offset query params to address page i.
const newUrl = pageUrl.replace(/rows=(\d+)/, 'rows=20').replace(/offset=(\d+)/, 'offset=' + 20*i);
await requestQueue.addRequest(new Apify.Request({
url: addUrlParameters(newUrl, input),
//url: baseUrl + '&rows=20&offset=' + 20*i,
userData: {label: 'page'}
}));
}
}
}
catch(e){
console.log(e);
// Persist the rendered HTML for debugging when count extraction fails.
await Apify.setValue('count_error.html', await page.content(), {contentType: 'text/html'});
}
}
}
// If property type is enabled, enqueue necessary page.
if(settingPropertyType){
// NOTE(review): the lines below appear spliced in from an unrelated
// Hacker-News crawler example — neither this `if` nor the Apify.main /
// PuppeteerCrawler blocks are closed before the next fragment starts.
// Confirm against the full file; likely a corrupted merge.
Apify.main(async () => {
// Get queue and enqueue first url.
const requestQueue = await Apify.openRequestQueue();
// Enqueue Start url.
await requestQueue.addRequest(new Apify.Request({ url: 'https://news.ycombinator.com/' }));
// Create crawler.
const crawler = new Apify.PuppeteerCrawler({
requestQueue,
// This function is executed for each request.
// If the request fails it is retried 3 times.
// Parameter `page` is Puppeteer's Page object with the loaded page.
handlePageFunction: async ({ page, request }) => {
console.log(`Processing ${request.url}...`);
// Extract all posts.
const pageFunction = ($posts) => {
const data = [];
$posts.forEach(($post) => {
title: $(el).find('.title a').text(),
rank: $(el).find('.rank').text(),
href: $(el).find('.title a').attr('href'),
});
});
// Save data.
await Apify.pushData(data);
// Enqueue next page.
const $moreLink = $('.morelink');
if ($moreLink.length) {
const path = $moreLink.attr('href')
const url = `https://news.ycombinator.com/${path}`;
await requestQueue.addRequest(new Apify.Request({ url }));
} else {
console.log(`Url ${request.url} is the last page!`);
}
},
it('should work', async () => {
    const page = await browser.newPage();
    await page.setContent(PAGE_CONTENT);

    const linkSelector = 'a';
    const pseudoUrls = [{ purl: 'https://example.com[.*]' }];

    // Stub out the queue so enqueued requests are captured locally and
    // each one receives a predictable sequential id.
    let id = 0;
    const requestQueue = await Apify.openRequestQueue();
    requestQueue.requests = [];
    requestQueue.addRequest = (request) => {
        requestQueue.requests.push(request);
        id += 1;
        return { requestId: `some-${id}` };
    };

    const request = new Apify.Request({ id: 'parent', url: 'https://www.example.com' });
    tools.ensureMetaData(request);
    await tools.enqueueLinks(page, linkSelector, pseudoUrls, requestQueue, request);

    // All three matching links were enqueued, each one level below the parent.
    expect(requestQueue.requests).to.have.lengthOf(3);
    for (const r of requestQueue.requests) {
        expect(r.userData[META_KEY].depth).to.be.eql(1);
        expect(r.userData[META_KEY].parentRequestId).to.be.eql('parent');
        expect(r.userData[META_KEY].childRequestIds).to.be.eql({});
    }

    // The parent request tracks every child by its generated id.
    const children = Object.keys(request.userData[META_KEY].childRequestIds);
    expect(children).to.have.lengthOf(3);
    for (const c of children) expect(/^some-[123]$/.test(c)).to.be.eql(true);
});
});
it('should work', () => {
    // A fresh request gets default crawl metadata attached in place.
    const request = new Apify.Request({ url: 'https://www.example.com' });
    tools.ensureMetaData(request);

    const meta = request.userData[META_KEY];
    expect(meta).to.be.an('object');
    expect(meta.depth).to.be.eql(0);
    expect(meta.parentRequestId).to.be.eql(null);
    expect(meta.childRequestIds).to.be.eql({});
});
});
/**
 * Enqueues the search-results page filtered by the configured min-max
 * price band.
 *
 * @param {object} page - Puppeteer Page with the results page loaded.
 * @param {object} input - Actor input; `input.minMaxPrice` selects the band.
 * @param {object} requestQueue - Apify RequestQueue to enqueue into.
 */
module.exports.setMinMaxPrice = async (page, input, requestQueue) => {
    console.log('enqueuing min-max price page...');
    const urlMod = fixUrl('&', input);

    // The first filter group on the page holds the price-range options.
    const groups = await page.$$('.filteroptions');
    const options = await groups[0].$$('.filterelement');

    // Pick the option whose position matches the requested band label.
    const index = pLabels.indexOf(input.minMaxPrice);
    const option = options[index];
    const labelElem = await option.$('.filter_label');
    const fText = await getAttribute(labelElem, 'textContent');
    console.log('Using filter: ' + fText);

    const href = await getAttribute(option, 'href');
    await requestQueue.addRequest(new Apify.Request({
        userData: { label: 'page' },
        url: urlMod(href),
        uniqueKey: fText + '_' + 0,
    }));
};