Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
// Second, look through our selectors looking for potential authors.
author = extractFromSelectors($, AUTHOR_SELECTORS, 2);
if (author && author.length < AUTHOR_MAX_LENGTH) {
return cleanAuthor(author);
}
// Last, use our looser regular-expression based selectors for
// potential authors.
// eslint-disable-next-line no-restricted-syntax
for (const [selector, regex] of BYLINE_SELECTORS_RE) {
const node = $(selector);
if (node.length === 1) {
const text = node.text();
if (regex.test(text)) {
return cleanAuthor(text);
}
}
}
return null;
},
};
extract({ $, metaCache }) {
let author;
// First, check to see if we have a matching
// meta tag that we can make use of.
author = extractFromMeta($, AUTHOR_META_TAGS, metaCache);
if (author && author.length < AUTHOR_MAX_LENGTH) {
return cleanAuthor(author);
}
// Second, look through our selectors looking for potential authors.
author = extractFromSelectors($, AUTHOR_SELECTORS, 2);
if (author && author.length < AUTHOR_MAX_LENGTH) {
return cleanAuthor(author);
}
// Last, use our looser regular-expression based selectors for
// potential authors.
// eslint-disable-next-line no-restricted-syntax
for (const [selector, regex] of BYLINE_SELECTORS_RE) {
const node = $(selector);
if (node.length === 1) {
const text = node.text();
if (regex.test(text)) {
/**
 * Find the most likely author string for the document.
 *
 * Three sources are tried in order and the first usable hit is returned
 * after being passed through cleanAuthor():
 *   1. meta tags listed in AUTHOR_META_TAGS,
 *   2. nodes matched by AUTHOR_SELECTORS,
 *   3. the [selector, regex] pairs in BYLINE_SELECTORS_RE.
 *
 * @param {Object} opts
 * @param {Function} opts.$ - selector function for the document; called as
 *   $(selector), with the result exposing .length and .text().
 * @param {*} opts.metaCache - forwarded untouched to extractFromMeta.
 * @returns {*} the result of cleanAuthor() for the first match, or null
 *   when no source produces a candidate.
 */
extract({ $, metaCache }) {
let author;
// First, check to see if we have a matching
// meta tag that we can make use of.
author = extractFromMeta($, AUTHOR_META_TAGS, metaCache);
// Candidates at or above AUTHOR_MAX_LENGTH are rejected and we fall through.
if (author && author.length < AUTHOR_MAX_LENGTH) {
return cleanAuthor(author);
}
// Second, look through our selectors looking for potential authors.
// NOTE(review): the meaning of the third argument (2) is defined by
// extractFromSelectors, which is not visible here -- confirm before changing.
author = extractFromSelectors($, AUTHOR_SELECTORS, 2);
if (author && author.length < AUTHOR_MAX_LENGTH) {
return cleanAuthor(author);
}
// Last, use our looser regular-expression based selectors for
// potential authors.
// Unlike the two steps above, this step applies no AUTHOR_MAX_LENGTH check.
// eslint-disable-next-line no-restricted-syntax
for (const [selector, regex] of BYLINE_SELECTORS_RE) {
const node = $(selector);
// Only trust the selector when it matches exactly one node.
if (node.length === 1) {
const text = node.text();
if (regex.test(text)) {
return cleanAuthor(text);
}
}
}
// Nothing matched in any of the three sources.
return null;
);
if (topScore > 0) {
cleanUrl = cleanImage(topUrl);
if (cleanUrl) return cleanUrl;
}
// If nothing else worked, check to see if there are any really
// probable nodes in the doc, i.e. those matched by LEAD_IMAGE_URL_SELECTORS.
// eslint-disable-next-line no-restricted-syntax
for (const selector of LEAD_IMAGE_URL_SELECTORS) {
const $node = $(selector).first();
const src = $node.attr('src');
if (src) {
cleanUrl = cleanImage(src);
if (cleanUrl) return cleanUrl;
}
const href = $node.attr('href');
if (href) {
cleanUrl = cleanImage(href);
if (cleanUrl) return cleanUrl;
}
const value = $node.attr('value');
if (value) {
cleanUrl = cleanImage(value);
if (cleanUrl) return cleanUrl;
}
}
.prepend(html);
}
// Check to see if we have a matching meta tag that we can make use of.
// Moving this higher because common practice is now to use large
// images for things like Open Graph or Twitter cards.
const imageUrl = extractFromMeta(
$,
LEAD_IMAGE_URL_META_TAGS,
metaCache,
false
);
if (imageUrl) {
cleanUrl = cleanImage(imageUrl);
if (cleanUrl) return cleanUrl;
}
// Next, try to find the "best" image via the content.
// We'd rather not have to fetch each image and check dimensions,
// so try to do some analysis and determine them instead.
const $content = $(content);
const imgs = $('img', $content).toArray();
const imgScores = {};
imgs.forEach((img, index) => {
const $img = $(img);
const src = $img.attr('src');
if (!src) return;
score += scoreAttr($img);
score += scoreByParents($img);
score += scoreBySibling($img);
score += scoreByDimensions($img);
score += scoreByPosition(imgs, index);
imgScores[src] = score;
});
const [topUrl, topScore] = Reflect.ownKeys(imgScores).reduce(
(acc, key) => (imgScores[key] > acc[1] ? [key, imgScores[key]] : acc),
[null, 0]
);
if (topScore > 0) {
cleanUrl = cleanImage(topUrl);
if (cleanUrl) return cleanUrl;
}
// If nothing else worked, check to see if there are any really
// probable nodes in the doc, i.e. those matched by LEAD_IMAGE_URL_SELECTORS.
// eslint-disable-next-line no-restricted-syntax
for (const selector of LEAD_IMAGE_URL_SELECTORS) {
const $node = $(selector).first();
const src = $node.attr('src');
if (src) {
cleanUrl = cleanImage(src);
if (cleanUrl) return cleanUrl;
}
const href = $node.attr('href');
}
// If nothing else worked, check to see if there are any really
// probable nodes in the doc, i.e. those matched by LEAD_IMAGE_URL_SELECTORS.
// eslint-disable-next-line no-restricted-syntax
for (const selector of LEAD_IMAGE_URL_SELECTORS) {
const $node = $(selector).first();
const src = $node.attr('src');
if (src) {
cleanUrl = cleanImage(src);
if (cleanUrl) return cleanUrl;
}
const href = $node.attr('href');
if (href) {
cleanUrl = cleanImage(href);
if (cleanUrl) return cleanUrl;
}
const value = $node.attr('value');
if (value) {
cleanUrl = cleanImage(value);
if (cleanUrl) return cleanUrl;
}
}
return null;
},
};
datePublished = extractFromMeta(
$,
DATE_PUBLISHED_META_TAGS,
metaCache,
false
);
if (datePublished) return cleanDatePublished(datePublished);
// Second, look through our selectors looking for potential
// date_published's.
datePublished = extractFromSelectors($, DATE_PUBLISHED_SELECTORS);
if (datePublished) return cleanDatePublished(datePublished);
// Lastly, look to see if a dately string exists in the URL
datePublished = extractFromUrl(url, DATE_PUBLISHED_URL_RES);
if (datePublished) return cleanDatePublished(datePublished);
return null;
},
};
let datePublished;
// First, check to see if we have a matching meta tag
// that we can make use of.
// Don't try cleaning tags from this string
datePublished = extractFromMeta(
$,
DATE_PUBLISHED_META_TAGS,
metaCache,
false
);
if (datePublished) return cleanDatePublished(datePublished);
// Second, look through our selectors looking for potential
// date_published's.
datePublished = extractFromSelectors($, DATE_PUBLISHED_SELECTORS);
if (datePublished) return cleanDatePublished(datePublished);
// Lastly, look to see if a dately string exists in the URL
datePublished = extractFromUrl(url, DATE_PUBLISHED_URL_RES);
if (datePublished) return cleanDatePublished(datePublished);
return null;
},
};
extract({ $, url, metaCache }) {
let datePublished;
// First, check to see if we have a matching meta tag
// that we can make use of.
// Don't try cleaning tags from this string
datePublished = extractFromMeta(
$,
DATE_PUBLISHED_META_TAGS,
metaCache,
false
);
if (datePublished) return cleanDatePublished(datePublished);
// Second, look through our selectors looking for potential
// date_published's.
datePublished = extractFromSelectors($, DATE_PUBLISHED_SELECTORS);
if (datePublished) return cleanDatePublished(datePublished);
// Lastly, look to see if a dately string exists in the URL
datePublished = extractFromUrl(url, DATE_PUBLISHED_URL_RES);
if (datePublished) return cleanDatePublished(datePublished);
return null;
},
};