module.exports = async (req, res) => {
const { url } = queryParser(req)
if (!url)
return send(res, 400, { error: 'URL parameter missing' })
let { structuredData, text } = req.locals ? req.locals : await parseHtmlFromUrl(url)
// Add a full stop at the end of every line, if there is not one already
text = text.replace(/([^\.])\n/g, "$1.\n")
// Get sentences in text
const sentences = sbdTokenizer.sentences(text, { newline_boundaries: true, html_boundaries: true })
// Build word list
let words = `${structuredData.title} ${structuredData.description} ${structuredData.tags} ${text}`.split(' ')
let keywords = []
getKeywords(words.join(' ')).forEach(word => {
keywords.push({
name: word,
count: 0
})
  // wordOccurrences.forEach(wordOccurrence => {
  //   if (wordOccurrence.token === word)
  //     keywords.push({
  //       name: word,
  //       count: 0
  //     })
export function extractTitle(text, maxLen) {
// see https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3AWord_Break%3DNewline%3A%5D&g=&i=
const [line] = text.split(/[\u0000-\u001f\u0085\u2028\u2029]/, 1);
if (line.length <= maxLen) {
return trimPeriod(line);
}
const ss = sentences(line);
if (ss[0].length < maxLen) {
return trimPeriod(joinStrings(ss, maxLen));
}
const words = ss[0].split(/\s+/);
if (words[0].length < maxLen) {
return `${joinStrings(words, maxLen - 1)}\u2026`;
}
return `${words[0].substr(0, maxLen - 1)}\u2026`;
}
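Illustrative behaviour of extractTitle, assuming trimPeriod strips a trailing full stop, joinStrings packs whole items up to the length limit, and sentences is the sbd tokenizer (all three are defined elsewhere in the module this snippet comes from):
// extractTitle('A short headline.\nBody text follows here.', 80)
//   -> 'A short headline'   (first line fits; trailing period trimmed)
// extractTitle('One sentence. Another sentence that pushes past the limit.', 20)
//   -> 'One sentence'       (falls back to whole sentences that fit within maxLen)
// extractTitle('Supercalifragilisticexpialidocious words', 10)
//   -> 'Supercali…'         (a single over-long word is hard-truncated with an ellipsis)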
.map(textChunk => {
const tweets = sentences(textChunk)
.reduce(
(acc, sentence) => {
const lastIndex = acc.length - 1
const currentNumber = globalAccLength + acc.length
const nextNumber = currentNumber + 1
const extraNumberWidth = 2 // "/" plus one extra separator character
const nextNumberWidth =
nextNumber.toString().length + extraNumberWidth
const lastTweet = acc[lastIndex]
const concatCandidate = joinSentences(
numbering ? lastTweet || `${currentNumber}/` : lastTweet,
sentence
)
// If the current sentence fits the last tweet.
if (getTweetLength(concatCandidate) <= limit) {
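The reducer above depends on joinSentences, getTweetLength, limit, numbering and globalAccLength defined elsewhere in that project; a self-contained sketch of the same packing idea, using plain string length in place of getTweetLength, might look like this:
const sbd = require('sbd')

// Simplified sketch, not the original implementation: pack sentences into
// chunks of at most `limit` characters, starting a new chunk when the next
// sentence no longer fits.
function packIntoTweets(text, limit = 280) {
  const sentences = sbd.sentences(text, { newline_boundaries: true })
  const tweets = []
  for (const sentence of sentences) {
    const last = tweets[tweets.length - 1]
    const candidate = last ? `${last} ${sentence}` : sentence
    if (last && candidate.length <= limit) {
      tweets[tweets.length - 1] = candidate // the sentence still fits in the current tweet
    } else {
      tweets.push(sentence) // start a new tweet
    }
  }
  return tweets
}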
export function createKey($el) {
let key = '';
let len = 6;
let txt = normalizeText($el.textContent || '').replace(/[^\w\. ]+/giu, '');
if (txt && txt.length > 1) {
let lines = sbd
.sentences(txt)
.map(x => trim(x))
.filter(x => x);
if (lines.length) {
let first = lines[0].match(/\S+/gu).slice(0, len / 2);
let last = lines[lines.length - 1].match(/\S+/gu).slice(0, len / 2);
let k = first.concat(last);
let max = k.length > len ? len : k.length;
      for (let i = 0; i < max; i++) {
        key += k[i].substring(0, 1);
      }
    }
  }
  return key; // key stays '' when the element has no usable text
}
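Illustrative usage of createKey, assuming normalizeText leaves plain English text unchanged and trim strips surrounding whitespace (both are defined elsewhere in that module; the element below is hypothetical):
// const el = document.createElement('p')
// el.textContent = 'First sentence here. Some middle text. Final sentence ends now.'
// createKey(el) // -> 'FshFse': the first letters of the first three words of the
//               //    first sentence plus the first three words of the last sentence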
function breakContentIntoSentences(content) {
content.sentences = []
const sentences = sentenceBoundaryDetection.sentences(content.sourceContentSanitized)
sentences.forEach((sentence) => {
content.sentences.push({
text: sentence,
keywords: [],
images: []
})
})
}
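Example of the data shape this function mutates, assuming sourceContentSanitized is plain text prepared earlier in that pipeline:
// const content = { sourceContentSanitized: 'First fact. Second fact.' }
// breakContentIntoSentences(content)
// content.sentences is now:
// [
//   { text: 'First fact.', keywords: [], images: [] },
//   { text: 'Second fact.', keywords: [], images: [] }
// ]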
export function splitEng(text: string): string[] {
return compact(tokenizer.sentences(text.trim(), optional_options));
}
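splitEng relies on tokenizer, compact and optional_options declared elsewhere in that file; a self-contained JavaScript sketch with plausible imports (the option values and the lodash import are assumptions, not the original project's configuration):
const tokenizer = require('sbd')
const { compact } = require('lodash')

// Assumed options; the original values are not shown in the snippet.
const optional_options = { newline_boundaries: true, sanitize: false }

function splitEng(text) {
  // compact() drops any empty strings the tokenizer may return
  return compact(tokenizer.sentences(text.trim(), optional_options))
}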
if (!url)
return send(res, 400, { error: 'URL parameter missing' })
const { structuredData, text } = req.locals ? req.locals : await parseHtmlFromUrl(url)
const trustIndicators = { positive: [], negative: [] }
const quotes = getQuotes(text)
let quotesWithNumbers = []
quotes.forEach(quote => {
if (quote.match(/[0-9]/))
quotesWithNumbers.push(quote)
})
const sentences = sbdTokenizer.sentences(text, { newline_boundaries: true })
let sentencesWithNumbers = []
sentences.forEach(sentence => {
if (sentence.match(/[0-9]/))
sentencesWithNumbers.push(sentence.replace(/\n/g, ' '))
})
if (quotes.length > 1) {
trustIndicators.positive.push({
text: `Multiple quotes cited in article`,
description: 'Articles that contain quotes are useful as quotes can be verified.'
})
} else {
trustIndicators.negative.push({
text: `No quotes cited in article`,
      description: 'It is unusual for legitimate news articles not to contain multiple quotes.\nQuotes are useful as they can be verified.'
    })
  }
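The original handler continues past this point; a hedged sketch of how the numeric checks above might feed a further indicator and the response (an assumed continuation in the same pattern, not the project's actual code):
  // Assumed continuation, for illustration only.
  if (sentencesWithNumbers.length > 0 || quotesWithNumbers.length > 0) {
    trustIndicators.positive.push({
      text: `Facts and figures cited in article`,
      description: 'Sentences and quotes that contain numbers are easier to fact-check.'
    })
  }
  return send(res, 200, { trustIndicators })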