Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
/**
 * Extract words from `str` with `jieba.extract`, drop unwanted entries, and
 * join the remaining words into a single string.
 *
 * NOTE(review): the closing brace of this function is not present in this
 * snippet; the body shown here ends at the `.join(splitBy)` return.
 *
 * @param {string} [str=''] - Text handed to `jieba.extract`.
 * @param {string} [splitBy=' '] - Separator placed between the kept words.
 * @param {number} [limit=500] - Second argument passed to `jieba.extract`
 *   (presumably the maximum number of entries to extract; confirm against
 *   the jieba binding in use).
 * @returns {string} The kept words joined by `splitBy`.
 */
function cut(str = '', splitBy = ' ', limit = 500) {
return jieba
.extract(str, limit)
// Each extracted entry is an object; only its `word` property is kept.
.map(({ word }) => word)
.filter((word) => {
// Drop entries that are empty or whitespace-only.
if (word.trim() === '') {
return false;
}
// Drop words that have a truthy entry in the externally defined ignoreWordsHash.
if (ignoreWordsHash[word]) {
return false;
}
// Filter out CDN image addresses
if (/^TB\w+/.test(word)) {
return false;
}
return true;
})
.join(splitBy);
// NOTE(review): this is the tail of a callback whose opening is not visible
// in this snippet; `docs`, `files`, `folder` and `srcPath` come from the
// enclosing scope. The trailing `}, []);` looks like the end of a
// reduce-style call with an empty array as its initial value; confirm
// against the full source.
// Appends one record per file to `docs` and returns the combined array.
return docs.concat(files.map(file => {
// Read the file synchronously as UTF-8 text from srcPath/folder/file.
const text = fs.readFileSync(path.join(srcPath, folder, file), 'utf-8');
// Run jieba.extract with 15 as its second argument and keep only each
// entry's `word` property.
const keywords = jieba.extract(text, 15)
.map(item => item.word);
return {
// NOTE(review): parseInt is called without a radix and parses only the
// leading integer portion of `file`; presumably file names begin with a
// numeric id. Verify against the actual file naming.
id: parseInt(file),
tag: folder,
keywords,
};
}));
}, []);
exports.tokenize = (content, topCount) => {
return nodejieba.extract(content, topCount || 10)
}
.then(function(result){
var $ = cheerio.load(result.body),
title = $('.question .question-title').length && $('.question .question-title').text()
content = $('.question .content').length && $('.question .content').text();
var wordsArr = jieba.extract(title+content, 10),
tagArr = [];
for(var i=0,len=wordsArr.length;i