How to use the natural.TfIdf function in natural

To help you get started, we’ve selected a few natural examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nodejitsu / handbook / index.js View on Github external
function frequency(content) {
  var tfidf = new natural.TfIdf(),
      processed = [],
      words = [];

  // Add the current content.
  content = content.toLowerCase();
  tfidf.addDocument(content);

  tokenizer.tokenize(content).forEach(function wordFrequency(word) {
    // Return early if the word was already processed, is too short, or is only a number.
    if (+word || word.length < 3 || ~processed.indexOf(word)) return;

    words.push({
      word: word,
      score: tfidf.tfidf(word, 0)
    });
github nodejitsu / handbook / index.js View on Github external
function frequency(content) {
  var tfidf = new natural.TfIdf(),
      processed = [],
      words = [];

  // Add the current content.
  content = content.toLowerCase();
  tfidf.addDocument(content);

  tokenizer.tokenize(content).forEach(function wordFrequency(word) {
    // Return early if the word was already processed, is too short, or is only a number.
    if (+word || word.length < 3 || ~processed.indexOf(word)) return;

    words.push({
      word: word,
      score: tfidf.tfidf(word, 0)
    });
github JDvorak / Matilda.js / test3.js View on Github external
var process = function(docs, n, percentage) {
	var processedDocs = {};
			 wordBag      = {},
				 vocab     = {},
				 words   = [],
				   cut  = 0,
				 	 len = 0,
					   n = 0;

	tfidf  = new natural.TfIdf;

	for (d in docs) {
		wordBags[n] = natural.PorterStemmer.tokenizeAndStem(d);
		tfidf.addDocument(wordBags[n]);

		len = tfidf.listTerms(n).length-1;
		cut = (1-percentage)/2;
		words = tfidf.listTerms(n).slice(cut, (len-cut));
		
		for (w in words) {
			if (!!vocab[words[w].term]) continue;
			vocab[words[w].term] = {word: words[w].term, count: 0};
		}

		n = 0;
github mix / schenkerian / lib / analyze.js View on Github external
/**
 * Extract key terms from a document and attach a TF-IDF score to each one.
 *
 * The title and content are joined with a single space and passed both to
 * `gramophone.extract` (with scoring, stemming, the `commonWordsArray` stop
 * list and a limit of 20) and to a one-document `TfIdf` index.
 *
 * @param {string} title - Document title; included in term extraction and scoring.
 * @param {string} content - Document body; the only input counted in `totalWords`.
 * @returns {{totalWords: number, relevance: Array<Object>}} `totalWords` is the
 *   number of whitespace-separated words in `content` (0 for empty or
 *   whitespace-only content). `relevance` holds one entry per extracted term
 *   with a non-empty `term`, each being `{ score }` merged with the extracted
 *   item (the item's own fields are merged on top).
 */
function wordAnalysis(title, content) {
  // Build the combined text once; extraction and the TF-IDF index must see the same input.
  const text = [title, content].join(' ')

  const graph = gramophone.extract(text, {
    score: true,
    stopWords: commonWordsArray,
    stem: true,
    limit: 20
  })

  const tfidf = new TfIdf()
  tfidf.addDocument(text)

  // Discard blank terms first so they are never scored, then score against document 0
  // (the only document in the index).
  const relevance = graph
    .filter(item => item.term !== '')
    .map(item => _.merge({ score: tfidf.tfidf(item.term, 0) }, item))

  // Split on runs of whitespace after trimming: splitting on a single ' ' would
  // report 1 word for empty content and miscount repeated spaces, tabs and newlines.
  const trimmed = content.trim()
  const totalWords = trimmed === '' ? 0 : trimmed.split(/\s+/).length

  return {
    totalWords: totalWords,
    relevance: relevance
  }
}
github fergiemcdowall / norch / norch-lib.js View on Github external
facetValues = doc[facets[0]];
  }

  for (fieldKey in doc) {
    if( Object.prototype.toString.call(doc[fieldKey]) === '[object Array]' ) {
      value['fields'][fieldKey] = doc[fieldKey];
    } else {
      value['fields'][fieldKey] = doc[fieldKey].substring(0, maxStringFieldLength);
    }
  }

  for (fieldKey in doc) {
    if (indexMetaDataGlobal['indexedFieldNames'].indexOf(fieldKey) == -1) {
      indexMetaDataGlobal['indexedFieldNames'].push(fieldKey);
    }
    tfidf = new TfIdf();
    tfidf.addDocument(doc[fieldKey], fieldKey + '~' + id);
    docVector = tfidf.documents[tfidf.documents.length - 1];
    var highestFrequencyCount = 0;
    for (var k in docVector) {
      if (docVector[k] > highestFrequencyCount)
        highestFrequencyCount = docVector[k];
    }
    var deleteKeys = [];
    for (var k in docVector) {
      if (k != '__key') {
        var facetIndexKey = ['NO~FACETING'];
        for (var l = 0; l < facets.length; l++) {
          if (doc[facets[l]]) {
            var thisFacetValue = doc[facets[l]];
            for (var m = 0; m < thisFacetValue.length; m++) {
              facetIndexKey.push(facets[l] + '~' + thisFacetValue[m]);

natural

General natural language (tokenizing, stemming (English, Russian, Spanish), part-of-speech tagging, sentiment analysis, classification, inflection, phonetics, tfidf, WordNet, jaro-winkler, Levenshtein distance, Dice's Coefficient) facilities for node.

MIT
Latest version published 1 month ago

Package Health Score

98 / 100
Full package analysis