How to use the natural.NGrams function in natural

To help you get started, we’ve selected a few examples of natural, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github zafarali / minerva-bot / plugins / training / train_query_classify.js View on Github external
var natural = require('natural')
var csv = require('fast-csv')
var fs = require('fs')
var path = require('path')
var jsonfile = require('jsonfile')
natural.LancasterStemmer.attach();

var query_classify = new natural.BayesClassifier();
var both_streams_ended = false;
var NGrams = natural.NGrams;


// Train the Bayes classifier on the "non-query" CSV: each row's first
// column is lower-cased, split into bigrams, and every bigram is added
// as a document labelled 'no'. (Stream handler is truncated in this excerpt.)
console.log('Non-queries');
var stream = fs.createReadStream(path.resolve('./plugins/training', 'conv_bot_db_augmented.csv'))
	.pipe(csv.parse())
	.on('readable', function(){
		var row;
		while(null !== (row = stream.read())){
			// console.log('unstemmed:',row);
			// console.log('stemmed:',)
			// console.log('->added document:',row[0])
			// Bigrams with an '[end]' padding token appended (no start padding).
			var bgrams1 = NGrams.ngrams(row[0].toLowerCase(), 2, null, '[end]')
			// var bgrams1 = NGrams.bigrams(row[0].toLowerCase());
			for (var i = 0; i < bgrams1.length; i++) {
				query_classify.addDocument(bgrams1[i], 'no');			
			};
github erelsgl / nlu-server / classifiers.js View on Github external
// Fragment of a feature-extraction callback (the enclosing function header
// is outside this excerpt). Builds a bag-of-unigrams feature map from the
// tokenized sentences of `sample` and hands it to `callback`.
if (!('sentences' in sample))
	   throw new Error("for some reason sentences not in sample")

	// Normalise: a single sentence object becomes a one-element array.
	if (!_.isArray(sample['sentences']))
		sample['sentences'] = [sample['sentences']]

	// Flatten all per-sentence token lists into one array, dropping falsy entries.
	var tokens = _.compact(_.flatten(_.pluck(sample['sentences'], 'tokens')))

	// Lower-cased word list (assumes each token has a `.word` string — TODO confirm).
	var words = []
	_.each(tokens, function(token, key, list){
		words.push(token.word.toLowerCase())
	}, this)

	// Unigram features only; the unigram+bigram variant is kept commented out.
	// var feature = natural.NGrams.ngrams(words, 1).concat(natural.NGrams.ngrams(words, 2))
	var feature = natural.NGrams.ngrams(words, 1)

	// Each n-gram (an array of words) is used as an object key with weight 1.
	// NOTE(review): array keys are stringified, e.g. ["foo"] becomes "foo".
	_.each(feature, function(value, key, list){
		features[value] = 1
	}, this)

	console.log("feAsyncPrimitive: train: "+train+" FEATURES: "+JSON.stringify(features, null, 4))
	callback(null, features)
}
github zafarali / minerva-bot / plugins / help / index.js View on Github external
var natural = require('natural')
// var WORDS = require('../WORDS.js').WORDS;
var utils = require('../../utils.js');
var chat_builders = require('../../chat_utils.js').builders;
natural.LancasterStemmer.attach();
var NGrams = natural.NGrams;
var WORDS = require('../WORDS.js').WORDS

function help_me(context){
	// Detects whether the user is asking for help and updates `context`.
	// (NOTE(review): the original comment said "contains a greeting" —
	// likely copy-pasted from a sibling plugin.)
	
	if(context.postback){
		// we have received a deep link from somewhere else.
		// NOTE(review): substr(0,7) extracts SEVEN characters but 'help@' is
		// only five, so this comparison can never be true. Probably should be
		// substr(0,5) or startsWith('help@') — confirm the intended prefix.
		if(context.postback.substr(0,7) === 'help@'){
			set_user_needs_help(context)
			return context
		}
	}

	// tokenizeAndStem is patched onto String.prototype by the
	// natural.LancasterStemmer.attach() call at the top of this file.
	var tokenized_input = context.current_query.tokenizeAndStem();
	for (var i = 0; i < tokenized_input.length; i++) {
github telegram-ru / jobs-bot / deduplicator / minhash.js View on Github external
'use strict';
/*
 * From: https://github.com/sjhorn/node-minhash
 * rus: https://habrahabr.ru/post/250673/
 * Changelog:
 *  - added exports
 *  - updated to es6
 */

const maxShingleID = Math.pow(2, 32-1);
const nextPrime = 4294967311;
const numHashes = 256;

const crc32 = require('crc-32');
const natural = require('natural');
const NGrams = natural.NGrams;

/*
// Initially generated coefficients as shown below.
var fs = require('fs');
var coeffA = pickRandomCoeffs(numHashes);
var coeffB = pickRandomCoeffs(numHashes);
fs.writeFileSync("B.json", JSON.stringify(coeffB));
fs.writeFileSync("A.json", JSON.stringify(coeffA));
*/
const coeffA = [
  1737785533,1765439879,1662630800,1084975228,341414066,232531847,1121392417,2014673063,567934308,1318726960,
  1456120004,3232225,1273779376,1511214936,663223541,604587718,1997407686,2131009365,585324094,780980520,
  1782778798,238365119,1862924389,1675708909,649782163,880863752,910202909,2106981095,919496758,865726573,
  559524446,1096755754,1848180334,1394188713,471023138,762475741,547922281,1187557093,1423463148,550423718,
  1419548119,992336339,2043435375,1229454535,2135156067,1443236097,159293955,875949484,721564628,1252074821,
  1159187374,1495822558,87252092,798798571,1845239353,840224805,1195059557,593936607,255486346,449041957,
github 26medias / context-aware-markov-chains / cmarkov.js View on Github external
var _			= require('underscore');
var fstool		= require('fs-tool');
var natural		= require('natural');
var pstack		= require('pstack');
var md5File		= require('md5-file');
var progressbar = require('progress');
var NGrams		= natural.NGrams;
var pos			= require('pos');
var tbl			= require('cli-table');
var seraph		= require("seraph");
// var Tagger		= require("natural").BrillPOSTagger;
var Tagger      = require("brill-pos-tagger");
var tokenizer	= require("node-tokenizer");
var wlist		= require("./js-weighted-list");

var markov = function(options) {
	this.options	= _.extend({
		db:				'http://localhost:7474',
		name:			'dev',
		depth:			[1,3],
		weight:			1.2,
		depthWeight:	2,
		certainty:		1
github bxjx / gramophone / index.js View on Github external
// For every phrase, inspect its (length-1)-grams: when a shorter sub-phrase
// occurs (almost) as often as the longer phrase that contains it, the
// sub-phrase is redundant and is removed from `combined`.
// `phrases` (phrase -> count), `combined` and `cutoff` are free variables
// from the enclosing scope (not shown in this excerpt).
// Change: removed the declared-but-unused local `subPhrases`.
_.each(_.keys(phrases), function(phrase){
    // n-gram size one shorter than the phrase itself.
    var ngramToTry = phrase.split(' ').length - 1;

    // Single-word phrases have no shorter sub-phrase to check.
    if (ngramToTry < 1) return;

    _.each(natural.NGrams.ngrams(phrase, ngramToTry), function(ngram){
      var subPhrase = ngram.join(' ');
      if (phrases[subPhrase]){
        // Drop the sub-phrase unless it is meaningfully more frequent than
        // the containing phrase (relative-frequency threshold via `cutoff`).
        if (!cutoff || (phrases[phrase] / phrases[subPhrase]) >= (1 - cutoff)){
          delete combined[subPhrase];
        }
      }
    });
  });
github mark-watson / javascript_intelligent_systems / src / nlp.js View on Github external
// Demo of natural's Bayes classifier, n-grams and TfIdf. The variables
// `economy`, `politics` and `sports` hold document text loaded elsewhere
// in this file (not shown in this excerpt).

// tokenizeAndStem is patched onto String.prototype — presumably by an
// earlier stemmer attach() call outside this excerpt; confirm upstream.
console.log("dog dogs Dog dogged".tokenizeAndStem());

var classifier = new natural.BayesClassifier();

// One training document per category label.
classifier.addDocument(economy, 'economy');
classifier.addDocument(politics, 'politics');
classifier.addDocument(sports, 'sports');
classifier.train();

console.log("\n-- Bayesian classifier test results:");

console.log(classifier.classify('The President and Congress went on vacation.'));
console.log(classifier.classify('Tax rates might be effected by quantitative easing.'));
console.log(classifier.classify('I like baseball more than football.'));

var NGrams = natural.NGrams;

console.log("\n-- 2grams in text from file sports.txt:");
console.log(NGrams.bigrams(sports));
console.log("\n-- 3grams in text from file sports.txt:");
console.log(NGrams.trigrams(sports));

// TfIdf: score each training document for a query term.
var TfIdf = natural.TfIdf,
  tfidf = new TfIdf();

tfidf.addDocument(economy, 'economy');
tfidf.addDocument(politics, 'politics');
tfidf.addDocument(sports, 'sports');

console.log('\n-- tfidf for word "economy" in three test documents:');
console.log('economy:');
tfidf.tfidfs('economy', function(i, measure) {
github erelsgl / nlu-server / utils / bars.js View on Github external
// Fragment of a skip-gram generator (the function header is outside this
// excerpt). Free names `sequence`, `k`, `ngr`, `start`, `end` and
// `tokenizer` come from the missing header / enclosing scope.
if (!_(sequence).isArray()) {
        sequence = tokenizer.tokenize(sequence);
    }
    
  var output = []

  // For every gap size 1..k, delete a window of that size at each position
  // and collect the n-grams of the remaining tokens.
  _(k).times(function(n){
    _.each(sequence, function(value, key, list){ 
      var some = sequence.slice() // copy so splice doesn't mutate the original
      some.splice(key, n+1)
      output = output.concat(natural.NGrams.ngrams(some, ngr, start, end))
    }, this)
  })

  // Plain (no-skip) n-grams of the full sequence.
  output = output.concat(natural.NGrams.ngrams(sequence, ngr, start, end))

  // Dedupe by round-tripping each gram through a comma-joined string.
  // NOTE(review): this corrupts tokens that themselves contain commas.
  output = _.map(output, function(num){ return num.join(",") });
  output = _.uniq(output)
  output = _.map(output, function(num){ return num.split(",") });

  return output 
}
github phillro / mongoose-fulltext / lib / search.js View on Github external
/**
 * User: philliprosen
 * Date: 1/1/13
 * Time: 3:10 PM
 */
var natural = require('natural'),
    metaphone = natural.Metaphone,
    NGrams = natural.NGrams,
    TfIdf = natural.TfIdf;

metaphone.attach()

exports.Search = Search;

/**
 * Full-text search helper bound to a document field.
 *
 * @param {String} key - name of the field to index/search.
 * @param {Object} [options] - optional settings:
 *   - stopwords   {Array} caller-supplied stopword list.
 *   - naturalstop {Boolean} use natural's built-in stopword list instead.
 */
function Search(key, options) {
    options = options || {};
    this.key = key;

    // BUG FIX: the second of the two original assignments overwrote the
    // first unconditionally, so a caller-supplied `options.stopwords` list
    // was always discarded. Precedence now: natural's built-in list (when
    // `naturalstop` is set) > caller-supplied list > false (disabled).
    this.stopwords = options.naturalstop
        ? natural.stopwords
        : (options.stopwords ? options.stopwords : false);
}
github erelsgl / nlu-server / research / ppdb / modes.js View on Github external
// Fragment: a function body whose header is outside this excerpt (it takes
// a `keyphrase`; `bars`, `natural`, `_` and `simpledistance` are free names).
// Expands a keyphrase into stopword-filtered skip-grams sorted by
// similarity to the original phrase.
{
	if (_.isArray(keyphrase))
		keyphrase = keyphrase[0]

	// Unigrams of the keyphrase, flattened to a plain word list.
	var ngr = _.flatten(natural.NGrams.ngrams(keyphrase, 1))
	if ((ngr.length == 1) && (bars.isstopword(keyphrase)))
		return []

	// NOTE(review): duplicate of the computation above — redundant.
	var ngr = _.flatten(natural.NGrams.ngrams(keyphrase, 1))
	// Negated phrases are returned verbatim (no expansion).
	if (ngr.indexOf("no") != -1 || ngr.indexOf("not") != -1)
		return [keyphrase]

	// Collect 2- and 3-token skip-grams (window 4) plus plain unigrams.
	var skipgrams = []
	skipgrams = skipgrams.concat(bars.skipgrams(keyphrase, 2, 4)).
						  concat(bars.skipgrams(keyphrase, 3, 4)).
						  concat(natural.NGrams.ngrams(keyphrase, 1))

	skipgrams = _.unique(skipgrams)
	skipgrams = _.map(skipgrams, function(value){ return value.join(" ") });
	skipgrams = _.reject(skipgrams, function(num){ return bars.isstopword(num); });

	if (!_.isArray(skipgrams)) skipgrams = [skipgrams]

	// Closest matches to the original keyphrase first.
	skipgrams = _.sortBy(skipgrams, function(num){ return simpledistance(keyphrase, num) });

	return skipgrams
}

natural

General natural language (tokenizing, stemming (English, Russian, Spanish), part-of-speech tagging, sentiment analysis, classification, inflection, phonetics, tfidf, WordNet, jaro-winkler, Levenshtein distance, Dice's Coefficient) facilities for node.

MIT
Latest version published 1 month ago

Package Health Score

98 / 100
Full package analysis