How to use natural - 10 common examples

To help you get started, we’ve selected a few examples of the `natural` package, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github erelsgl / nlu-server / classifiers.js View on Github external
var lemmatizer = new Lemmatizer();

var antonyms = {}
//var data = fs.readFileSync("./antonyms.txt", 'utf8').split("\n")

/*_.each(data, function(value, key, list){
        var value1 = value.split(",")
        antonyms[value1[0]] = value1[1]
        antonyms[value1[1]] = value1[0]
}, this)

var old_unused_tokenizer = {tokenize: function(sentence) { return sentence.split(/[ \t,;:.!?]/).filter(function(a){return !!a}); }}

var tokenizer = new natural.RegexpTokenizer({pattern: /[^a-zA-Z0-9\-\?]+/});

console.vlog = function(data) { fs.appendFileSync(log_file, data + '\n', 'utf8') };

// var tokenizer = new natural.WordTokenizer({'pattern':(/(\W+|\%)/)}); // WordTokenizer, TreebankWordTokenizer, WordPunctTokenizer
// var ngrams = new natural.NGrams.ngrams()

// var enhance = function (classifierType, featureExtractor, inputSplitter, featureLookupTable, labelLookupTable, preProcessor, postProcessor, TestSplitLabel, multiplyFeaturesByIDF, featureExpansion, featureExpansionScale, featureExpansionPhrase, featureFine, expansionParam) {
var enhance = function (classifierType, featureExtractor, inputSplitter, featureLookupTable, labelLookupTable, preProcessor, postProcessor, TestSplitLabel, multiplyFeaturesByIDF, featureOptions) {
// var enhance = function (classifierType, featureLookupTable, labelLookupTable) {
	return classifiers.EnhancedClassifier.bind(0, {
		normalizer: normalizer,

		inputSplitter: inputSplitter,

		// featureExpansion: featureExpansion,
github mark-watson / javascript_intelligent_systems / src / nlp.js View on Github external
// Demo script for the `natural` NLP package: tokenizing, Porter stemming and
// naive-Bayes text classification over three sample text files.
// `fs` is expected to be in scope already (required earlier in the file).

// Sample data for these examples (coerced to strings):

var economy = ' ' + fs.readFileSync('data/texts/economy.txt');
var politics = ' ' + fs.readFileSync('data/texts/politics.txt');
var sports = ' ' + fs.readFileSync('data/texts/sports.txt');

var natural = require('natural'),
  tokenizer = new natural.WordTokenizer();

console.log("\n-- Tokenized sample text in politics.txt:");
// Print the output this banner announces (the tokenizer was previously unused).
console.log(tokenizer.tokenize(politics));

console.log("\n-- Use Porter Stemmer on a single word:");
console.log(natural.PorterStemmer.stem('dogs'));

natural.PorterStemmer.attach();  // add methods to string

console.log("\n-- Use Porter Stemmer text in file sports.txt:");
// tokenizeAndStem() is available on strings only after attach() above.
console.log(sports.tokenizeAndStem());

// Small inline example of the same string method.
console.log("dog dogs Dog dogged".tokenizeAndStem());

var classifier = new natural.BayesClassifier();

// One training document per category; the label is the second argument.
classifier.addDocument(economy, 'economy');
classifier.addDocument(politics, 'politics');
classifier.addDocument(sports, 'sports');

// The classifier must be trained after adding documents and before classify();
// without this call the added documents are never used.
classifier.train();

console.log("\n-- Bayesian classifier test results:");

console.log(classifier.classify('The President and Congress went on vacation.'));
github digitalbiblesociety / browserbible-3 / tools / verse_indexer.js View on Github external
function createHashedIndexFiles(lang, indexPath, indexData, type) {

	var words_to_stem = {};
	var stem_to_words = {};
	var stemmer = null;

	switch (lang) {
		case 'eng':
			stemmer = natural.PorterStemmer;
		case 'esp':
			stemmer = natural.PorterStemmerEs;

	//console.log('trying to create index', stemmer);

	if (type == 'words' && stemmer != null) {

		// make stems
		for (var key in indexData) {

			var wordData = indexData[key],
				stemmedWord = stemmer.stem(key);
github ava-ia / core / composers / nlp / nlp.natural.js View on Github external
return new Promise((resolve, reject) => {
    const tokens = tokenizer.tokenize(phrase); => {
      console.log(token, Natural.PorterStemmer.stem(token))


      engine: 'compromise',
      ms: (new Date() - time),

      tokens: tokenizer.tokenize(phrase),
      stemmers: Natural.PorterStemmer.stem(phrase)
      // glossary: glossary.parse(phrase),
      // sentiment: analyser.classify(phrase),
github machinelearnjs / machinelearnjs / src / lib / feature_extraction / index.repl.ts View on Github external
/* tslint:disable */
// Scratch/REPL script: exercises natural's tokenizer and n-gram helper, then
// runs the local CountVectorizer on a tiny two-document corpus.
import * as natural from 'natural';
// Tokenize a sample sentence and print the resulting tokens.
const tokenizer = new natural.WordTokenizer();
console.log(tokenizer.tokenize('your do a dog dog has fleas.'));

// Print the n-grams of a sample sentence; the second argument (5) is the
// n-gram size per natural's NGrams API.
const NGrams = natural.NGrams;
console.log(NGrams.ngrams('This is a text document to analyze.', 5));

// Project-local vectorizer under test (ES imports are hoisted, so its position
// mid-file does not affect evaluation order).
import { CountVectorizer } from './text';

const cv = new CountVectorizer();

// Two sample documents; repeated words ("learning", "shin") are included
// presumably to exercise counting -- TODO confirm against CountVectorizer.
const text1 = ['deep learning ian good fellow learning jason shin shin', 'yoshua bengio'];

console.log('original text', text1);
// Call fit_transform on the sample documents and keep the result.
const vocabCounts = cv.fit_transform(text1);
github ava-ia / core / composers / nlp / nlp.natural.js View on Github external
// -- More info:
'use strict';

import Natural from 'natural';
// -- Internal
const tokenizer = new Natural.WordTokenizer()

export default (phrase) => {
  const time = new Date();
  return new Promise((resolve, reject) => {
    const tokens = tokenizer.tokenize(phrase); => {
      console.log(token, Natural.PorterStemmer.stem(token))


      engine: 'compromise',
      ms: (new Date() - time),
github shiffman / A2Z-F18 / week10-sentiment / 03_classification_API / server.js View on Github external
function listen() {
  const host = server.address().address;
  const port = server.address().port;
  console.log('Example app listening at http://' + host + ':' + port);

// Do we already have a classifier "database"
const exists = fs.existsSync('classifier.json');

// If we do, load it
if (exists) {
  natural.BayesClassifier.load('classifier.json', null, loaded);
  // If not make a new one
} else {
  console.log('starting a new classifier');
  classifier = new natural.BayesClassifier();

// All set and loaded
function loaded(err, cf) {
  classifier = cf;
  console.log('Classifier loaded');

// This is a post for training'/train', training);

function training(req, res) {
  // Get the text and category
  const text = req.body.text;
  const category = req.body.category;
github zafarali / minerva-bot / plugins / training / train_query_classify.js View on Github external
var natural = require('natural')
var csv = require('fast-csv')
var fs = require('fs')
var path = require('path')
var jsonfile = require('jsonfile')

var query_classify = new natural.BayesClassifier();
var both_streams_ended = false;
var NGrams = natural.NGrams;

var stream = fs.createReadStream(path.resolve('./plugins/training', 'conv_bot_db_augmented.csv'))
	.on('readable', function(){
		var row;
		while(null !== (row ={
			// console.log('unstemmed:',row);
			// console.log('stemmed:',)
			// console.log('->added document:',row[0])
			var bgrams1 = NGrams.ngrams(row[0].toLowerCase(), 2, null, '[end]')
			// var bgrams1 = NGrams.bigrams(row[0].toLowerCase());
			for (var i = 0; i < bgrams1.length; i++) {
				query_classify.addDocument(bgrams1[i], 'no');			
github erelsgl / nlu-server / classifiers.js View on Github external
if (!('sentences' in sample))
	   throw new Error("for some reason sentences not in sample")

	if (!_.isArray(sample['sentences']))
		sample['sentences'] = [sample['sentences']]

	var tokens = _.compact(_.flatten(_.pluck(sample['sentences'], 'tokens')))

	var words = []
	_.each(tokens, function(token, key, list){
	}, this)

	// var feature = natural.NGrams.ngrams(words, 1).concat(natural.NGrams.ngrams(words, 2))
	var feature = natural.NGrams.ngrams(words, 1)

	_.each(feature, function(value, key, list){
		features[value] = 1
	}, this)

	console.log("feAsyncPrimitive: train: "+train+" FEATURES: "+JSON.stringify(features, null, 4))
	callback(null, features)
github erelsgl / nlu-server / classifiers.js View on Github external
if ("input" in sample)
		sample = sample.input

/*	if (!('basic-dependencies' in sample['sentences']))
		throw new Error("train:"+train+" basic-dependencies not in the sample "+JSON.stringify(sample))
/*	if (!('sentences' in sample))
	   throw new Error("for some reason sentences not in sample "+JSON.stringify(sample))
/*	if (!('tokens' in sample['sentences']))
	   throw new Error("for some reason tokens not in sample"+JSON.stringify(sample, null, 4))
	if (_.isArray(sample['sentences']))
	   throw new Error("feAsync is only for object sentences")

	var tokenizer = new natural.RegexpTokenizer({pattern: /[^\%a-zA-Z0-9\-\?]+/});
	var text = regexpNormalizer(sample["text"].toLowerCase())
	console.vlog("feAsyncStanford: text: "+text)
	// the array of tokens
	// var tokenized = tokenizer.tokenize(text)
	// console.vlog("feAsyncStanford: tokenized: "+JSON.stringify(tokenized, null, 4))

	// sample['sentences'] = {"tokens":[]}

	// _.each(tokenized, function(value, key, list){
 //    	sample['sentences']['tokens'].push({
 //            "word": value,
 //            // "lemma": value[0]
 //            "lemma": natural.PorterStemmer.stem(value)
 //        	// "lemma": lemmerEng.lemmatize(value[0])
 //        })


General natural language facilities for Node.js: tokenizing, stemming (English, Russian, Spanish), part-of-speech tagging, sentiment analysis, classification, inflection, phonetics, tf-idf, WordNet, Jaro-Winkler, Levenshtein distance, and Dice's Coefficient.

Latest version published 1 month ago

Package Health Score

98 / 100
Full package analysis