How to use the natural.PorterStemmer function in natural

To help you get started, we’ve selected a few natural examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mark-watson / javascript_intelligent_systems / src / nlp.js View on Github external
// Demo script: tokenizes, stems and classifies three sample text files
// using the `natural` package, printing each result to the console.
//
// Sample data for these examples (coerced to strings):
// NOTE(review): `fs` is not required in the visible part of this file —
// presumably it is required earlier; confirm. Prepending ' ' turns whatever
// readFileSync returns into a string via concatenation.

var economy = ' ' + fs.readFileSync('data/texts/economy.txt');
var politics = ' ' + fs.readFileSync('data/texts/politics.txt');
var sports = ' ' + fs.readFileSync('data/texts/sports.txt');

var natural = require('natural'),
  tokenizer = new natural.WordTokenizer();

// 1. Tokenize the politics text and print the resulting tokens.
console.log("\n-- Tokenized sample text in politics.txt:");
console.log(tokenizer.tokenize(politics));

// 2. Stem a single word by calling the stemmer directly.
console.log("\n-- Use Porter Stemmer on a single word:");
console.log(natural.PorterStemmer.stem("dogs"));

// Must run before any tokenizeAndStem() call below: those calls are made on
// plain string values and rely on the methods that attach() adds.
natural.PorterStemmer.attach();  // add methods to string

// 3. Tokenize and stem a whole file's text, then a short literal phrase.
console.log("\n-- Use Porter Stemmer text in file sports.txt:");
console.log(sports.tokenizeAndStem());

console.log("dog dogs Dog dogged".tokenizeAndStem());

// 4. Build a Bayes classifier with one training document per label.
var classifier = new natural.BayesClassifier();

classifier.addDocument(economy, 'economy');
classifier.addDocument(politics, 'politics');
classifier.addDocument(sports, 'sports');
// train() is called after all documents are added and before classify().
classifier.train();

console.log("\n-- Bayesian classifier test results:");

// Print the label the trained classifier assigns to an unseen sentence.
console.log(classifier.classify('The President and Congress went on vacation.'));
github digitalbiblesociety / browserbible-3 / tools / verse_indexer.js View on Github external
function createHashedIndexFiles(lang, indexPath, indexData, type) {

	var words_to_stem = {};
	var stem_to_words = {};
	var stemmer = null;

	switch (lang) {
		case 'eng':
			stemmer = natural.PorterStemmer;
			break;
		case 'esp':
			stemmer = natural.PorterStemmerEs;
			break;
	}

	//console.log('trying to create index', stemmer);

	if (type == 'words' && stemmer != null) {

		// make stems
		for (var key in indexData) {

			var wordData = indexData[key],
				stemmedWord = stemmer.stem(key);
github ava-ia / core / composers / nlp / nlp.natural.js View on Github external
return new Promise((resolve, reject) => {
    const tokens = tokenizer.tokenize(phrase);
    tokens.map(token => {
      console.log(token, Natural.PorterStemmer.stem(token))
    });

    Natural.LancasterStemmer.attach();
    console.log(phrase.tokenizeAndStem());

    resolve({
      engine: 'compromise',
      ms: (new Date() - time),

      tokens: tokenizer.tokenize(phrase),
      stemmers: Natural.PorterStemmer.stem(phrase)
      // glossary: glossary.parse(phrase),
      // sentiment: analyser.classify(phrase),
    });
  });
};
github sxyizhiren / cn-search / lib / reds.js View on Github external
/*!
 * reds
 * Copyright(c) 2011 TJ Holowaychuk 
 * cn-search
 * Copyright(c) 2013 Sxyizhiren <786647787@qq.com>
 * MIT Licensed
 */

/**
 * Module dependencies.
 */

var natural = require('natural');
var metaphone = natural.Metaphone.process;
var stem = natural.PorterStemmer.stem;
var stopwords = natural.stopwords;
var cnstopwords = require('./cnstopWords');

// default chinese segment
var Segment = require('segment').Segment;
var segment = new Segment();
segment.useDefault();

/**
 * Chinese Segment
 * @type {*}
 */
var segmentSync = function(str){
    var words=segment.doSegment(str);
    var result=[];
    for(var i= 0,len=words.length;i
github mediocre / mehdown / lib / index.js View on Github external
return;
            }

            // Check the static Emoji mappings whitelist
            var whitelist = emojiMappings.whitelist[t.toLowerCase()];

            if (whitelist) {
                args = args.replace(new RegExp(`\\b${t}\\b(?!:)`, 'g'), whitelist);
                return;
            }

            var stem = natural.PorterStemmer.stem(t.toLowerCase());

            // Check to see if the word directly matches an Emoji shortname
            for (var key in emoji) {
                var stemmedShortname = natural.PorterStemmer.stem(key.toLowerCase());

                if (stemmedShortname === stem) {
                    args = args.replace(new RegExp(`\\b${t}\\b(?!:)`, 'g'), emoji[key].shortname);
                    return;
                }
            }

            // Check to see if the word matches an Emoji alias
            for (key in emoji) {
                var _emoji = emoji[key];

                if (_emoji.category === 'flags') {
                    continue;
                }

                if (_emoji.stemmedAliases.indexOf(stem) !== -1) {
github eugeneware / fulltext-engine / nlp.js View on Github external
/**
 * Stem every entry of an indexable, length-bearing collection of words.
 *
 * Each element is passed on its own to `natural.PorterStemmer.stem`, and the
 * results are collected in the same order into a new array; the input is not
 * modified by this function.
 *
 * @param {Array<string>} words - Words to stem (read via `length` and index).
 * @returns {Array} New array holding the stemmer's result for each word.
 */
function stem (words) {
  var stems = [];
  var total = words.length; // read once, before any stemming happens
  var position = 0;
  while (position < total) {
    stems.push(natural.PorterStemmer.stem(words[position]));
    position += 1;
  }
  return stems;
}
github tldr-pages / tldr-node-client / lib / search.js View on Github external
// Normalize every token in place: lower-case it, then replace it with the
// result of natural.PorterStemmer.stem for that lower-cased word.
tokens.forEach((token, position) => {
    tokens[position] = natural.PorterStemmer.stem(token.toLowerCase());
  });
github Planeshifter / text-miner / lib / corpus.js View on Github external
this.apply( function( text ) {
		if ( type === 'Lancaster' ) {
			return natural.LancasterStemmer.stem( text );
		} else {
			return natural.PorterStemmer.stem( text );
		}
	});
	return this;
github GCE-NEIIST / GCE-NEIIST-webapp / server / services / theses-modules / MEIC_3.js View on Github external
const natural = require("natural");
var PorterStemmer = natural.PorterStemmer;
let specClassifier = new natural.BayesClassifier(PorterStemmer,0.1);
natural.PorterStemmer.attach();

/*
* This classifier assumes one area of specialization per professor.
* When the classifier is receiving a criteria to proceed to the classification, there is a problem:
*   As the professor's names are stemmed and each name is separated (constituting a unique name per se),
*       It makes the classifier consider David De Matos and Ana Matos as having a similarity (surname), thus conficting the classification process
*
* Quick-fix:
* -Make professors' names unique (chosen technique) or:
* -Cross classify using keywords
* */

class MEIC_MODULE_3 {
    constructor() {
github tj / reds / lib / reds.js View on Github external
/*!
 * reds
 * Copyright(c) 2011 TJ Holowaychuk 
 * MIT Licensed
 */

/**
 * Module dependencies.
 */

var natural = require('natural');
var metaphone = natural.Metaphone.process;
var stem = natural.PorterStemmer.stem;
var stopwords = natural.stopwords;
var redis = require('redis');
/** Does nothing and returns `undefined`. */
function noop() {}

/**
 * Library version.
 */

exports.version = '1.0.0';

/**
 * Expose `Search`.
 */

exports.Search = Search;

natural

General natural language (tokenizing, stemming (English, Russian, Spanish), part-of-speech tagging, sentiment analysis, classification, inflection, phonetics, tfidf, WordNet, jaro-winkler, Levenshtein distance, Dice's Coefficient) facilities for node.

MIT
Latest version published 1 month ago

Package Health Score

98 / 100
Full package analysis