How to use @nlpjs/lang-es - 2 common examples

To help you get started, we’ve selected a few @nlpjs/lang-es examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github hubtype / botonic / packages / botonic-plugin-contentful / src / nlp / stemmer.ts View on Github external
import { BaseStemmer } from '@nlpjs/core/src'
import { StemmerCa } from '@nlpjs/lang-ca/src'
import { StemmerEn } from '@nlpjs/lang-en/src'
import { StemmerEs } from '@nlpjs/lang-es/src'
import { StemmerPt } from '@nlpjs/lang-pt/src'
import { StemmerPl } from './stemmers/polish-stemmer'

// see https://github.com/axa-group/nlp.js/blob/HEAD/docs/language-support.md
// and https://stackoverflow.com/a/11210358/145289
// Snowball algorithm inspired by https://github.com/MihaiValentin/lunr-languages, which is based on
// https://github.com/fortnightlabs/snowball-js/blob/master/stemmer/src/ext/SpanishStemmer.js, itself based on
// the Java version at http://snowball.tartarus.org/download.html
/**
 * Stemmer instances keyed by locale code (`ca`, `en`, `es`, `pl`, `pt`).
 *
 * All entries come from `@nlpjs/lang-*` packages except Polish, which is
 * imported from the local `./stemmers/polish-stemmer` module.
 */
export const stemmers: Record<string, BaseStemmer> = {
  ca: new StemmerCa(),
  en: new StemmerEn(),
  es: new StemmerEs(),
  // node-nlp does not support Polish, hence the locally provided stemmer
  pl: new StemmerPl(),
  pt: new StemmerPt(),
}

/**
 * Returns the stemmer registered in `stemmers` for the given locale.
 *
 * Only own keys of `stemmers` are considered. A plain `stemmers[locale]`
 * lookup would also resolve names inherited from `Object.prototype`
 * (e.g. `constructor`, `toString`) and hand back a non-stemmer value
 * instead of failing.
 *
 * @param locale - key to look up in `stemmers`
 * @returns the stemmer configured for `locale`
 * @throws Error when no stemmer is configured for `locale`
 */
export function stemmerFor(locale: string): BaseStemmer {
  // Guard against prototype-chain hits before trusting the lookup.
  const stem = Object.prototype.hasOwnProperty.call(stemmers, locale)
    ? stemmers[locale]
    : undefined
  if (!stem) {
    throw new Error(`No stemmer configured for locale '${locale}'`)
  }
  return stem
}
github hubtype / botonic / packages / botonic-plugin-contentful / src / nlp / tokens.ts View on Github external
return this.trim(normalized.split(/[^a-zA-Zá-úÁ-ÚñÑüÜ]+/))
  }

  /**
   * Removes empty strings from both ends of `arr`, in place, and returns
   * the same array. Empty strings located between non-empty entries are kept.
   *
   * @param arr - array to strip; it is mutated
   * @returns `arr` itself, without leading or trailing `''` entries
   */
  private trim(arr: string[]): string[] {
    // Drop the trailing run of empty strings first.
    let end = arr.length
    while (end > 0 && arr[end - 1] === '') {
      end--
    }
    arr.splice(end)

    // Then drop the leading run of empty strings.
    let start = 0
    while (start < arr.length && arr[start] === '') {
      start++
    }
    arr.splice(0, start)

    return arr
  }
}

/**
 * Tokenizer instances keyed by locale code.
 *
 * Polish (`pl`) uses the base `Tokenizer` class; every other locale listed
 * here uses its own `Tokenizer*` class.
 */
const tokenizers: Record<string, Tokenizer> = {
  es: new TokenizerEs(),
  en: new TokenizerEn(),
  ca: new TokenizerCa(),
  pl: new Tokenizer(),
  pt: new TokenizerPt(),
}

/**
 * Looks up the tokenizer registered in `tokenizers` for `locale`.
 *
 * NOTE(review): a locale that is not a key of `tokenizers` yields
 * `undefined` despite the declared return type — confirm that callers only
 * pass supported locales.
 *
 * @param locale - key to look up in `tokenizers`
 * @returns the tokenizer stored under `locale`
 */
export function tokenizerPerLocale(locale: Locale): Tokenizer {
  const tokenizer = tokenizers[locale]
  return tokenizer
}
/** Default set of separator characters, as a plain string. */
export const DEFAULT_SEPARATORS = ';,./()!?" '
/** Global regex matching any single character listed in `DEFAULT_SEPARATORS`. */
export const DEFAULT_SEPARATORS_REGEX = new RegExp(`[${DEFAULT_SEPARATORS}]`, 'g')
export const DEFAULT_NOT_SEPARATORS_REGEX = new RegExp(
  '[^' + DEFAULT_SEPARATORS + ']',

@nlpjs/lang-es

Core

MIT
Latest version published 2 years ago

Package Health Score

62 / 100
Full package analysis

Similar packages