How to use sbd - 7 common examples

To help you get started, we’ve selected a few sbd examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github glitchdigital / glitched.news / pages / api / article / topics.js View on Github external
module.exports = async (req, res) => {
  const { url } = queryParser(req)

  if (!url)
    return send(res, 400, { error: 'URL parameter missing' })

  let { structuredData, text } = req.locals ? req.locals : await parseHtmlFromUrl(url)

  // Add a full stop after the end of every line, if there is not one already
  text = text.replace(/([^\.])\n/g, "$1.\n")

  // Get sentences in text
  const sentences = sbdTokenizer.sentences(text, { newline_boundaries: true, html_boundaries: true })

  // Build word list
  let words = `${structuredData.title} ${structuredData.description} ${structuredData.tags} ${text}`.split(' ')

  let keywords = []
  getKeywords(words.join(' ')).forEach(word => { 
    keywords.push({
      name: word,
      count: 0
    })
    // wordOccurrences.forEach(wordOccurance => {
    //   if (wordOccurance.token === word)
    //     keywords.push({
    //       name: word,
    //       count: 0
    //     })
github FreeFeed / freefeed-server / app / support / rss-text-parser.js View on Github external
/**
 * Derives a short title from the first line of `text`.
 *
 * The "first line" is everything before the first C0 control character
 * (U+0000–U+001F), NEL (U+0085), LINE SEPARATOR (U+2028) or PARAGRAPH
 * SEPARATOR (U+2029). The result is chosen by the first rule that applies:
 *
 *  1. the line is at most `maxLen` long      -> `trimPeriod(line)`
 *  2. no sentence is detected in the line    -> `''`
 *  3. the first sentence is shorter than
 *     `maxLen`                               -> `trimPeriod(joinStrings(sentences, maxLen))`
 *  4. the first word is shorter than
 *     `maxLen`                               -> `joinStrings(words, maxLen - 1)` + ellipsis
 *  5. otherwise                              -> first word cut to `maxLen - 1`
 *                                               characters + ellipsis
 *
 * NOTE(review): `trimPeriod` and `joinStrings` are defined outside this block;
 * presumably they strip a trailing period and join as many items as fit in
 * the given length — confirm against their definitions.
 *
 * @param text   Source text; must be a string.
 * @param maxLen Length budget for the title.
 * @returns The extracted title string.
 */
export function extractTitle(text, maxLen) {
  // see https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3AWord_Break%3DNewline%3A%5D&g=&i=
  const [line] = text.split(/[\u0000-\u001f\u0085\u2028\u2029]/, 1);

  if (line.length <= maxLen) {
    return trimPeriod(line);
  }

  const ss = sentences(line);

  // A line made only of whitespace (longer than maxLen) yields no sentences;
  // without this guard `ss[0].length` below throws a TypeError.
  if (ss.length === 0) {
    return '';
  }

  if (ss[0].length < maxLen) {
    return trimPeriod(joinStrings(ss, maxLen));
  }

  const words = ss[0].split(/\s+/);

  if (words[0].length < maxLen) {
    return `${joinStrings(words, maxLen - 1)}\u2026`;
  }

  // `substring` replaces the deprecated `substr`; for a start index of 0 both
  // treat a negative or NaN second argument as 0, so the result is unchanged.
  return `${words[0].substring(0, maxLen - 1)}\u2026`;
}
github kossnocorp / chirrapp / src / app / _lib / split / index.js View on Github external
.map(textChunk => {
      const tweets = sentences(textChunk)
        .reduce(
          (acc, sentence) => {
            const lastIndex = acc.length - 1
            const currentNumber = globalAccLength + acc.length
            const nextNumber = currentNumber + 1
            const extraNumberWidth = 2 // "/" plus 
            const nextNumberWidth =
              nextNumber.toString().length + extraNumberWidth
            const lastTweet = acc[lastIndex]
            const concatCandidate = joinSentences(
              numbering ? lastTweet || `${currentNumber}/` : lastTweet,
              sentence
            )

            // If the current sentence fits the last tweet.
            if (getTweetLength(concatCandidate) <= limit) {
github science-periodicals / web-verse / src / index.js View on Github external
export function createKey($el) {
  let key = '';
  let len = 6;
  let txt = normalizeText($el.textContent || '').replace(/[^\w\. ]+/giu, '');

  if (txt && txt.length > 1) {
    let lines = sbd
      .sentences(txt)
      .map(x => trim(x))
      .filter(x => x);
    if (lines.length) {
      let first = lines[0].match(/\S+/gu).slice(0, len / 2);
      let last = lines[lines.length - 1].match(/\S+/gu).slice(0, len / 2);
      let k = first.concat(last);

      let max = k.length > len ? len : k.length;

      for (var i = 0; i < max; i++) {
        key += k[i].substring(0, 1);
      }
    }
  }
github filipedeschamps / video-maker / robots / text.js View on Github external
/**
 * Fills `content.sentences` with one record per sentence detected in
 * `content.sourceContentSanitized`.
 *
 * Each record has the shape `{ text, keywords: [], images: [] }`, where
 * `text` is the sentence as returned by the detector and the two arrays
 * start out empty. Any previous value of `content.sentences` is replaced.
 *
 * @param content Object carrying `sourceContentSanitized`; mutated in place.
 */
function breakContentIntoSentences(content) {
    // Reset first so the property exists even if detection throws.
    content.sentences = []

    const detected = sentenceBoundaryDetection.sentences(content.sourceContentSanitized)
    for (const text of detected) {
      content.sentences.push({ text, keywords: [], images: [] })
    }
  }
github CopyTranslator / CopyTranslator / src / tools / translate / helper.ts View on Github external
/**
 * Splits `text` into sentences.
 *
 * The input is trimmed, passed to `tokenizer.sentences` together with the
 * module-level `optional_options`, and the result is run through `compact`.
 *
 * NOTE(review): `compact` is defined outside this block; presumably it drops
 * falsy entries (as lodash's `compact` does) — confirm against the import.
 *
 * @param text Text to split.
 * @returns The sentence strings left after `compact`.
 */
export function splitEng(text: string): string[] {
  const detected = tokenizer.sentences(text.trim(), optional_options);
  return compact(detected);
}
github glitchdigital / glitched.news / pages / api / article / text.js View on Github external
if (!url)
    return send(res, 400, { error: 'URL parameter missing' })

  const { structuredData, text } = req.locals ? req.locals : await parseHtmlFromUrl(url)

  const trustIndicators = { positive: [], negative: [] }

  const quotes = getQuotes(text)
  let quotesWithNumbers = []
  quotes.forEach(quote => {
    if (quote.match(/[0-9]/))
      quotesWithNumbers.push(quote)
  })

  const sentences = sbdTokenizer.sentences(text, { newline_boundaries: true })
  
  let sentencesWithNumbers = []
  sentences.forEach(sentence => {
    if (sentence.match(/[0-9]/))
      sentencesWithNumbers.push(sentence.replace(/\n/g, ' '))
  })

  if (quotes.length > 1) {
    trustIndicators.positive.push({ 
      text: `Multiple quotes cited in article`,
      description: 'Articles that contain quotes are useful as quotes can be verified.'
    })
  } else {
    trustIndicators.negative.push({
      text: `No quotes cited in article`,
      description: 'It is unusual for legitimate news articles not to contain multiple quotes.\nQuotes are useful as they can be verified.'

sbd

Split text into sentences with Sentence Boundary Detection (SBD).

MIT
Latest version published 3 years ago

Package Health Score

48 / 100
Full package analysis

Popular sbd functions