Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
/* global it */
/* global describe */
// Module dependencies used by this test file.
const { Readable } = require('stream')
const should = require('should')
const stopwords = require('stopword').en

// Log level comes from the LOG_LEVEL environment variable, falling back to
// 'error' when it is unset or empty.
const logLevel = process.env.LOG_LEVEL || 'error'

// Relative path of the sandbox directory used by the tests.
const sandboxPath = 'test/sandbox'

// Object-mode readable stream; the fixture documents are pushed onto it below.
const s = new Readable({ objectMode: true })
// Queue the first two fixture documents ('a' and 'b') on the stream, in order.
for (const doc of [
  {
    id: 'a',
    title: 'The Beatles',
    content: 'The Beatles were an English rock band, formed in Liverpool in 1960. Beatles from Liverpool',
    year: ['1960', '1961', '1962']
  },
  {
    id: 'b',
    title: 'The Rolling Stones',
    content: 'The Rolling Stones are an English rock band formed in London in 1962.',
    year: ['1962', '1963', '1964']
  }
]) {
  s.push(doc)
}
s.push({
id: 'c',
title: 'Pink Floyd',
import stopword from 'stopword'
import pipeline from './search-index-pipeline'
import { convertMetaDocId } from 'src/activity-logger'
import { RESULT_TYPES } from 'src/overview/constants'
const indexOpts = {
batchSize: 500,
appendOnly: false,
indexPath: 'worldbrain-index',
logLevel: 'info',
preserveCase: false,
compositeField: false,
nGramLength: 1,
// separator: /[|' .,\-|(\n)]+/,
stopwords: stopword.en,
fieldOptions: {
// The `domain.tld(.cctld)` data from a page's URL
// Currently used to afford `domain.tld(.cctld)` search in our queries
// Should never need to tokenize, but put a forward-slash separator in case preprocessing fails for whatever reason
// (then domain search can still happen)
domain: {
weight: 40,
fieldedSearch: true,
separator: '/',
},
// Page title text; occasionally empty
title: {
weight: 30,
fieldedSearch: true,
},
// Page URL tokenized by forward slashes; normalized slightly to remove protocol and leading `www.`