Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
self.assertEqual(tokens, ['citi', 'buse', 'run', 'time'])
# Specify Porter stemmer explicitly
analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemmer='porter'))
self.assertTrue(isinstance(analyzer, Analyzer))
tokens = analyzer.analyze('City buses are running on time.')
self.assertEqual(tokens, ['citi', 'buse', 'run', 'time'])
# Specify Krovetz stemmer explicitly
analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemmer='krovetz'))
self.assertTrue(isinstance(analyzer, Analyzer))
tokens = analyzer.analyze('City buses are running on time.')
self.assertEqual(tokens, ['city', 'bus', 'running', 'time'])
# No stemming
analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemming=False))
self.assertTrue(isinstance(analyzer, Analyzer))
tokens = analyzer.analyze('City buses are running on time.')
self.assertEqual(tokens, ['city', 'buses', 'running', 'time'])
# No stopword filter, no stemming
analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemming=False, stopwords=False))
self.assertTrue(isinstance(analyzer, Analyzer))
tokens = analyzer.analyze('City buses are running on time.')
self.assertEqual(tokens, ['city', 'buses', 'are', 'running', 'on', 'time'])
# No stopword filter, with stemming
analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemming=True, stopwords=False))
self.assertTrue(isinstance(analyzer, Analyzer))
tokens = analyzer.analyze('City buses are running on time.')
self.assertEqual(tokens, ['citi', 'buse', 'ar', 'run', 'on', 'time'])
def test_invalid_analyzer_wrapper(self):
    # Handing the wrapper something that is not an analyzer (here a plain
    # str) must be rejected with TypeError. The callable form of
    # assertRaises performs the call and fails the test unless that
    # exception propagates out of it.
    self.assertRaises(TypeError, analysis.Analyzer, 'str')
# Specify Porter stemmer explicitly
analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemmer='porter'))
self.assertTrue(isinstance(analyzer, Analyzer))
tokens = analyzer.analyze('City buses are running on time.')
self.assertEqual(tokens, ['citi', 'buse', 'run', 'time'])
# Specify Krovetz stemmer explicitly
analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemmer='krovetz'))
self.assertTrue(isinstance(analyzer, Analyzer))
tokens = analyzer.analyze('City buses are running on time.')
self.assertEqual(tokens, ['city', 'bus', 'running', 'time'])
# No stemming
analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemming=False))
self.assertTrue(isinstance(analyzer, Analyzer))
tokens = analyzer.analyze('City buses are running on time.')
self.assertEqual(tokens, ['city', 'buses', 'running', 'time'])
# No stopword filter, no stemming
analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemming=False, stopwords=False))
self.assertTrue(isinstance(analyzer, Analyzer))
tokens = analyzer.analyze('City buses are running on time.')
self.assertEqual(tokens, ['city', 'buses', 'are', 'running', 'on', 'time'])
# No stopword filter, with stemming
analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemming=True, stopwords=False))
self.assertTrue(isinstance(analyzer, Analyzer))
tokens = analyzer.analyze('City buses are running on time.')
self.assertEqual(tokens, ['citi', 'buse', 'ar', 'run', 'on', 'time'])
def test_invalid_analysis(self):
    # 'blah' is not a valid analyzer configuration. Both construction steps
    # stay inside the assertRaises context, so the ValueError may come from
    # either building the Lucene analyzer or wrapping it.
    with self.assertRaises(ValueError):
        lucene_analyzer = analysis.get_lucene_analyzer('blah')
        analysis.Analyzer(lucene_analyzer)
def test_analysis(self):
    # Each case pairs the keyword arguments handed to get_lucene_analyzer
    # with the tokens expected for the sample sentence.
    sentence = 'City buses are running on time.'
    cases = [
        # Default is Porter stemmer
        ({}, ['citi', 'buse', 'run', 'time']),
        # Specify Porter stemmer explicitly
        ({'stemmer': 'porter'}, ['citi', 'buse', 'run', 'time']),
        # Specify Krovetz stemmer explicitly
        ({'stemmer': 'krovetz'}, ['city', 'bus', 'running', 'time']),
    ]
    for kwargs, expected in cases:
        # subTest labels a failure with its configuration and lets the
        # remaining configurations run instead of stopping at the first one.
        with self.subTest(**kwargs):
            analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(**kwargs))
            # assertIsInstance reports the offending object and expected
            # class on failure; assertTrue(isinstance(...)) only reports
            # "False is not true".
            self.assertIsInstance(analyzer, Analyzer)
            tokens = analyzer.analyze(sentence)
            self.assertEqual(tokens, expected)
def test_analysis(self):
    # Each case pairs the keyword arguments handed to get_lucene_analyzer
    # with the tokens expected for the sample sentence.
    sentence = 'City buses are running on time.'
    cases = [
        # Default is Porter stemmer
        ({}, ['citi', 'buse', 'run', 'time']),
        # Specify Porter stemmer explicitly
        ({'stemmer': 'porter'}, ['citi', 'buse', 'run', 'time']),
        # Specify Krovetz stemmer explicitly
        ({'stemmer': 'krovetz'}, ['city', 'bus', 'running', 'time']),
    ]
    for kwargs, expected in cases:
        # subTest labels a failure with its configuration and lets the
        # remaining configurations run instead of stopping at the first one.
        with self.subTest(**kwargs):
            analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(**kwargs))
            # assertIsInstance reports the offending object and expected
            # class on failure; assertTrue(isinstance(...)) only reports
            # "False is not true".
            self.assertIsInstance(analyzer, Analyzer)
            tokens = analyzer.analyze(sentence)
            self.assertEqual(tokens, expected)
# No stemming
"""Searches the collection.
Parameters
----------
term : str
The query term string.
field : str
Field to search.
analyzer : Analyzer
Analyzer to use for tokenizing the query term.
Returns
-------
JTermQuery
"""
analyzer = Analyzer(analyzer)
return JTermQuery(JTerm(field, analyzer.analyze(term)[0]))