Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_trec2019_bl(self):
    """Check the 'trec2019_bl' topic set: size, two known titles, and key type."""
    topics = search.get_topics('trec2019_bl')
    self.assertEqual(len(topics), 60)
    self.assertEqual('d7d906991e2883889f850de9ae06655e', topics[870]['title'])
    self.assertEqual('0d7f5e24cafc019265d3ee4b9745e7ea', topics[829]['title'])
    # assertIsInstance reports the offending value and its type on failure,
    # unlike assertTrue(isinstance(...)) which only says "False is not true".
    first_key = next(iter(topics.keys()))
    self.assertIsInstance(first_key, int)
def test_covid_round1(self):
    """Check the 'covid_round1_udel' topic set: size, first/last queries, and key type."""
    topics = search.get_topics('covid_round1_udel')
    self.assertEqual(len(topics), 30)
    self.assertEqual('coronavirus origin origin COVID-19', topics[1]['query'])
    self.assertEqual('coronavirus remdesivir remdesivir effective treatment COVID-19', topics[30]['query'])
    # assertIsInstance gives a descriptive failure message including the actual value.
    first_key = next(iter(topics.keys()))
    self.assertIsInstance(first_key, int)
def test_trec_topicreader(self):
    """Load robust04 topics from a file via TrecTopicReader and compare with the named set.

    Verifies that the file-based load yields 250 topics keyed by ints and that
    the result equals what ``search.get_topics('robust04')`` returns.
    """
    # Running from command-line, we're in root of repo, but running in IDE, we're in tests/
    path = 'tools/topics-and-qrels/topics.robust04.txt'
    if not os.path.exists(path):
        path = f'../{path}'
    # Fail with the path that was tried so a missing file is easy to diagnose.
    self.assertTrue(os.path.exists(path), f'topics file not found: {path}')
    topics = search.get_topics_with_reader('io.anserini.search.topicreader.TrecTopicReader', path)
    self.assertEqual(len(topics), 250)
    # assertIsInstance gives a descriptive failure message including the actual value.
    first_key = next(iter(topics.keys()))
    self.assertIsInstance(first_key, int)
    self.assertEqual(search.get_topics('robust04'), topics)
def test_covid_round4_udel(self):
    """Check the 'covid_round4_udel' topic set: size, first/last queries, and key type."""
    topics = search.get_topics('covid_round4_udel')
    self.assertEqual(len(topics), 45)
    self.assertEqual('coronavirus origin origin COVID-19', topics[1]['query'])
    self.assertEqual('coronavirus mental health impact COVID-19 pandemic impacted mental health',
                     topics[45]['query'])
    # assertIsInstance gives a descriptive failure message including the actual value.
    first_key = next(iter(topics.keys()))
    self.assertIsInstance(first_key, int)
def test_car20(self):
    """Check the 'car17v2.0_benchmarkY1test' topic set: size, and that keys are not ints."""
    topics = search.get_topics('car17v2.0_benchmarkY1test')
    self.assertEqual(len(topics), 2254)
    # assertNotIsInstance reports the offending value on failure, unlike
    # assertFalse(isinstance(...)).
    first_key = next(iter(topics.keys()))
    self.assertNotIsInstance(first_key, int)
def test_msmarco_doc(self):
    """Check the 'msmarco_doc_dev' topic set: size and integer keys."""
    topics = search.get_topics('msmarco_doc_dev')
    self.assertEqual(len(topics), 5193)
    # assertIsInstance gives a descriptive failure message including the actual value.
    first_key = next(iter(topics.keys()))
    self.assertIsInstance(first_key, int)
def test_core18(self):
    """Check the 'core18' topic set: size and integer keys."""
    topics = search.get_topics('core18')
    self.assertEqual(len(topics), 50)
    # assertIsInstance gives a descriptive failure message including the actual value.
    first_key = next(iter(topics.keys()))
    self.assertIsInstance(first_key, int)
def test_covid_round3(self):
    """Check the 'covid_round3' topic set: size, first/last queries, and key type."""
    topics = search.get_topics('covid_round3')
    self.assertEqual(len(topics), 40)
    self.assertEqual('coronavirus origin', topics[1]['query'])
    self.assertEqual('coronavirus mutations', topics[40]['query'])
    # assertIsInstance gives a descriptive failure message including the actual value.
    first_key = next(iter(topics.keys()))
    self.assertIsInstance(first_key, int)
def test_msmarco_passage(self):
    """Check the 'msmarco_passage_dev_subset' topic set: size and integer keys."""
    topics = search.get_topics('msmarco_passage_dev_subset')
    self.assertEqual(len(topics), 6980)
    # assertIsInstance gives a descriptive failure message including the actual value.
    first_key = next(iter(topics.keys()))
    self.assertIsInstance(first_key, int)
self.assertEqual(tokens, ['citi', 'buse', 'run', 'time'])
# Specify Porter stemmer explicitly
analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemmer='porter'))
self.assertTrue(isinstance(analyzer, Analyzer))
tokens = analyzer.analyze('City buses are running on time.')
self.assertEqual(tokens, ['citi', 'buse', 'run', 'time'])
# Specify Krovetz stemmer explicitly
analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemmer='krovetz'))
self.assertTrue(isinstance(analyzer, Analyzer))
tokens = analyzer.analyze('City buses are running on time.')
self.assertEqual(tokens, ['city', 'bus', 'running', 'time'])
# No stemming
analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemming=False))
self.assertTrue(isinstance(analyzer, Analyzer))
tokens = analyzer.analyze('City buses are running on time.')
self.assertEqual(tokens, ['city', 'buses', 'running', 'time'])
# No stopword filter, no stemming
analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemming=False, stopwords=False))
self.assertTrue(isinstance(analyzer, Analyzer))
tokens = analyzer.analyze('City buses are running on time.')
self.assertEqual(tokens, ['city', 'buses', 'are', 'running', 'on', 'time'])
# No stopword filter, with stemming
analyzer = analysis.Analyzer(analysis.get_lucene_analyzer(stemming=True, stopwords=False))
self.assertTrue(isinstance(analyzer, Analyzer))
tokens = analyzer.analyze('City buses are running on time.')
self.assertEqual(tokens, ['citi', 'buse', 'ar', 'run', 'on', 'time'])