Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def setUp(self):
    """Download and unpack the pre-built CACM index, then open a searcher on it.

    Sets ``collection_url``, ``tarball_name``, ``index_dir`` and ``searcher``
    on the instance. The tarball and the extraction directory are created
    relative to the current working directory.
    """
    # Download pre-built CACM index; append a random value to avoid filename clashes.
    r = randint(0, 10000000)
    self.collection_url = 'https://github.com/castorini/anserini-data/raw/master/CACM/lucene-index.cacm.tar.gz'
    self.tarball_name = f'lucene-index.cacm-{r}.tar.gz'
    self.index_dir = f'index{r}/'

    # The (filename, headers) return value is not needed: the destination
    # path is already stored in self.tarball_name.
    urlretrieve(self.collection_url, self.tarball_name)

    # The context manager closes the archive even if extraction raises.
    with tarfile.open(self.tarball_name) as tarball:
        tarball.extractall(self.index_dir)

    self.searcher = SimpleSearcher(f'{self.index_dir}lucene-index.cacm')
# Ranking switches: --qld selects QLD scoring and --rm3 enables RM3 on the
# searcher (both are applied further down, after the index is opened).
parser.add_argument('--rm3', action='store_true', help="Use RM3")
parser.add_argument('--qld', action='store_true', help="Use QLD")
# Options for PseudoRelevanceClassifierReranker. The dotted flag names are
# exposed on ``args`` under the plain names given by ``dest``.
parser.add_argument('--prcl', type=ClassifierType, nargs='+', default=[],
                    help='Specify the classifier PseudoRelevanceClassifierReranker uses.')
parser.add_argument('--prcl.vectorizer', dest='vectorizer', type=str,
                    help='Type of vectorizer. Available: TfidfVectorizer, BM25Vectorizer.')
parser.add_argument('--prcl.r', dest='r', type=int, default=10,
                    help='Number of positive labels in pseudo relevance feedback.')
parser.add_argument('--prcl.n', dest='n', type=int, default=100,
                    help='Number of negative labels in pseudo relevance feedback.')
parser.add_argument('--prcl.alpha', dest='alpha', type=float, default=0.5,
                    help='Alpha value for interpolation in pseudo relevance feedback.')
args = parser.parse_args()

# Resolve the topics before opening the index so that an invalid topics name
# is reported without first paying for the searcher construction.
topics = get_topics(args.topics)
if not topics:
    print(f'Topic {args.topics} Not Found')
    # ``exit()`` is an interactive helper installed by the ``site`` module and
    # may be absent; raising SystemExit directly ends the script the same way
    # (exit status 0, as before).
    raise SystemExit

searcher = SimpleSearcher(args.index)

# Names of the ranking components in use, in the order they are enabled.
search_rankers = []
if args.qld:
    search_rankers.append('qld')
    searcher.set_qld()
else:
    # No setter is called here: the searcher is left as constructed and the
    # run is labelled 'bm25'.
    search_rankers.append('bm25')
if args.rm3:
    search_rankers.append('rm3')
    searcher.set_rm3()