How to use the pyserini.search.SimpleSearcher class in pyserini

To help you get started, we’ve selected a few pyserini examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

GitHub: castorini / pyserini / tests / test_search.py (view on GitHub, external link)
def setUp(self):
        """Download and unpack the pre-built CACM index, then open a searcher on it.

        Sets ``collection_url``, ``tarball_name``, ``index_dir`` and
        ``searcher`` on the test instance.
        """
        # A random suffix keeps the tarball and index directory names from
        # clashing with files left by other runs.
        r = randint(0, 10000000)
        self.collection_url = 'https://github.com/castorini/anserini-data/raw/master/CACM/lucene-index.cacm.tar.gz'
        self.tarball_name = f'lucene-index.cacm-{r}.tar.gz'
        self.index_dir = f'index{r}/'

        urlretrieve(self.collection_url, self.tarball_name)

        # The context manager closes the archive even if extraction fails.
        with tarfile.open(self.tarball_name) as tarball:
            tarball.extractall(self.index_dir)

        self.searcher = SimpleSearcher(f'{self.index_dir}lucene-index.cacm')
GitHub: castorini / pyserini / pyserini / search / __main__.py (view on GitHub, external link)
# Ranking-model switches.
parser.add_argument('--rm3', action='store_true', help="Use RM3")
parser.add_argument('--qld', action='store_true', help="Use QLD")
# Options for the pseudo-relevance classifier reranker (all prefixed --prcl).
parser.add_argument('--prcl', type=ClassifierType, nargs='+', default=[],
                    help='Specify the classifier PseudoRelevanceClassifierReranker uses.')
parser.add_argument('--prcl.vectorizer', dest='vectorizer', type=str,
                    help='Type of vectorizer. Available: TfidfVectorizer, BM25Vectorizer.')
parser.add_argument('--prcl.r', dest='r', type=int, default=10,
                    help='Number of positive labels in pseudo relevance feedback.')
parser.add_argument('--prcl.n', dest='n', type=int, default=100,
                    help='Number of negative labels in pseudo relevance feedback.')
parser.add_argument('--prcl.alpha', dest='alpha', type=float, default=0.5,
                    help='Alpha value for interpolation in pseudo relevance feedback.')
args = parser.parse_args()

topics = get_topics(args.topics)
searcher = SimpleSearcher(args.index)
# Names of the ranking components enabled for this run, in the order they
# are switched on below.
search_rankers = []

if args.qld:
    search_rankers.append('qld')
    searcher.set_qld()
else:
    # No searcher call is made on this path, so the searcher keeps the
    # ranking it was constructed with (presumably BM25 -- confirm).
    search_rankers.append('bm25')

if args.rm3:
    search_rankers.append('rm3')
    searcher.set_rm3()

# An empty result from get_topics is treated as an invalid topics name.
if not topics:
    print(f'Topic {args.topics} Not Found')
    # Exit with a failure status. Raising SystemExit avoids relying on the
    # site-provided exit() helper, which is intended for interactive use.
    raise SystemExit(1)