Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def from_dataframes(dfs, run=None):
    """Return a TrecRun whose data is the row-wise concatenation of the given dataframes.

    Parameters
    ----------
    dfs: List[Dataframe]
        A list of Dataframes conforming to TrecRun.columns. May be empty, in which case
        the returned run holds only what ``reset_data()`` leaves in ``run_data``.
    run: TrecRun
        Set to ``None`` by default. If None, then a new instance of TrecRun will be created.
        Else, the given TrecRun is reset via ``reset_data()`` and modified in place.

    Returns
    -------
    TrecRun
        The new (or given) run; its ``run_data`` contains the rows of every dataframe
        in ``dfs``, in order, under a fresh 0..n-1 index.
    """
    res = TrecRun() if run is None else run
    res.reset_data()
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
    # Concatenating the freshly reset frame with the inputs is its direct
    # replacement and keeps working when ``dfs`` is empty.
    res.run_data = pd.concat([res.run_data, *dfs], ignore_index=True)
    return res
The aggregation method to use.
depth : int
Maximum number of results from each input run to consider. Set to ``None`` by default, which indicates that
the complete list of results is considered.
k : int
Length of final results list. Set to ``None`` by default, which indicates that the union of all input documents
are ranked.
"""
if len(runs) < 2:
raise Exception('Merge requires at least 2 runs.')
rows = []
if aggregation == AggregationMethod.SUM:
topics = list(TrecRun.get_all_topics_from_runs(runs))
def merge_topic(topic):
    """Sum the scores of every document retrieved for ``topic`` and rank the totals.

    Reads ``runs``, ``depth`` and ``k`` from the enclosing scope. For each run,
    the 'docid' and 'score' columns of ``run.get_docs_by_topic(topic, depth)``
    are accumulated per docid. Documents are then ordered by descending total
    score, ties broken by ascending docid, and cut to the first ``k`` entries
    unless ``k`` is None.

    Returns
    -------
    List[tuple]
        Rows of the form (topic, 'Q0', docid, rank, score, 'merge_sum'), with
        rank starting at 1.
    """
    totals = {}
    for run in runs:
        hits = run.get_docs_by_topic(topic, depth)[['docid', 'score']].values
        for docid, score in hits:
            totals[docid] = totals.get(docid, 0.0) + score

    # Negating the score sorts highest first; docid keeps the order deterministic on ties.
    ranked = sorted(totals.items(), key=lambda pair: (-pair[1], pair[0]))
    if k is not None:
        ranked = ranked[:k]

    merged_rows = []
    for position, (docid, total) in enumerate(ranked, start=1):
        merged_rows.append((topic, 'Q0', docid, position, total, 'merge_sum'))
    return merged_rows
def from_search_results(docid_score_pair: Tuple[str, float], topic=1):
    """Return a TrecRun built from ranked search results for a single topic.

    Parameters
    ----------
    docid_score_pair:
        Iterated as a sequence of (docid, score) pairs, in rank order, despite
        the singular name and annotation. The first pair receives rank 1.
    topic:
        Topic identifier written into every row. Set to ``1`` by default.

    Returns
    -------
    The result of ``TrecRun.from_list`` applied to rows of the form
    (topic, 'Q0', docid, rank, score, 'searcher').
    """
    ranked_rows = [
        (topic, 'Q0', docid, position, score, 'searcher')
        for position, (docid, score) in enumerate(docid_score_pair, start=1)
    ]
    return TrecRun.from_list(ranked_rows)
"""Return a TrecRun by populating dataframe with the provided list of tuples.
For performance reasons, df.to_numpy() is faster than df.iterrows().
When manipulating dataframes, we first dump to np.ndarray and construct a list of tuples with new values.
Then use this function to convert the list of tuples to a TrecRun object.
Parameters
----------
rows: List[tuples]
List of tuples in the following format: (topic, 'Q0', docid, rank, score, tag)
run: TrecRun
Set to ``None`` by default. If None, then a new instance of TrecRun will be created.
Else, the given TrecRun will be modified.
"""
res = TrecRun() if run is None else run
df = pd.DataFrame(rows)
df.columns = TrecRun.columns
res.run_data = df.copy()
return res
"""
df_list = []
for topic in self.topics():
if topic not in qrels.topics():
continue
qrels_docids = qrels.get_docids(topic)
topic_df = self.run_data[self.run_data['topic'] == topic]
if keep is True:
topic_df = topic_df[topic_df['docid'].isin(qrels_docids)]
else:
topic_df = topic_df[~topic_df['docid'].isin(qrels_docids)]
df_list.append(topic_df)
run = TrecRun() if clone is True else self
return TrecRun.from_dataframes(df_list, run)
df_list = []
for topic in self.topics():
if topic not in qrels.topics():
continue
qrels_docids = qrels.get_docids(topic)
topic_df = self.run_data[self.run_data['topic'] == topic]
if keep is True:
topic_df = topic_df[topic_df['docid'].isin(qrels_docids)]
else:
topic_df = topic_df[~topic_df['docid'].isin(qrels_docids)]
df_list.append(topic_df)
run = TrecRun() if clone is True else self
return TrecRun.from_dataframes(df_list, run)