Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def score_corpus_multiprocess(
    self, hypothesis: List[str], references: List[List[str]]
) -> float:
    """Compute a corpus-level chrF score, optionally using several processes.

    With a single worker the score is taken directly from
    ``sb.corpus_chrf``. Otherwise the inputs are split by ``self._batch``
    into ``self.n_workers`` batches, ``sb.get_corpus_statistics`` is run on
    every batch through a process pool, and the per-batch statistics are
    summed element-wise before being converted into one score.

    :param hypothesis: Hypothesis sentences.
    :param references: Reference streams; only the first stream
        (``references[0]``) is used for scoring.
    :return: The corpus-level chrF score.
    """
    if self.n_workers == 1:
        return sb.corpus_chrf(hypothesis, references[0]).score

    # NOTE(review): each batch is presumably a (hypotheses, references) pair
    # mirroring the method arguments -- confirm against ``self._batch``.
    batches = list(
        self._batch(hypothesis, references, n_batches=self.n_workers)
    )
    n_stats = sb.CHRF_ORDER * 3
    corpus_statistics = [0] * n_stats

    with ProcessPoolExecutor(max_workers=self.n_workers) as executor:
        # Submit each batch exactly once so no statistic is counted twice.
        futures = [
            executor.submit(sb.get_corpus_statistics, b[0], b[1][0])
            for b in batches
        ]
        # Consume results in completion order; summation is order-independent.
        progress = as_completed(futures)
        if self.verbose:
            progress = tqdm(progress)
        for future in progress:
            stats = future.result()
            for i in range(n_stats):
                corpus_statistics[i] += stats[i]

    avg_precision, avg_recall = sb._avg_precision_and_recall(
        corpus_statistics, sb.CHRF_ORDER
    )
    return sb._chrf(avg_precision, avg_recall)
def raw_corpus_chrf(hypotheses: Iterable[str], references: Iterable[str]) -> float:
    """
    Score a corpus with chrF by delegating to ``sacrebleu.corpus_chrf``.

    The call uses sacreBLEU's ``CHRF_ORDER`` and ``CHRF_BETA`` constants and
    sets ``remove_whitespace=True``.

    :param hypotheses: Hypotheses stream.
    :param references: Reference stream.
    :return: The value produced by ``sacrebleu.corpus_chrf`` (annotated as a float chrF score).
    """
    chrf_options = {
        "order": sacrebleu.CHRF_ORDER,
        "beta": sacrebleu.CHRF_BETA,
        "remove_whitespace": True,
    }
    chrf = sacrebleu.corpus_chrf(hypotheses, references, **chrf_options)
    return chrf