Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_offset(hypothesis, reference, expected_with_offset, expected_without_offset):
    """Compare raw corpus BLEU with offsets 0.0 and 0.1 against expected values.

    Each sacrebleu score is divided by 100 and must lie within ``EPSILON``
    of the corresponding expected value.
    """
    # Offset 0.0 is passed as the third positional argument.
    plain_result = sacrebleu.raw_corpus_bleu(hypothesis, reference, 0.0)
    plain_delta = expected_without_offset - plain_result.score / 100
    assert abs(plain_delta) < EPSILON

    # Same inputs again, this time with an offset of 0.1.
    offset_result = sacrebleu.raw_corpus_bleu(hypothesis, reference, 0.1)
    offset_delta = expected_with_offset - offset_result.score / 100
    assert abs(offset_delta) < EPSILON
def test_offset(hypothesis, reference, expected_with_offset, expected_without_offset):
    """Compare raw corpus BLEU with offsets 0.0 and 0.1 against expected values.

    Each sacrebleu score is divided by 100 and must lie within ``EPSILON``
    of the corresponding expected value.
    """
    # Offset 0.0 is passed as the third positional argument.
    plain_result = sacrebleu.raw_corpus_bleu(hypothesis, reference, 0.0)
    plain_delta = expected_without_offset - plain_result.score / 100
    assert abs(plain_delta) < EPSILON

    # Same inputs again, this time with an offset of 0.1.
    offset_result = sacrebleu.raw_corpus_bleu(hypothesis, reference, 0.1)
    offset_delta = expected_with_offset - offset_result.score / 100
    assert abs(offset_delta) < EPSILON
def test_bleu(hypotheses, references, expected_bleu):
    """Check raw corpus BLEU for a single reference stream against ``expected_bleu``.

    ``references`` is wrapped in a one-element list before the call, .01 is
    passed as the third positional argument, and the score is divided by
    100 before being compared within ``EPSILON``.
    """
    result = sacrebleu.raw_corpus_bleu(hypotheses, [references], .01)
    scaled_score = result.score / 100
    assert abs(scaled_score - expected_bleu) < EPSILON
def test_degenerate_uneven(hypotheses, references):
    """Expect ``EOFError`` whose message matches "stream" for the given inputs."""
    expected_failure = pytest.raises(EOFError, match=r'.*stream.*')
    with expected_failure:
        sacrebleu.raw_corpus_bleu(hypotheses, references)
def test_offset(hypothesis, reference, expected_with_offset, expected_without_offset):
    """Compare raw corpus BLEU with offsets 0.0 and 0.1 against expected values.

    Each sacrebleu score is divided by 100 and must lie within ``EPSILON``
    of the corresponding expected value.
    """
    # Offset 0.0 is passed as the third positional argument.
    plain_result = sacrebleu.raw_corpus_bleu(hypothesis, reference, 0.0)
    plain_delta = expected_without_offset - plain_result.score / 100
    assert abs(plain_delta) < EPSILON

    # Same inputs again, this time with an offset of 0.1.
    offset_result = sacrebleu.raw_corpus_bleu(hypothesis, reference, 0.1)
    offset_delta = expected_with_offset - offset_result.score / 100
    assert abs(offset_delta) < EPSILON
def test_offset(hypothesis, reference, expected_with_offset, expected_without_offset):
    """Compare raw corpus BLEU with offsets 0.0 and 0.1 against expected values.

    Each sacrebleu score is divided by 100 and must lie within ``EPSILON``
    of the corresponding expected value.
    """
    # Offset 0.0 is passed as the third positional argument.
    plain_result = sacrebleu.raw_corpus_bleu(hypothesis, reference, 0.0)
    plain_delta = expected_without_offset - plain_result.score / 100
    assert abs(plain_delta) < EPSILON

    # Same inputs again, this time with an offset of 0.1.
    offset_result = sacrebleu.raw_corpus_bleu(hypothesis, reference, 0.1)
    offset_delta = expected_with_offset - offset_result.score / 100
    assert abs(offset_delta) < EPSILON
def test_bleu(hypotheses, references, expected_bleu):
    """Check raw corpus BLEU for a single reference stream against ``expected_bleu``.

    ``references`` is wrapped in a one-element list before the call, .01 is
    passed as the third positional argument, and the score is divided by
    100 before being compared within ``EPSILON``.
    """
    result = sacrebleu.raw_corpus_bleu(hypotheses, [references], .01)
    scaled_score = result.score / 100
    assert abs(scaled_score - expected_bleu) < EPSILON
def bleu(hypotheses, references):
    """
    Raw corpus BLEU from sacrebleu (without tokenization), using a single reference stream.
    :param hypotheses: list of hypotheses (strings)
    :param references: list of references (strings)
    :return: the ``score`` attribute of the sacrebleu result, unscaled
    """
    # sacrebleu takes a list of reference streams; only one is supplied here.
    reference_streams = [references]
    result = sacrebleu.raw_corpus_bleu(sys_stream=hypotheses, ref_streams=reference_streams)
    return result.score
def raw_corpus_bleu(hypotheses: Iterable[str], references: Iterable[str], offset: Optional[float] = 0.01) -> float:
    """
    Thin wrapper around sacreBLEU's raw corpus BLEU for a single reference stream.
    :param hypotheses: Hypotheses stream.
    :param references: Reference stream; wrapped in a one-element list for sacreBLEU.
    :param offset: Smoothing constant, forwarded to sacreBLEU as ``smooth_value``.
    :return: sacreBLEU score divided by 100, i.e. BLEU as a float between 0 and 1.
    """
    result = sacrebleu.raw_corpus_bleu(hypotheses, [references], smooth_value=offset)
    return result.score / 100.0
# NOTE(review): fragment of a method body (it reads `self.` attributes and ends with `return`);
# the enclosing `def` and the original indentation are not visible here.
# NOTE(review): `reached_eof`, `current_index`, `dev_samples`, `model` and
# `all_hypothesis_sentences` are used below but never assigned in this fragment --
# presumably they are set up earlier in the method; confirm.
# Collects the reference sentences of every batch read below.
all_reference_sentences = []
# Keep reading batches until the reader reports end of data.
while not reached_eof:
# Read the next batch starting at `current_index`; the reader also returns the actual batch size and the EOF flag.
batch_x, batch_y, batch_z, batch_tt, actual_batch_size, reached_eof = read_batch_from_samples(dev_samples, self.batch_size, self.token_per_batch, current_index, model.config.data_config.input_features, model.config.data_config.output_features, model.config.data_config.output_translations, model.config.data_config.output_translation_features, model.config.data_config.input_clear_text, model.config.data_config.output_translation_clear_text)
# An empty batch ends the loop.
if actual_batch_size == 0:
break
# Convert `batch_tt[0][0]` to sentences using the first output translation vocabulary
# (the helper's name suggests unpadding, text conversion and BPE removal -- confirm).
reference = unpad_turn_to_text_and_remove_bpe_of_batch_t(batch_tt[0][0], model.config.data_config.output_translation_vocabularies[0][0])
for sentence in reference:
all_reference_sentences.append(sentence)
# Predict translations for the batch inputs and convert them with the same helper and vocabulary.
output = model.predict_translation_on_batch(batch_x)
output = unpad_turn_to_text_and_remove_bpe_of_batch_t(output, model.config.data_config.output_translation_vocabularies[0][0])
for sentence in output:
all_hypothesis_sentences.append(sentence)
# Advance the read position by the number of samples consumed.
current_index += actual_batch_size
# NOTE(review): if this check sits at the end of the loop body it looks redundant with the `while` condition -- confirm.
if reached_eof is True:
break
# Raw corpus BLEU over all collected hypotheses against the single collected reference stream.
bleu = sacrebleu.raw_corpus_bleu(sys_stream=all_hypothesis_sentences, ref_streams=[all_reference_sentences])
return bleu.score