# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def perform_computation(num_sentences):
    """Benchmark helper: score *num_sentences* identical sentence pairs with WER.

    Builds two parallel lists of repeated sentences (ground truth vs. a
    hypothesis that differs by one inserted word) and computes the word
    error rate over them.

    :param num_sentences: number of ground-truth/hypothesis pairs to generate.
    :return: the WER score from ``wer`` — previously the result was computed
        and silently discarded; returning it lets callers inspect or assert
        on the value while remaining backward compatible (callers that
        ignored the ``None`` return are unaffected).
    """
    # range(n) is the idiomatic form of range(0, n).
    truth = ["this is a speed test" for _ in range(num_sentences)]
    hypo = ["this is not a speed test" for _ in range(num_sentences)]
    return wer(truth, hypo)
def test_fail_on_empty_ground_truth(self):
    """Every jiwer metric entry point must raise ValueError on an empty ground truth."""
    metrics = (
        jiwer.wer,
        jiwer.wil,
        jiwer.wip,
        jiwer.mer,
        jiwer.compute_measures,
    )
    for metric in metrics:
        # assertRaises invokes the callable with the given args itself,
        # so no intermediate closure is required.
        self.assertRaises(ValueError, metric, "", "test")
def _apply_test_on(self, cases):
for gt, h, correct_measures in cases:
measures = jiwer.compute_measures(truth=gt, hypothesis=h)
self.assertDictAlmostEqual(measures, correct_measures, delta=1e-16)
self._apply_test_on(cases)
ground_truth = [
"i like monthy python",
"what do you mean african or european swallow",
]
hypothesis = ["i like", "python", "what you mean", "or swallow"]
x = jiwer.compute_measures(ground_truth, hypothesis)
# is equivalent to
ground_truth = (
"i like monthy python what do you mean african or european swallow"
)
hypothesis = "i like python what you mean or swallow"
y = jiwer.compute_measures(ground_truth, hypothesis)
self.assertDictAlmostEqual(x, y, delta=1e-9)
),
(
"i am a short ground truth",
"i am a considerably longer and very much incorrect hypothesis",
_m(7 / 6, 0.7, 0.85),
),
]
self._apply_test_on(cases)
ground_truth = [
"i like monthy python",
"what do you mean african or european swallow",
]
hypothesis = ["i like", "python", "what you mean", "or swallow"]
x = jiwer.compute_measures(ground_truth, hypothesis)
# is equivalent to
ground_truth = (
"i like monthy python what do you mean african or european swallow"
)
hypothesis = "i like python what you mean or swallow"
y = jiwer.compute_measures(ground_truth, hypothesis)
self.assertDictAlmostEqual(x, y, delta=1e-9)
def test_fail_on_empty_ground_truth(self):
    """Each metric function must reject an empty ground-truth string with ValueError."""
    for metric in (
        jiwer.wer,
        jiwer.wil,
        jiwer.wip,
        jiwer.mer,
        jiwer.compute_measures,
    ):
        # Pass the callable and its arguments straight to assertRaises
        # instead of wrapping them in a local callback.
        self.assertRaises(ValueError, metric, "", "test")
def test_fail_on_empty_ground_truth(self):
    """An empty ground truth is invalid input for every jiwer measure."""
    checked_functions = [
        jiwer.wer,
        jiwer.wil,
        jiwer.wip,
        jiwer.mer,
        jiwer.compute_measures,
    ]
    for fn in checked_functions:
        # assertRaises accepts the callable plus call arguments directly;
        # this is equivalent to the closure-based form but shorter.
        self.assertRaises(ValueError, fn, "", "test")
def test_fail_on_empty_ground_truth(self):
    """Verify ValueError is raised when the ground truth is the empty string."""
    for measure_fn in (
        jiwer.wer,
        jiwer.wil,
        jiwer.wip,
        jiwer.mer,
        jiwer.compute_measures,
    ):
        # Let assertRaises perform the call itself rather than
        # defining a throwaway callback per iteration.
        self.assertRaises(ValueError, measure_fn, "", "test")
score_failure_cnt = 0
for step, text in tqdm(enumerate(self._dataset), desc='evaluation steps', total=len(self._dataset)):
if self.limit_len is not None:
text = text[:self.limit_len]
try:
unspaced_text = unspacing(text.strip())
tokenized_text = text_to_list(unspaced_text)
input_batch = torch.Tensor([self._input_vocab.to_indices(tokenized_text)]).long()
_, tag_seq = self._model(input_batch)
labeled_tag_seq = self._tag_vocab.to_tokens(tag_seq[0].tolist())
pred_text = segment_word_by_tags(unspaced_text, labeled_tag_seq)
wer_score += jiwer.wer(text.strip(), pred_text.strip())
if text.split() == pred_text.split():
corrected_sent_cnt += 1
_, labels = labelize(text, bi_tags_only=True)
labels = [ch for ch in labels]
labeled_tag_seq = ' '.join(labeled_tag_seq).replace('E', 'I').replace('S', 'B').replace('',
'I').split()
acc_score += acc(labeled_tag_seq, labels)
f1_score += f1(labeled_tag_seq, labels, labels=['B', 'I'])
except Exception as e:
score_failure_cnt += 1
logger.warning("Error message while calculating wer score: {}".format(e))
logger.info('wer score failure {} times'.format(score_failure_cnt))
raise ValueError()
else:
class BaseRemoveTransform(AbstractTransform):
    """Transform that substitutes (or simply deletes) a fixed set of tokens.

    Every occurrence of each token in ``tokens_to_remove`` is replaced by
    ``replace_token``; with the default empty replacement this is plain
    removal. Substitutions are applied in the order the tokens are listed.
    """

    def __init__(self, tokens_to_remove: List[str], replace_token=""):
        self.tokens_to_remove = tokens_to_remove
        self.replace_token = replace_token

    def process_string(self, s: str):
        # Apply each substitution in declaration order; later tokens see
        # the result of earlier replacements.
        result = s
        for token in self.tokens_to_remove:
            result = result.replace(token, self.replace_token)
        return result

    def process_list(self, inp: List[str]):
        # Element-wise application of the string transform.
        return [self.process_string(sentence) for sentence in inp]
class SentencesToListOfWords(AbstractTransform):
def __init__(self, word_delimiter: str = " "):
"""
Transforms one or more sentences into a list of words. A sentence is
assumed to be a string, where words are delimited by a token
(such as ` `, space). Each string is expected to contain only a single sentence.
:param word_delimiter: the character which delimits words. Default is ` ` (space).
Default is None (sentences are not delimited)
"""
self.word_delimiter = word_delimiter
def process_string(self, s: str):
return s.split(self.word_delimiter)
def process_list(self, inp: List[str]):
words = []