Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def similarity(self, first, second):
    """Return how alike two strings are as an integer percentage (0-100).

    The Damerau-Levenshtein distance between ``first`` and ``second`` is
    divided by the length of the longer string (at least 1, so two empty
    strings do not divide by zero), and the complement of that ratio is
    scaled to a percentage and truncated with ``int``.

    If a ``MemoryError`` is raised while computing the result, the fixed
    value 50 is returned instead.
    """
    try:
        try:
            edits = damerau_levenshtein_distance(first, second)
        except ValueError:
            # Fallback is needed on Python 2 only (actually jellyfish < 0.7.2).
            edits = py_damerau_levenshtein_distance(first, second)
        longest = max(len(first), len(second), 1)
        ratio = float(edits) / longest
        return int(100 * (1.0 - ratio))
    except MemoryError:
        # Strings too long to compare: report them as not much similar.
        return 50
# NOTE(review): this is an excerpt from the middle of a loop body; the
# enclosing loop and the original indentation are not visible here, so the
# nesting of the statements below cannot be confirmed from this fragment.
# Record the accumulated predicted time next to the ground-truth time, report
# the predicted end of case, and leave the enclosing loop.
one_ahead_pred.append(total_predicted_time)
one_ahead_gt.append(ground_truth_t)
print('! predicted, end case')
break
# Rescale the predicted time by divisor3.
# NOTE(review): presumably this undoes an earlier normalisation -- confirm.
y_t = y_t * divisor3
# Extend the timestamp list by y_t seconds past its last entry and add y_t to
# the running total of predicted time.
cropped_times3.append(cropped_times3[-1] + timedelta(seconds=y_t))
total_predicted_time = total_predicted_time + y_t
# Append the predicted symbol to the predicted sequence.
predicted += prediction
# Assemble one output row: case id, prefix size, ground truth, prediction,
# three values derived from string distances, the two times, an empty column
# and the absolute error of the time prediction.
output = []
if len(ground_truth)>0:
output.append(caseid)
output.append(prefix_size)
# NOTE(review): `unicode` exists only on Python 2.
output.append(unicode(ground_truth).encode("utf-8"))
output.append(unicode(predicted).encode("utf-8"))
# 1 - nlevenshtein; presumably `distance` is the third-party `distance`
# package and this yields a similarity -- confirm.
output.append(1 - distance.nlevenshtein(predicted, ground_truth))
# Damerau-Levenshtein distance divided by the longer sequence length,
# subtracted from 1.
# NOTE(review): on Python 2 without `from __future__ import division` the `/`
# below would be integer division -- confirm which applies in this file.
dls = 1 - (damerau_levenshtein_distance(unicode(predicted), unicode(ground_truth)) / max(len(predicted),len(ground_truth)))
if dls<0:
dls=0 # we encountered problems with Damerau-Levenshtein Similarity on some linux machines where the default character encoding of the operating system caused it to be negative, this should never be the case
output.append(dls)
output.append(1 - distance.jaccard(predicted, ground_truth))
output.append(ground_truth_t)
output.append(total_predicted_time)
# Empty placeholder column.
output.append('')
# Both arguments are one-element lists, so this is the error for this single
# prediction (presumably sklearn.metrics -- confirm).
output.append(metrics.mean_absolute_error([ground_truth_t], [total_predicted_time]))
#output.append(metrics.median_absolute_error([ground_truth_t], [total_predicted_time]))
# Write the assembled row through the CSV writer.
spamwriter.writerow(output)
# NOTE(review): this is an excerpt from the middle of a loop body; the
# enclosing loop and the original indentation are not visible here, so the
# nesting of the statements below cannot be confirmed from this fragment.
# When i is 2 and at least three ground-truth times exist, store the current
# predicted time together with the third ground-truth time.
if i==2:
if len(ground_truth_t)>2:
three_ahead_pred.append(y_t)
three_ahead_gt.append(ground_truth_t[2])
if prediction == '!': # end of case was just predicted, therefore, stop predicting further into the future
print('! predicted, end case')
break
# Append the predicted symbol to the predicted sequence.
predicted += prediction
# Assemble one output row: case id, prefix size, ground truth, prediction,
# three values derived from string distances, the joined time lists, an empty
# column and (when both time lists are non-empty) an absolute error.
output = []
if len(ground_truth)>0:
output.append(caseid)
output.append(prefix_size)
# NOTE(review): `unicode` exists only on Python 2.
output.append(unicode(ground_truth).encode("utf-8"))
output.append(unicode(predicted).encode("utf-8"))
# 1 - nlevenshtein; presumably `distance` is the third-party `distance`
# package and this yields a similarity -- confirm.
output.append(1 - distance.nlevenshtein(predicted, ground_truth))
# Damerau-Levenshtein distance divided by the longer sequence length,
# subtracted from 1.
# NOTE(review): on Python 2 without `from __future__ import division` the `/`
# below would be integer division -- confirm which applies in this file.
dls = 1 - (damerau_levenshtein_distance(unicode(predicted), unicode(ground_truth)) / max(len(predicted),len(ground_truth)))
if dls<0:
dls=0 # we encountered problems with Damerau-Levenshtein Similarity on some linux machines where the default character encoding of the operating system caused it to be negative, this should never be the case
output.append(dls)
output.append(1 - distance.jaccard(predicted, ground_truth))
# Times are written as '; '-separated strings; this happens before the
# truncation/padding below, so the row shows the unadjusted predicted_t.
output.append('; '.join(str(x) for x in ground_truth_t))
output.append('; '.join(str(x) for x in predicted_t))
if len(predicted_t)>len(ground_truth_t): # if predicted more events than length of case, only use needed number of events for time evaluation
predicted_t = predicted_t[:len(ground_truth_t)]
if len(ground_truth_t)>len(predicted_t): # if predicted less events than length of case, put 0 as placeholder prediction
# NOTE(review): range(k) yields 0, 1, ..., k-1, so the padding values are
# increasing integers rather than all zeros as the comment above states --
# confirm which is intended.
predicted_t.extend(range(len(ground_truth_t)-len(predicted_t)))
if len(ground_truth_t)>0 and len(predicted_t)>0:
# Empty placeholder column.
output.append('')
# Only the first ground-truth/predicted time pair is compared here.
output.append(metrics.mean_absolute_error([ground_truth_t[0]], [predicted_t[0]]))
#output.append(metrics.median_absolute_error([ground_truth_t[0]], [predicted_t[0]]))
else:
output.append('')
def difference(self, another):
    """Return the Damerau-Levenshtein distance to ``another``.

    The comparison uses the ``normalized`` attribute of this object and of
    ``another``.
    """
    mine = self.normalized
    theirs = another.normalized
    return damerau_levenshtein_distance(mine, theirs)