Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(v_b, rmean_win).sum(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(spectral.orthogonalize(v_a), rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(spectral.orthogonalize(v_b), rmean_win).sum(axis=0).reshape(1, -1)
elif average.startswith("whole"):
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=v_a.mean(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=v_b.mean(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=spectral.orthogonalize(v_a).mean(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=spectral.orthogonalize(v_b).mean(axis=0).reshape(1, -1)
#v_a=array([weights_a[word]*embedding[word]
# for word in weights_a]).sum(axis=0).reshape(1, -1)
#v_b=array([weights_b[word]*embedding[word]
# for word in weights_b]).sum(axis=0).reshape(1, -1)
if args.dist.startswith("all"):
    # Write one tab-separated row per sentence pair:
    #   cosine similarity (1 - cosine distance), Euclidean, Manhattan.
    # v_a and v_b are reshaped to a single row (reshape(1, -1)) by the code
    # above, so each pairwise call is expected to return a 1x1 matrix
    # (assumes the scikit-learn pairwise functions -- TODO confirm import).
    # Index the single cell with [0, 0]: formatting the one-element row
    # returned by [0] with %f relies on NumPy's implicit array-to-scalar
    # conversion, which is deprecated since NumPy 1.25.
    try:
        fo.write("%f\t%f\t%f\n" % (1 - cosine_distances(v_a, v_b)[0, 0],
                                  euclidean_distances(v_a, v_b)[0, 0],
                                  manhattan_distances(v_a, v_b)[0, 0]))
    except Exception:
        # Best effort: keep the output aligned with the input pairs by
        # writing a placeholder row, but record the real cause instead of
        # hiding it. `except Exception` (not a bare `except`) lets
        # KeyboardInterrupt / SystemExit propagate.
        logging.exception("Distance error in pair: %s", iPair)
        fo.write("%f\t%f\t%f\t%s\n" % (0.2, 1.0, 1.0,"Distance error in pair: "+str(iPair)))
# Replace the entry of word `w` in sentence B by a (previous value, embedding
# vector) tuple. NOTE(review): the enclosing loop over `w` is outside this
# excerpt; the `continue` below belongs to it.
try:
weights_b[w]=(weights_b[w], embedding[w])
except KeyError:
# Lookup failed (presumably `w` is not in the embedding -- confirm): mark the
# word with the 0 placeholder that later comprehensions filter out with
# `!=0`, and remember it in `missing_cbow`.
weights_b[w]=0
missing_cbow.append(w)
continue
# Log (word, first tuple element) for every word of sentence B that was paired
# with an embedding; entries replaced by the 0 placeholder are skipped.
# Compare with `!=` like the rest of the code: `is 0` tests object identity,
# which only works because CPython caches small ints and triggers a
# SyntaxWarning on Python 3.8+.
logging.info("Weights sentence B %s", [(w, weights_b[w][0]) for w in weights_b if weights_b[w] != 0])
# Build one row vector (reshape(1, -1)) per sentence, v_a and v_b, from the
# (weight, embedding) tuples stored in weights_a / weights_b. Words whose
# entry is the 0 placeholder are skipped everywhere via `!=0`.
# NOTE(review): indentation was lost in this excerpt and the chain continues
# beyond it (further `average` modes are not shown here).
if not average:
# No averaging requested: weighted sum of the word vectors.
if args.ortho.startswith("ld"):
# "ld": use the embedding vectors as they are, each scaled by its weight.
v_a=array([weights_a[w][0]*weights_a[w][1]
for w in weights_a if weights_a[w]!=0]).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][0]*weights_b[w][1]
for w in weights_b if weights_b[w]!=0]).sum(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
# "orth": pass the stacked word vectors through spectral.orthogonalize first
# (helper defined elsewhere; presumably returns one row per word -- confirm),
# then apply the weights.
v_a=spectral.orthogonalize(array([weights_a[w][1] for w in weights_a if weights_a[w]!=0]))
w_a=array([weights_a[w][0] for w in weights_a if weights_a[w]!=0])
# Transpose so the weight vector lines up with the word axis, multiply, and
# transpose back: row i ends up scaled by w_a[i] (assumes `multiply` is
# numpy.multiply with broadcasting -- TODO confirm import).
v_a=multiply(v_a.T, w_a).T.sum(axis=0).reshape(1, -1)
v_b=spectral.orthogonalize(array([weights_b[w][1] for w in weights_b if weights_b[w]!=0]))
w_b=array([weights_b[w][0] for w in weights_b if weights_b[w]!=0])
v_b=multiply(v_b.T, w_b).T.sum(axis=0).reshape(1, -1)
elif average.startswith("moving"):
# "moving": the word vectors go through running_mean(..., rmean_win) (helper
# defined elsewhere; presumably a moving average with window rmean_win --
# confirm) and the result is summed. The weights (tuple element 0) are not
# used in this mode.
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(v_a, rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(v_b, rmean_win).sum(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
# Same as above, but the vectors are orthogonalized before the running mean.
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(spectral.orthogonalize(v_a), rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(spectral.orthogonalize(v_b), rmean_win).sum(axis=0).reshape(1, -1)
weights_b[w]=0
missing_cbow.append(w)
continue
# Log (word, first tuple element) for every word of sentence B whose entry is
# not the 0 placeholder assigned just above.
# Compare with `!=` like the rest of the code: `is 0` tests object identity,
# which only works because CPython caches small ints and triggers a
# SyntaxWarning on Python 3.8+.
logging.info("Weights sentence B %s", [(w, weights_b[w][0]) for w in weights_b if weights_b[w] != 0])
if not average:
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][0]*weights_a[w][1]
for w in weights_a if weights_a[w]!=0]).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][0]*weights_b[w][1]
for w in weights_b if weights_b[w]!=0]).sum(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
v_a=spectral.orthogonalize(array([weights_a[w][1] for w in weights_a if weights_a[w]!=0]))
w_a=array([weights_a[w][0] for w in weights_a if weights_a[w]!=0])
v_a=multiply(v_a.T, w_a).T.sum(axis=0).reshape(1, -1)
v_b=spectral.orthogonalize(array([weights_b[w][1] for w in weights_b if weights_b[w]!=0]))
w_b=array([weights_b[w][0] for w in weights_b if weights_b[w]!=0])
v_b=multiply(v_b.T, w_b).T.sum(axis=0).reshape(1, -1)
elif average.startswith("moving"):
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(v_a, rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(v_b, rmean_win).sum(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(spectral.orthogonalize(v_a), rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(spectral.orthogonalize(v_b), rmean_win).sum(axis=0).reshape(1, -1)
elif average.startswith("whole"):
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
weights_b[w]=0
missing_cbow.append(w)
continue
# Log (word, first tuple element) for every word of sentence B whose entry is
# not the 0 placeholder assigned just above.
# Compare with `!=` like the rest of the code: `is 0` tests object identity,
# which only works because CPython caches small ints and triggers a
# SyntaxWarning on Python 3.8+.
logging.info("Weights sentence B %s", [(w, weights_b[w][0]) for w in weights_b if weights_b[w] != 0])
if not average:
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][0]*weights_a[w][1]
for w in weights_a if weights_a[w]!=0]).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][0]*weights_b[w][1]
for w in weights_b if weights_b[w]!=0]).sum(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
v_a=spectral.orthogonalize(array([weights_a[w][1] for w in weights_a if weights_a[w]!=0]))
w_a=array([weights_a[w][0] for w in weights_a if weights_a[w]!=0])
v_a=multiply(v_a.T, w_a).T.sum(axis=0).reshape(1, -1)
v_b=spectral.orthogonalize(array([weights_b[w][1] for w in weights_b if weights_b[w]!=0]))
w_b=array([weights_b[w][0] for w in weights_b if weights_b[w]!=0])
v_b=multiply(v_b.T, w_b).T.sum(axis=0).reshape(1, -1)
elif average.startswith("moving"):
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(v_a, rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(v_b, rmean_win).sum(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(spectral.orthogonalize(v_a), rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(spectral.orthogonalize(v_b), rmean_win).sum(axis=0).reshape(1, -1)
elif average.startswith("whole"):
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
w_a=array([weights_a[w][0] for w in weights_a if weights_a[w]!=0])
v_a=multiply(v_a.T, w_a).T.sum(axis=0).reshape(1, -1)
v_b=spectral.orthogonalize(array([weights_b[w][1] for w in weights_b if weights_b[w]!=0]))
w_b=array([weights_b[w][0] for w in weights_b if weights_b[w]!=0])
v_b=multiply(v_b.T, w_b).T.sum(axis=0).reshape(1, -1)
elif average.startswith("moving"):
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(v_a, rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(v_b, rmean_win).sum(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(spectral.orthogonalize(v_a), rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(spectral.orthogonalize(v_b), rmean_win).sum(axis=0).reshape(1, -1)
elif average.startswith("whole"):
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=v_a.mean(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=v_b.mean(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=spectral.orthogonalize(v_a).mean(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=spectral.orthogonalize(v_b).mean(axis=0).reshape(1, -1)
#v_a=array([weights_a[word]*embedding[word]
# for word in weights_a]).sum(axis=0).reshape(1, -1)
#v_b=array([weights_b[word]*embedding[word]
# for word in weights_b]).sum(axis=0).reshape(1, -1)
if args.dist.startswith("all"):
elif args.ortho.startswith("orth"):
v_a=spectral.orthogonalize(array([weights_a[w][1] for w in weights_a if weights_a[w]!=0]))
w_a=array([weights_a[w][0] for w in weights_a if weights_a[w]!=0])
v_a=multiply(v_a.T, w_a).T.sum(axis=0).reshape(1, -1)
v_b=spectral.orthogonalize(array([weights_b[w][1] for w in weights_b if weights_b[w]!=0]))
w_b=array([weights_b[w][0] for w in weights_b if weights_b[w]!=0])
v_b=multiply(v_b.T, w_b).T.sum(axis=0).reshape(1, -1)
elif average.startswith("moving"):
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(v_a, rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(v_b, rmean_win).sum(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(spectral.orthogonalize(v_a), rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(spectral.orthogonalize(v_b), rmean_win).sum(axis=0).reshape(1, -1)
elif average.startswith("whole"):
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=v_a.mean(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=v_b.mean(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=spectral.orthogonalize(v_a).mean(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=spectral.orthogonalize(v_b).mean(axis=0).reshape(1, -1)
#v_a=array([weights_a[word]*embedding[word]
# for word in weights_a]).sum(axis=0).reshape(1, -1)
#v_b=array([weights_b[word]*embedding[word]
elif args.ortho.startswith("orth"):
v_a=spectral.orthogonalize(array([weights_a[w][1] for w in weights_a if weights_a[w]!=0]))
w_a=array([weights_a[w][0] for w in weights_a if weights_a[w]!=0])
v_a=multiply(v_a.T, w_a).T.sum(axis=0).reshape(1, -1)
v_b=spectral.orthogonalize(array([weights_b[w][1] for w in weights_b if weights_b[w]!=0]))
w_b=array([weights_b[w][0] for w in weights_b if weights_b[w]!=0])
v_b=multiply(v_b.T, w_b).T.sum(axis=0).reshape(1, -1)
elif average.startswith("moving"):
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(v_a, rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(v_b, rmean_win).sum(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(spectral.orthogonalize(v_a), rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(spectral.orthogonalize(v_b), rmean_win).sum(axis=0).reshape(1, -1)
elif average.startswith("whole"):
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=v_a.mean(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=v_b.mean(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=spectral.orthogonalize(v_a).mean(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=spectral.orthogonalize(v_b).mean(axis=0).reshape(1, -1)
#v_a=array([weights_a[word]*embedding[word]
# for word in weights_a]).sum(axis=0).reshape(1, -1)
#v_b=array([weights_b[word]*embedding[word]
# Replace the entry of word `w` in sentence B by a (previous value, embedding
# vector) tuple. NOTE(review): the enclosing loop over `w` is outside this
# excerpt; the `continue` below belongs to it.
try:
weights_b[w]=(weights_b[w], embedding[w])
except KeyError:
# Lookup failed (presumably `w` is not in the embedding -- confirm): mark the
# word with the 0 placeholder that later comprehensions filter out with
# `!=0`, and remember it in `missing_cbow`.
weights_b[w]=0
missing_cbow.append(w)
continue
# Log (word, first tuple element) for every word of sentence B that was paired
# with an embedding; entries replaced by the 0 placeholder are skipped.
# Compare with `!=` like the rest of the code: `is 0` tests object identity,
# which only works because CPython caches small ints and triggers a
# SyntaxWarning on Python 3.8+.
logging.info("Weights sentence B %s", [(w, weights_b[w][0]) for w in weights_b if weights_b[w] != 0])
# Build one row vector (reshape(1, -1)) per sentence, v_a and v_b, from the
# (weight, embedding) tuples stored in weights_a / weights_b. Words whose
# entry is the 0 placeholder are skipped everywhere via `!=0`.
# NOTE(review): indentation was lost in this excerpt and the chain continues
# beyond it (further `average` modes are not shown here).
if not average:
# No averaging requested: weighted sum of the word vectors.
if args.ortho.startswith("ld"):
# "ld": use the embedding vectors as they are, each scaled by its weight.
v_a=array([weights_a[w][0]*weights_a[w][1]
for w in weights_a if weights_a[w]!=0]).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][0]*weights_b[w][1]
for w in weights_b if weights_b[w]!=0]).sum(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
# "orth": pass the stacked word vectors through spectral.orthogonalize first
# (helper defined elsewhere; presumably returns one row per word -- confirm),
# then apply the weights.
v_a=spectral.orthogonalize(array([weights_a[w][1] for w in weights_a if weights_a[w]!=0]))
w_a=array([weights_a[w][0] for w in weights_a if weights_a[w]!=0])
# Transpose so the weight vector lines up with the word axis, multiply, and
# transpose back: row i ends up scaled by w_a[i] (assumes `multiply` is
# numpy.multiply with broadcasting -- TODO confirm import).
v_a=multiply(v_a.T, w_a).T.sum(axis=0).reshape(1, -1)
v_b=spectral.orthogonalize(array([weights_b[w][1] for w in weights_b if weights_b[w]!=0]))
w_b=array([weights_b[w][0] for w in weights_b if weights_b[w]!=0])
v_b=multiply(v_b.T, w_b).T.sum(axis=0).reshape(1, -1)
elif average.startswith("moving"):
# "moving": the word vectors go through running_mean(..., rmean_win) (helper
# defined elsewhere; presumably a moving average with window rmean_win --
# confirm) and the result is summed. The weights (tuple element 0) are not
# used in this mode.
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(v_a, rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(v_b, rmean_win).sum(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
# Same as above, but the vectors are orthogonalized before the running mean.
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(spectral.orthogonalize(v_a), rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(spectral.orthogonalize(v_b), rmean_win).sum(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(spectral.orthogonalize(v_a), rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(spectral.orthogonalize(v_b), rmean_win).sum(axis=0).reshape(1, -1)
elif average.startswith("whole"):
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=v_a.mean(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=v_b.mean(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=spectral.orthogonalize(v_a).mean(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=spectral.orthogonalize(v_b).mean(axis=0).reshape(1, -1)
#v_a=array([weights_a[word]*embedding[word]
# for word in weights_a]).sum(axis=0).reshape(1, -1)
#v_b=array([weights_b[word]*embedding[word]
# for word in weights_b]).sum(axis=0).reshape(1, -1)
if args.dist.startswith("all"):
    # Write one tab-separated row per sentence pair:
    #   cosine similarity (1 - cosine distance), Euclidean, Manhattan.
    # v_a and v_b are reshaped to a single row (reshape(1, -1)) by the code
    # above, so each pairwise call is expected to return a 1x1 matrix
    # (assumes the scikit-learn pairwise functions -- TODO confirm import).
    # Index the single cell with [0, 0]: formatting the one-element row
    # returned by [0] with %f relies on NumPy's implicit array-to-scalar
    # conversion, which is deprecated since NumPy 1.25.
    try:
        fo.write("%f\t%f\t%f\n" % (1 - cosine_distances(v_a, v_b)[0, 0],
                                  euclidean_distances(v_a, v_b)[0, 0],
                                  manhattan_distances(v_a, v_b)[0, 0]))
    except Exception:
        # Best effort: keep the output aligned with the input pairs by
        # writing a placeholder row, but record the real cause instead of
        # hiding it. `except Exception` (not a bare `except`) lets
        # KeyboardInterrupt / SystemExit propagate.
        logging.exception("Distance error in pair: %s", iPair)
        fo.write("%f\t%f\t%f\t%s\n" % (0.2, 1.0, 1.0,"Distance error in pair: "+str(iPair)))
elif args.dist.startswith("euc"):
elif args.ortho.startswith("orth"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=running_mean(spectral.orthogonalize(v_a), rmean_win).sum(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=running_mean(spectral.orthogonalize(v_b), rmean_win).sum(axis=0).reshape(1, -1)
elif average.startswith("whole"):
if args.ortho.startswith("ld"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=v_a.mean(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=v_b.mean(axis=0).reshape(1, -1)
elif args.ortho.startswith("orth"):
v_a=array([weights_a[w][1] for w in weights_a if weights_a[w]!=0])
v_a=spectral.orthogonalize(v_a).mean(axis=0).reshape(1, -1)
v_b=array([weights_b[w][1] for w in weights_b if weights_b[w]!=0])
v_b=spectral.orthogonalize(v_b).mean(axis=0).reshape(1, -1)
#v_a=array([weights_a[word]*embedding[word]
# for word in weights_a]).sum(axis=0).reshape(1, -1)
#v_b=array([weights_b[word]*embedding[word]
# for word in weights_b]).sum(axis=0).reshape(1, -1)
if args.dist.startswith("all"):
    # Write one tab-separated row per sentence pair:
    #   cosine similarity (1 - cosine distance), Euclidean, Manhattan.
    # v_a and v_b are reshaped to a single row (reshape(1, -1)) by the code
    # above, so each pairwise call is expected to return a 1x1 matrix
    # (assumes the scikit-learn pairwise functions -- TODO confirm import).
    # Index the single cell with [0, 0]: formatting the one-element row
    # returned by [0] with %f relies on NumPy's implicit array-to-scalar
    # conversion, which is deprecated since NumPy 1.25.
    try:
        fo.write("%f\t%f\t%f\n" % (1 - cosine_distances(v_a, v_b)[0, 0],
                                  euclidean_distances(v_a, v_b)[0, 0],
                                  manhattan_distances(v_a, v_b)[0, 0]))
    except Exception:
        # Best effort: keep the output aligned with the input pairs by
        # writing a placeholder row, but record the real cause instead of
        # hiding it. `except Exception` (not a bare `except`) lets
        # KeyboardInterrupt / SystemExit propagate.
        logging.exception("Distance error in pair: %s", iPair)
        fo.write("%f\t%f\t%f\t%s\n" % (0.2, 1.0, 1.0,"Distance error in pair: "+str(iPair)))
elif args.dist.startswith("euc"):