Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Feed every existing input file for this language/split into one output file.
# For each entry of `fnames`, the text after its last "-" (lowercased) becomes
# part of the expected file name inside that entry's directory under UD2_DIR.
# The output handle is managed by `with` so it is closed (and flushed) even if
# opening or processing one of the inputs raises.
with zopen(out_fname, "w") as fout:
    for fname in fnames:
        last_name = fname.split("-")[-1].lower()
        path_name = "%s/%s/%s_%s-ud-%s.conllu" % (UD2_DIR, fname, lang, last_name, curf)
        # Entries without a matching file on disk are silently skipped.
        if os.path.exists(path_name):
            with zopen(path_name) as fin:
                # deal_conll_file gets the open input plus the shared output
                # handle; presumably it writes the processed records to fout
                # (its body is not visible here -- confirm).
                deal_conll_file(fin, fout)
# stat
# Run three `wc` pipelines over out_fname, in this order:
#   1. the empty lines,
#   2. the non-empty lines,
#   3. the non-empty lines whose 5th tab-separated field contains neither
#      PUNCT nor SYM.
# NOTE(review): in the third template Python expands \t to a literal TAB
# before the command string is handed to system().
stat_templates = (
    'cat %s | grep -E "^$" | wc',
    'cat %s | grep -Ev "^$" | wc',
    "cat %s | grep -Ev '^$' | cut -f 5 -d $'\t'| grep -Ev 'PUNCT|SYM' | wc",
)
for stat_template in stat_templates:
    system(stat_template % out_fname, pp=True)
# get original embed
# Build the wget command that saves wiki.<lang>.vec under OUT_DIR, then run it
# through system(). -nc (no-clobber) is passed together with -O, so an output
# file that already exists is not downloaded again.
wiki_vec_cmd = "wget -nc -O %s/wiki.%s.vec https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.%s.vec" % (OUT_DIR, lang, lang)
system(wiki_vec_cmd, pp=True)
# project with LIB-matrix
# Name the three paths first: the vectors stored under OUT_DIR, the
# per-language matrix under LIB_DIR/alignment_matrices, and the export target.
src_vec_path = '%s/wiki.%s.vec' % (OUT_DIR, lang)
matrix_path = "%s/alignment_matrices/%s.txt" % (LIB_DIR, lang)
multi_vec_path = "%s/wiki.multi.%s.vec" % (OUT_DIR, lang)

# Load the vectors, apply the matrix as a transform, and export the result to
# wiki.multi.<lang>.vec.
lang_dict = FastVector(vector_file=src_vec_path)
lang_dict.apply_transform(matrix_path)
lang_dict.export(multi_vec_path)