Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if vs < available[0]:
vs = available[0]
else:
vs = available[-1]
print("BPEmb fallback: {} from vocab size {} to {}".format(lang, _vs, vs))
self.vocab_size = self.vs = vs
self.dim = dim
self.cache_dir = Path(cache_dir)
model_file = self.model_tpl.format(lang=lang, vs=vs)
self.model_file = self._load_file(model_file)
self.spm = sentencepiece_load(self.model_file)
if encode_extra_options:
self.spm.SetEncodeExtraOptions(encode_extra_options)
emb_file = self.emb_tpl.format(lang=lang, vs=vs, dim=dim)
self.emb_file = self._load_file(emb_file, archive=True)
self.emb = load_word2vec_file(self.emb_file, add_pad=add_pad_emb)
self.most_similar = self.emb.most_similar
assert self.dim == self.emb.vectors.shape[1]
self.do_preproc = preprocess
self.BOS_str = "<s>"
self.EOS_str = "</s>"
self.BOS = self.spm.PieceToId(self.BOS_str)
self.EOS = self.spm.PieceToId(self.EOS_str)