Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def __init__(self, config):
    """Build the predictor: store config, set up the vocabulary (BPE or
    word-level), create the model, and load the computation graph/session.

    Args:
        config: dict-like; reads at least "use_bpe" and "vocab_size".
    """
    super(Predictor, self).__init__(config)
    self.config = config
    self.model = None  # populated by create_model()
    self.sess = None   # TF session; presumably populated by load_graph() — TODO confirm
    # self.builder = tf.saved_model.builder.SavedModelBuilder("savedModel")
    if self.config["use_bpe"]:
        # Chinese byte-pair-encoding embeddings sized to the configured vocab.
        self.bpe_zh = BPEmb(lang="zh", vs=config["vocab_size"])
    else:
        # Load the word vocabulary and build the inverse (index -> label) map.
        self.word_to_idx = self.load_vocab()
        self.idx_to_label = {value: key for key, value in self.word_to_idx.items()}
    # Initialize the model.
    self.create_model()
    print("load model finished")
    # Load the computation graph.
    self.load_graph()
    print("load graph finished")
# NOTE(review): orphaned fragment — the enclosing method signature is missing
# from this file, so `sentences`, `embedded`, and `index` are defined in a lost
# prelude. It walks tokens in order and attaches the next embedding vector to
# each; presumably part of a flair-style embedding class where `self.name`
# identifies the embedding — TODO confirm against the original source.
for sentence in sentences:
    for token in sentence:
        embedding = embedded[index]
        token.set_embedding(self.name, embedding)
        index += 1
return sentences
def __str__(self):
    """Return this object's name as its string representation."""
    return self.name
def extra_repr(self):
    """Return the extra repr details: the configured minimum frequency."""
    return f"min_freq={self.min_freq}"
class BPEmbSerializable(BPEmb):
    """BPEmb subclass whose pickled state embeds the SentencePiece model as
    raw bytes, so the pickle is self-contained rather than depending on a
    filesystem path that may not exist when unpickling elsewhere."""

    def __getstate__(self):
        """Return a picklable copy of __dict__ with the model file inlined.

        The live SentencePiece object ("spm") is dropped (set to None) and the
        model file's bytes are stored under "spm_model_binary"; __setstate__
        is expected to restore the object from those bytes.
        """
        state = self.__dict__.copy()
        # Read the sentence-piece model as bytes so the pickle is self-contained.
        # Use a context manager so the file handle is closed deterministically
        # (the original left it open until garbage collection).
        with open(self.model_file, mode="rb") as f:
            state["spm_model_binary"] = f.read()
        state["spm"] = None
        return state
def __setstate__(self, state):
    """Rebuild the BPEmb from pickled state, rehydrating the SentencePiece model.

    NOTE(review): this method is truncated in this file — the body of the final
    `if` (which should write state["spm_model_binary"] back to disk and reload
    it, e.g. via sentencepiece_load) is missing. TODO: restore from upstream.
    """
    from bpemb.util import sentencepiece_load

    # Path the SentencePiece model is expected at for this language/vocab size.
    model_file = self.model_tpl.format(lang=state["lang"], vs=state["vs"])
    self.__dict__ = state
    # write out the binary sentence piece model into the expected directory
    self.cache_dir: Path = Path(flair.cache_root) / "embeddings"
    if "spm_model_binary" in self.__dict__:
# NOTE(review): orphaned fragment — the enclosing `for word in ...` loop is
# missing from this file. It registers an unseen word into the parallel
# word2id / id2word maps, same pattern as the complete loops below.
if word not in word2id:
    word2id[word] = len(word2id)
    id2word[len(id2word)] = word
# Extend the word vocabulary with any tokens seen only in the test inputs,
# keeping word2id and id2word as mutually inverse maps.
for sentence in test_inputs:
    for token in sentence:
        if token not in word2id:
            word2id[token] = len(word2id)
            id2word[len(id2word)] = token
# BPE-LEVEL: load one pretrained byte-pair-encoding embedding per language.
bpe_embs = []
if bpe_lang_list is not None:
    print("Loading BPE:", bpe_lang_list)
    bpe_embs.extend(
        BPEmb(lang=lang, dim=bpe_emb_size, vs=bpe_vocab, cache_dir=bpe_cache)
        for lang in bpe_lang_list
    )
# CHAR-LEVEL: index every character appearing in word_list, keeping char2id
# and id2char as mutually inverse maps.
for sentence in word_list:
    for token in sentence:
        for ch in token:
            if ch not in char2id:
                char2id[ch] = len(char2id)
                id2char[len(id2char)] = ch
# Extend the character vocabulary with any characters seen in the training
# inputs that are not already indexed.
for sentence in train_inputs:
    for token in sentence:
        for ch in token:
            if ch not in char2id:
                char2id[ch] = len(char2id)
                id2char[len(id2char)] = ch