# Body of a word2vec-style text loader; `path`, `delimiter`, `cache`, `realpath`,
# `binpath`, `word2vec` and `dim` are defined earlier in the enclosing function,
# which is not included in this snippet.
with open(realpath, encoding='utf-8', errors='ignore') as f:
    for idx, line in enumerate(f):
        line = line.rstrip().split(delimiter)
        if len(line) > 2:
            if dim is None:
                dim = len(line)
            elif len(line) != dim:
                logger.warning('{}#{} length mismatches with {}'.format(path, idx + 1, dim))
                continue
            word, vec = line[0], line[1:]
            word2vec[word] = np.array(vec, dtype=np.float32)
dim -= 1  # the first field on each line is the token itself, not a vector component
if cache:
    save_pickle((word2vec, dim), binpath)
    logger.debug(f'Cached {binpath}')
return word2vec, dim
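
# ---------------------------------------------------------------------------
# Minimal, self-contained sketch (not part of the original module) of the text
# format the loader above parses: one token per line followed by its vector
# components, all separated by `delimiter`. The dimension-mismatch check and
# pickle caching are omitted; names below are illustrative only.
import io
import numpy as np

sample = 'king 0.1 0.2 0.3\nqueen 0.4 0.5 0.6\n'
word2vec, dim = {}, None
for raw in io.StringIO(sample):
    parts = raw.rstrip().split(' ')
    if len(parts) > 2:
        if dim is None:
            dim = len(parts)
        word2vec[parts[0]] = np.array(parts[1:], dtype=np.float32)
dim -= 1  # subtract the token column, leaving the vector size
assert dim == 3 and word2vec['king'].shape == (3,)
# ---------------------------------------------------------------------------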
# __init__ of a fasttext embedding layer; the super().__init__ call below
# suggests the enclosing class subclasses tf.keras.layers.Embedding.
def __init__(self, filepath: str, padding=PAD, name=None, **kwargs):
    self.padding = padding.encode('utf-8')
    self.filepath = filepath
    filepath = get_resource(filepath)
    assert os.path.isfile(filepath), f'Resolved path {filepath} is not a file'
    existed = global_cache.get(filepath, None)
    if existed:
        logger.debug('Use cached fasttext model [{}].'.format(filepath))
        self.model = existed
    else:
        logger.debug('Loading fasttext model from [{}].'.format(filepath))
        # fasttext prints a blank line while loading; keep it off stdout
        with stdout_redirected(to=os.devnull, stdout=sys.stderr):
            self.model = fasttext.load_model(filepath)
        global_cache[filepath] = self.model
    # vocabulary size and vector size come from the model itself, so drop any
    # conflicting keyword arguments the caller may have passed
    kwargs.pop('input_dim', None)
    kwargs.pop('output_dim', None)
    kwargs.pop('mask_zero', None)
    if not name:
        name = os.path.splitext(os.path.basename(filepath))[0]
    super().__init__(input_dim=len(self.model.words),
                     output_dim=self.model['king'].size,  # probe one word to get the vector size
                     mask_zero=padding is not None, trainable=False, dtype=tf.string,
                     name=name, **kwargs)
    embed_fn = np.frompyfunc(self.embed, 1, 1)  # element-wise wrapper around self.embed
    # vf = np.vectorize(self.embed, otypes=[np.ndarray])
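
# ---------------------------------------------------------------------------
# Minimal sketch (not from the original file) of why np.frompyfunc is used
# above: it lifts a Python function that maps one token to one vector into an
# element-wise operation over string arrays of any shape, returning an
# object-dtype array of per-token vectors. `lookup` is a hypothetical stand-in
# for the layer's self.embed.
import numpy as np

def lookup(token):
    # Stand-in for a real per-token embedding lookup.
    return np.full(4, float(len(token)), dtype=np.float32)

embed = np.frompyfunc(lookup, 1, 1)    # 1 input, 1 output, applied element-wise
tokens = np.array([['the', 'king'], ['a', 'queen']], dtype=object)
vectors = embed(tokens)                # object array with the same shape as `tokens`
assert vectors.shape == (2, 2)
assert vectors[1, 1].shape == (4,)     # each cell holds a length-4 float32 vector
# ---------------------------------------------------------------------------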