* silent              disable the log output from the C++ extension [1]
* encoding            specify input_file encoding [utf-8]
* pretrained_vectors  pretrained word vectors (.vec file) for supervised learning []
"""
# Fill in any hyperparameters the caller did not supply from the [model]
# section of the configuration, then train the supervised model and cache it
# on the class.
config = get_config()
kwargs.setdefault('lr', config.get('model', 'lr'))
kwargs.setdefault('lr_update_rate', config.get('model', 'lr_update_rate'))
kwargs.setdefault('dim', config.get('model', 'dim'))
kwargs.setdefault('ws', config.get('model', 'ws'))
kwargs.setdefault('epoch', config.get('model', 'epoch'))
kwargs.setdefault('word_ngrams', config.get('model', 'word_ngrams'))
kwargs.setdefault('loss', config.get('model', 'loss'))
kwargs.setdefault('bucket', config.get('model', 'bucket'))
kwargs.setdefault('thread', config.get('model', 'thread'))
kwargs.setdefault('silent', config.get('model', 'silent'))
cls.__model = ft.supervised(input_file, output, **kwargs)
return cls.__model
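The get_config() helper above is assumed to expose a ConfigParser-style get(section, option) interface; a minimal sketch under that assumption (the INI path and option values are illustrative, and since ConfigParser.get() returns strings, numeric options may need getint()/getfloat() before being passed to fastText):

import configparser

def get_config(path='model.ini'):
    # Hypothetical INI file with a [model] section, e.g.
    # [model]
    # lr = 0.1
    # dim = 100
    # epoch = 5
    config = configparser.ConfigParser()
    config.read(path)
    return config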
    cleaned = clean_text(tweet[1])
    i += 1
    if i % 10 == 0:  # to test
        for htag in tweet[0]:
            _ = fo2.write("__label__{} ".format(htag))
        _ = fo2.write("{}\n".format(cleaned))
    else:  # to train
        for htag in tweet[0]:
            _ = fo.write("__label__{} ".format(htag))
        _ = fo.write("{}\n".format(cleaned))
fo.close()
fo2.close()
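Each line written this way follows fastText's supervised-learning input format: one or more __label__<tag> prefixes followed by the text, for example (the tags and sentence are illustrative):

__label__python __label__nlp cleaned text of the tweet goes here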
# A higher epoch count improves precision, at least on smallish training sets; consider making it a parameter.
self.classifier = fasttext.supervised(train_file, '/tmp/model', epoch=35)
self.analyze_model(test_file)
return self.classifier
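A minimal sketch of what analyze_model() might do, assuming the old fasttext Python wrapper, whose classifier.test() returns a result object with nexamples, precision and recall attributes:

def analyze_model(self, test_file):
    # Evaluate the trained classifier on the held-out file and report P@1 / R@1.
    result = self.classifier.test(test_file)
    print('examples:  {}'.format(result.nexamples))
    print('precision: {:.3f}'.format(result.precision))
    print('recall:    {:.3f}'.format(result.recall))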
def train_classifier(output):
    # Nested helper: relies on self, train_path and label_prefix being
    # available in the enclosing scope rather than taking them as arguments.
    self._classifier = fasttext.supervised(input_file=train_path, output=output,
                                           label_prefix=label_prefix or self.LABEL_PREFIX,
                                           **self.get_params())
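get_params() is expected to return a plain dict of hyperparameters accepted by fasttext.supervised; a sketch with illustrative values (none of these defaults come from the original code):

def get_params(self):
    # Hyperparameters forwarded to fasttext.supervised via **kwargs.
    return {
        'lr': 0.1,
        'dim': 100,
        'epoch': 25,
        'word_ngrams': 2,
        'min_count': 1,
        'loss': 'softmax',
    }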
def create_classifier(self):
    return fasttext.supervised(self.train_file,
                               self.model_name,
                               epoch=self.params['epoch'],
                               dim=10,
                               word_ngrams=self.params['word_ngrams'],
                               lr=self.params['lr'],
                               min_count=self.params['min_count'],
                               bucket=2000000,
                               loss='ns')
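Here bucket=2000000 sizes the hash table used for word n-gram features and loss='ns' selects negative sampling. Once trained, the returned classifier can be queried directly; a usage sketch (clf_factory and the input sentence are hypothetical):

classifier = clf_factory.create_classifier()
# Top-3 predicted labels for a single cleaned input sentence.
labels = classifier.predict(['example cleaned input text'], k=3)
print(labels[0])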