Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _scan_and_build_vocab(self):
    '''
    Build the model's vocabulary from the corpus sentences.

    A preliminary scan_vocab pass is attempted on a best-effort basis;
    build_vocab is then always called on a fresh sentence stream.
    '''
    try:
        self.model.scan_vocab(CorpusAdapterForGensim.get_sentences(self.corpus))
    except Exception:
        # Best-effort: a failing scan_vocab must not prevent build_vocab.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    self.model.build_vocab(CorpusAdapterForGensim.get_sentences(self.corpus))
def _scan_and_build_vocab(self):
    '''
    Build the model's vocabulary from phrase-transformed corpus sentences.

    A gensim Phrases model is fitted on the corpus sentences and applied to
    a fresh sentence stream before it is handed to build_vocab. The
    preliminary scan_vocab pass runs on the untransformed sentences and is
    best-effort only.
    '''
    from gensim.models import Phrases
    bigram_transformer = Phrases(CorpusAdapterForGensim.get_sentences(self.corpus))
    try:
        self.model.scan_vocab(CorpusAdapterForGensim.get_sentences(self.corpus))
    except Exception:
        # Best-effort: a failing scan_vocab must not prevent build_vocab.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    self.model.build_vocab(bigram_transformer[CorpusAdapterForGensim.get_sentences(self.corpus)])
def _scan_and_build_vocab(self):
    '''
    Build the model's vocabulary from the corpus sentences.

    scan_vocab is tried first and any ordinary error it raises is ignored;
    build_vocab then runs unconditionally on a new sentence stream.
    '''
    try:
        self.model.scan_vocab(CorpusAdapterForGensim.get_sentences(self.corpus))
    except Exception:
        # Deliberately ignored so build_vocab still runs; narrowed from a
        # bare except so interpreter-exit signals (KeyboardInterrupt,
        # SystemExit) are no longer swallowed.
        pass
    self.model.build_vocab(CorpusAdapterForGensim.get_sentences(self.corpus))
def _scan_and_build_vocab(self):
    '''
    Build the model's vocabulary from phrase-transformed corpus sentences.

    Fits a gensim Phrases model on the corpus sentences, tries a
    best-effort scan_vocab over the untransformed sentences, then calls
    build_vocab on the sentences passed through the Phrases model.
    '''
    from gensim.models import Phrases
    bigram_transformer = Phrases(CorpusAdapterForGensim.get_sentences(self.corpus))
    try:
        self.model.scan_vocab(CorpusAdapterForGensim.get_sentences(self.corpus))
    except Exception:
        # Deliberately ignored so build_vocab still runs; narrowed from a
        # bare except so interpreter-exit signals (KeyboardInterrupt,
        # SystemExit) are no longer swallowed.
        pass
    self.model.build_vocab(bigram_transformer[CorpusAdapterForGensim.get_sentences(self.corpus)])
def add_phrases(self, corpus):
    '''
    Fit a chain of gensim Phrases models on a parsed corpus.

    The first model is fitted on the corpus sentences with a space
    delimiter; each later model is fitted on the sentences as transformed
    by the model before it. The models are stored in self.phrases.

    Parameters
    ----------
    corpus: ParsedCorpus
        Corpus for phrase augmentation

    Returns
    -------
    self
    '''
    from gensim.models import Phrases
    assert isinstance(corpus, ParsedCorpus)
    first_model = Phrases(CorpusAdapterForGensim.get_sentences(corpus), delimiter=' ')
    self.phrases = [first_model]
    # One model already exists, so the range starts at 1.
    for _ in range(1, self.max_tokens_per_phrase):
        previous_model = self.phrases[-1]
        transformed = previous_model[CorpusAdapterForGensim.get_sentences(corpus)]
        self.phrases.append(Phrases(transformed))
    return self
# NOTE(review): headerless fragment -- no enclosing `def` line is visible for
# the statements below; they repeat the body of add_phrases(self, corpus).
# NOTE(review): the docstring says a new ParsedCorpus is returned, but the
# code returns self after storing the fitted Phrases models in self.phrases.
'''
Parameters
----------
corpus: Corpus for phrase augmentation
Returns
-------
New ParsedCorpus containing unigrams in corpus and new phrases
'''
from gensim.models import Phrases
assert isinstance(corpus, ParsedCorpus)
# First model is fitted on the corpus sentences with a space delimiter.
self.phrases = [Phrases(CorpusAdapterForGensim.get_sentences(corpus), delimiter=' ')]
# Each further model is fitted on the sentences transformed by the last one.
for i in range(1, self.max_tokens_per_phrase):
self.phrases.append(Phrases(self.phrases[-1][CorpusAdapterForGensim.get_sentences(corpus)]))
return self
def _scan_and_build_vocab(self):
    '''
    Build the model's vocabulary from phrase-transformed corpus sentences.

    A gensim Phrases model is fitted on the corpus sentences. scan_vocab is
    attempted on the untransformed sentences (ordinary errors are ignored),
    and build_vocab is called on the sentences transformed by the Phrases
    model.
    '''
    from gensim.models import Phrases
    bigram_transformer = Phrases(CorpusAdapterForGensim.get_sentences(self.corpus))
    try:
        self.model.scan_vocab(CorpusAdapterForGensim.get_sentences(self.corpus))
    except Exception:
        # Kept best-effort so build_vocab always runs. A bare except would
        # also trap KeyboardInterrupt/SystemExit, so only Exception is caught.
        pass
    self.model.build_vocab(bigram_transformer[CorpusAdapterForGensim.get_sentences(self.corpus)])
# NOTE(review): headerless fragment -- the `def` line that binds `epochs` and
# `training_iterations` is not visible here, so their actual defaults cannot
# be confirmed; the docstring's "Default is training_iterations" does not
# state a value.
'''
Parameters
----------
epochs : int
Number of epochs to train for. Default is 2000.
training_iterations : int
Number of times to repeat training process. Default is training_iterations.
Returns
-------
A trained word2vec model.
'''
# The vocabulary is built before any training call.
self._scan_and_build_vocab()
# Each iteration requests the corpus sentences again and trains for `epochs`.
for _ in range(training_iterations):
self.model.train(CorpusAdapterForGensim.get_sentences(self.corpus),
total_examples=self.model.corpus_count,
epochs=epochs)
return self.model