import numpy as np
import pandas as pd
from gensim.models import Word2Vec

def get_word2vec(content):
    # Average the 200-d embeddings of all in-vocabulary tokens in `content`.
    # (direct `word in model` / `model[word]` lookups follow the older gensim API used here)
    word2vec = Word2Vec.load('predictor/model/wiki.zh.seg_200d.model')
    res = np.zeros([200])
    count = 0
    # word_list = content.split()
    for word in content:
        if word in word2vec:
            res += word2vec[word]
            count += 1
    if count == 0:  # avoid division by zero when no token is in the vocabulary
        return pd.Series(res)
    return pd.Series(res / count)
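# A minimal usage sketch (not in the original source): build a tiny DataFrame
# whose 'content' column holds token lists and expand each row into a
# 200-dimensional averaged embedding. The column name and example tokens are
# assumptions for illustration only.
df = pd.DataFrame({'content': [[u'天气', u'很', u'好'], [u'自然', u'语言', u'处理']]})
features = df['content'].apply(get_word2vec)   # DataFrame of shape (2, 200)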
import gensim
import numpy as np
import theano
from gensim.models import Word2Vec

def createtopicvec(word2vec_path):
    # Build a (num_topics, max_topicword, embedding_dim) matrix of topic-word embeddings.
    max_topicword = 20
    model = Word2Vec.load(word2vec_path)
    topicmatrix = np.zeros(shape=(100, max_topicword, 100), dtype=theano.config.floatX)
    file = open(r"\\msra-sandvm-001\v-wuyu\Data\SemEvalCQA"
                r"\semeval2015-task3-english-data\pre-process\stemming_preservestop_cate\catedic.txt")
    i = 0
    miss = 0
    for line in file:
        tmp = line.strip().split(' ')
        for j in range(min(len(tmp), max_topicword)):
            if gensim.utils.to_unicode(tmp[j]) in model.vocab:
                topicmatrix[i, j, :] = model[gensim.utils.to_unicode(tmp[j])]
            else:
                miss = miss + 1
        i = i + 1
    print "miss word2vec", miss
    return topicmatrix
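# A possible follow-up (an assumption, not shown in the original source): wrap
# the returned (100, 20, 100) topic matrix as a Theano shared variable so it
# can be used, and optionally fine-tuned, inside a Theano computation graph.
# The model path below is a placeholder.
topic_embeddings = theano.shared(createtopicvec("topic_word2vec.model"),
                                 name="topic_embeddings")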
# "translate" language pair notation for task 2
languagePair2 = languagePair.upper().replace("-", "_")

t2 = WMT15QETask2(languagePair2, "../WMT15-data/task2_"+languagePair+"_dev_comb",
                  "../WMT15-data/task2_"+languagePair+"_train_comb",
                  targetWindowSize=targetWindowSize, sourceWindowSize=sourceWindowSize,
                  featureIndices=featureIndices, alignments=s2tAlignments,
                  badWeight=badweight, lowercase=lowerCase, full=full)
contextSize = t2.contextSize
print "... context size", contextSize
#print t2.wordDictionary
vocabularySize = len(t2.wordDictionary)

# load pretrained gensim word2vec model
params = None
if pretrainedModel is not None:
    print "... Loading pretrained model from file", pretrainedModel
    try:
        model = gensim.models.word2vec.Word2Vec.load(pretrainedModel)
        #print model["computer"]
        lc = False
        if ".lc." in pretrainedModel:
            lc = True
            print "... lowercasing"
        # construct initial lookup table from pretrained model
        params = constructLT(model, t2.wordDictionary, d_wrd, lc)
    except AttributeError:  # full model, not only LT pretrained
        params = loadParams(pretrainedModel)
#get instance vectors and binary labels for training
import multiprocessing
import os

import numpy
from gensim.models import Word2Vec

# `conf` (configuration constants) and `preprocess` (tokeniser) come from the
# surrounding project and are not shown here.

def evaluate(cat, fold, txt_train, txt_test, y_train, y_test):
    pool = multiprocessing.Pool()
    wordlists_train = pool.map(preprocess, txt_train)
    wordlists_test = pool.map(preprocess, txt_test)
    pool.close()
    pool.join()
    emb = Word2Vec.load(os.path.join(conf.W2V_DIR, 'model'))
    # add point at origin for unknown words
    emb.wv.syn0 = numpy.vstack((emb.wv.syn0,
                                numpy.zeros(emb.wv.syn0.shape[1], dtype=numpy.float32)))
    # train data: replace words with embedding IDs, zero-padding and truncation
    X = numpy.zeros((len(y_train), conf.LSTM_MAXPOSTLEN), dtype=numpy.int32)
    X_lengths = numpy.zeros((len(y_train)))
    for i, words in enumerate(wordlists_train):
        X_lengths[i] = len(words)
        for j, w in enumerate(words):
            if j >= conf.LSTM_MAXPOSTLEN:
                break
            if w in emb:
                X[i, j] = emb.vocab[w].index
            else:
                X[i, j] = len(emb.vocab)
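# Note (added for clarity, not in the original source): because one zero row
# was appended to emb.wv.syn0 above, the unknown-word index len(emb.vocab)
# looks up an all-zero vector. The augmented matrix can therefore serve
# directly as the weight matrix of a downstream embedding layer, e.g.:
#
#     embedding_weights = emb.wv.syn0   # shape: (len(emb.vocab) + 1, dim)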
print("We're gonna train the model now...")
vec_model.build_vocab(sentences)
# Pass in all of the necessary training variables
vec_model.train(
    sentences,
    total_examples=vec_model.corpus_count,
    epochs=vec_model.iter
)
if not os.path.exists("trained"):
    os.makedirs("trained")
vec_model.save(os.path.join("trained", "trained_model.w2v"))
vec_model = w2v.Word2Vec.load(os.path.join("trained", "trained_model.w2v"))
print("We're just gonna compress the dimensions... hang tight!")
# Compress the words into a 2d Vector Space using t-distributed stochastic neighbour embedding
tsne = sklearn.manifold.TSNE(n_components=2, random_state=0)
all_word_vectors_matrix = vec_model.wv.syn0
all_word_vectors_matrix_2d = tsne.fit_transform(all_word_vectors_matrix)
points = pd.DataFrame(
    [
        (word, coords[0], coords[1])
        for word, coords in [
            (word, all_word_vectors_matrix_2d[vec_model.wv.vocab[word].index])
            for word in vec_model.wv.vocab
        ]
    ],
    columns=["word", "x", "y"]
)
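# A hedged follow-up (not from the original source): plot the 2-D t-SNE
# projection with matplotlib. The 'x'/'y' column names match the completed
# DataFrame above; figure and marker sizes are arbitrary choices.
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 12))
plt.scatter(points["x"], points["y"], s=2)
plt.show()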
def load(self) -> None:
    """Load Word2vec model"""
    logger.info("Loading word2vec model from {}".format(self.load_path))
    self.model = Word2Vec.load(self.load_path)
def applyWord2VecModel(modelname):
    model = word2vec.Word2Vec.load(modelname)
    for key in KEYWORDS_LONG['trump']:
        print("\n", key)
        for res in model.most_similar(key, topn=60):
            print(res)
else:
    sents = self.sentences_array
self.path = self.path + str(it) + ".npy"
print "Learning:" + self.path
print "CCCC!"  # debug output left from the original code
if not os.path.exists(self.path):
    print "Entra"  # debug: "entering" (no existing model found, so train one)
    entrada = []
    results = Parallel(n_jobs=num_cores, backend="threading")(
        delayed(generate_sample)(self.mode, sents, self.degree, self.w_size, i)
        for i in range(1, self.ns))
    for r in results:
        entrada.append(r)
    self.w2v = word2vec.Word2Vec(entrada, size=self.ndim, window=self.w_size,
                                 min_count=1, workers=num_cores, sg=0)
    self.w2v.save(self.path)
    print "TERMINO"  # debug: "finished"
else:
    self.w2v = word2vec.Word2Vec.load(self.path)
self.get_nodes()
self.get_rels([])
self.delete_props()
def convert_model(prefix):
    ln.info("loading model")
    w2v = Word2Vec.load(prefix)
    ln.info("saving dict...")
    dict_file = prefix + ".wordids.txt"
    with open(dict_file, "w") as f:
        for word, voc_obj in w2v.vocab.items():
            f.write((u"%s\t%s\n" % (word, voc_obj.index)).encode("UTF-8"))
    ln.info("saving weights as csv...")
    weights_file = prefix + ".syn0.csv"
    np.savetxt(weights_file, w2v.syn0, delimiter=",", header="%s\n%s" % w2v.syn0.shape)
    ln.info("all done. Saved converted model files: %s and %s." % (weights_file, dict_file))
def loadModelfromFile(self, modelFilePath):
    '''
    Load an existing model from disk.
    Training can be continued with the loaded model (needs more testing).
    '''
    return Word2Vec.load(modelFilePath)
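# A hedged sketch of continuing training with a loaded model, as the docstring
# suggests. 'more_sentences' (a list of token lists), the instance name
# 'trainer', the file path and the epoch count are all assumptions.
model = trainer.loadModelfromFile("word2vec.model")
model.build_vocab(more_sentences, update=True)   # register any new words first
model.train(more_sentences, total_examples=len(more_sentences), epochs=5)
model.save("word2vec.model")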