Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
negs = self.vocab.get_negative_samples(n=self.CNTX_SPAN * 2, ignore_punct_and_num=True, stopwords=STOP_WORDS)
neg_cntx_vecs = [self.vocab.vec(self.vocab.index2word[x]) for x in negs]
neg_cntx = np.average(neg_cntx_vecs, axis=0)
self.cdb.add_context_vec(cui, neg_cntx, negative=True, cntx_type='MED',
inc_cui_count=False, lr=lr, anneal=True)
#### DEBUG ONLY ####
if self.DEBUG:
if cui in self.cdb.cui2context_vec and len(cntx_vecs) > 0:
if np.dot(unitvec(cntx), unitvec(self.cdb.cui2context_vec[cui])) < 0.01:
log.debug("SIMILARITY MED::::::::::::::::::::")
log.debug(words)
log.debug(cui)
log.debug(tkns)
log.debug(np.dot(unitvec(cntx),
unitvec(self.cdb.cui2context_vec[cui])))
log.debug(":::::::::::::::::::::::::::::::::::\n")
if cui in self.cdb.cui2context_vec_short and len(cntx_vecs_short) > 0:
if np.dot(unitvec(cntx_short), unitvec(self.cdb.cui2context_vec_short[cui])) < 0.01:
log.debug("SIMILARITY SHORT::::::::::::::::::::")
log.debug(words_short)
log.debug(cui)
log.debug(tkns)
log.debug(np.dot(unitvec(cntx_short),
unitvec(self.cdb.cui2context_vec[cui])))
log.debug(":::::::::::::::::::::::::::::::::::\n")
cntx_vecs_short.append(self.vocab.vec(word) * weights_short[w_ind])
if len(cntx_vecs_short) > 0:
cntx_short = np.average(cntx_vecs_short, axis=0)
if len(cntx_vecs) > 0:
cntx = np.average(cntx_vecs, axis=0)
#### DEBUG ONLY ####
if self.DEBUG:
if cui in self.cdb.cui2context_vec and len(cntx_vecs) > 0:
log.debug("SIMILARITY MED::::::::::::::::::::")
log.debug(words)
log.debug(cui)
log.debug(tkns)
log.debug(np.dot(unitvec(cntx),
unitvec(self.cdb.cui2context_vec[cui])))
log.debug(":::::::::::::::::::::::::::::::::::\n")
if cui in self.cdb.cui2context_vec_short and len(cntx_vecs_short) > 0:
log.debug("SIMILARITY SHORT::::::::::::::::::::")
log.debug(words_short)
log.debug(cui)
log.debug(tkns)
log.debug(np.dot(unitvec(cntx_short),
unitvec(self.cdb.cui2context_vec_short[cui])))
log.debug(":::::::::::::::::::::::::::::::::::\n")
#### END OF DEBUG ####
if cui in self.cdb.cui2context_vec and len(cntx_vecs) > 0:
sim = np.dot(unitvec(cntx), unitvec(self.cdb.cui2context_vec[cui]))
if np.dot(unitvec(cntx), unitvec(self.cdb.cui2context_vec[cui])) < 0.01:
log.debug("SIMILARITY MED::::::::::::::::::::")
log.debug(words)
log.debug(cui)
log.debug(tkns)
log.debug(np.dot(unitvec(cntx),
unitvec(self.cdb.cui2context_vec[cui])))
log.debug(":::::::::::::::::::::::::::::::::::\n")
if cui in self.cdb.cui2context_vec_short and len(cntx_vecs_short) > 0:
if np.dot(unitvec(cntx_short), unitvec(self.cdb.cui2context_vec_short[cui])) < 0.01:
log.debug("SIMILARITY SHORT::::::::::::::::::::")
log.debug(words_short)
log.debug(cui)
log.debug(tkns)
log.debug(np.dot(unitvec(cntx_short),
unitvec(self.cdb.cui2context_vec[cui])))
log.debug(":::::::::::::::::::::::::::::::::::\n")
inc_cui_count=False, lr=lr, anneal=True)
#### DEBUG ONLY ####
if self.DEBUG:
if cui in self.cdb.cui2context_vec and len(cntx_vecs) > 0:
if np.dot(unitvec(cntx), unitvec(self.cdb.cui2context_vec[cui])) < 0.01:
log.debug("SIMILARITY MED::::::::::::::::::::")
log.debug(words)
log.debug(cui)
log.debug(tkns)
log.debug(np.dot(unitvec(cntx),
unitvec(self.cdb.cui2context_vec[cui])))
log.debug(":::::::::::::::::::::::::::::::::::\n")
if cui in self.cdb.cui2context_vec_short and len(cntx_vecs_short) > 0:
if np.dot(unitvec(cntx_short), unitvec(self.cdb.cui2context_vec_short[cui])) < 0.01:
log.debug("SIMILARITY SHORT::::::::::::::::::::")
log.debug(words_short)
log.debug(cui)
log.debug(tkns)
log.debug(np.dot(unitvec(cntx_short),
unitvec(self.cdb.cui2context_vec[cui])))
log.debug(":::::::::::::::::::::::::::::::::::\n")
# Add only if probability and 'not' negative input
negs = self.vocab.get_negative_samples(n=self.CNTX_SPAN * 2, ignore_punct_and_num=True, stopwords=STOP_WORDS)
neg_cntx_vecs = [self.vocab.vec(self.vocab.index2word[x]) for x in negs]
neg_cntx = np.average(neg_cntx_vecs, axis=0)
self.cdb.add_context_vec(cui, neg_cntx, negative=True, cntx_type='MED',
inc_cui_count=False, lr=lr, anneal=True)
#### DEBUG ONLY ####
if self.DEBUG:
if cui in self.cdb.cui2context_vec and len(cntx_vecs) > 0:
if np.dot(unitvec(cntx), unitvec(self.cdb.cui2context_vec[cui])) < 0.01:
log.debug("SIMILARITY MED::::::::::::::::::::")
log.debug(words)
log.debug(cui)
log.debug(tkns)
log.debug(np.dot(unitvec(cntx),
unitvec(self.cdb.cui2context_vec[cui])))
log.debug(":::::::::::::::::::::::::::::::::::\n")
if cui in self.cdb.cui2context_vec_short and len(cntx_vecs_short) > 0:
if np.dot(unitvec(cntx_short), unitvec(self.cdb.cui2context_vec_short[cui])) < 0.01:
log.debug("SIMILARITY SHORT::::::::::::::::::::")
log.debug(words_short)
log.debug(cui)
log.debug(tkns)
log.debug(np.dot(unitvec(cntx_short),
unitvec(self.cdb.cui2context_vec[cui])))
log.debug(":::::::::::::::::::::::::::::::::::\n")
if cui in self.cdb.cui2context_vec and len(cntx_vecs) > 0:
log.debug("SIMILARITY MED::::::::::::::::::::")
log.debug(words)
log.debug(cui)
log.debug(tkns)
log.debug(np.dot(unitvec(cntx),
unitvec(self.cdb.cui2context_vec[cui])))
log.debug(":::::::::::::::::::::::::::::::::::\n")
if cui in self.cdb.cui2context_vec_short and len(cntx_vecs_short) > 0:
log.debug("SIMILARITY SHORT::::::::::::::::::::")
log.debug(words_short)
log.debug(cui)
log.debug(tkns)
log.debug(np.dot(unitvec(cntx_short),
unitvec(self.cdb.cui2context_vec_short[cui])))
log.debug(":::::::::::::::::::::::::::::::::::\n")
#### END OF DEBUG ####
if cui in self.cdb.cui2context_vec and len(cntx_vecs) > 0:
sim = np.dot(unitvec(cntx), unitvec(self.cdb.cui2context_vec[cui]))
if cui in self.cdb.cui2context_vec_short and len(cntx_vecs_short) > 0:
sim2 = np.dot(unitvec(cntx_short), unitvec(self.cdb.cui2context_vec_short[cui]))
if sim2 > 0 and abs(sim - sim2) > 0.1:
sim = (sim + sim2) / 2
if name is not None:
if cui in self.cdb.cui2pref_name:
if name == self.cdb.cui2pref_name[cui]:
sim = min(1, sim + 0.1)
return sim
else:
log.debug(np.dot(unitvec(cntx),
unitvec(self.cdb.cui2context_vec[cui])))
log.debug(":::::::::::::::::::::::::::::::::::\n")
if cui in self.cdb.cui2context_vec_short and len(cntx_vecs_short) > 0:
log.debug("SIMILARITY SHORT::::::::::::::::::::")
log.debug(words_short)
log.debug(cui)
log.debug(tkns)
log.debug(np.dot(unitvec(cntx_short),
unitvec(self.cdb.cui2context_vec_short[cui])))
log.debug(":::::::::::::::::::::::::::::::::::\n")
#### END OF DEBUG ####
if cui in self.cdb.cui2context_vec and len(cntx_vecs) > 0:
sim = np.dot(unitvec(cntx), unitvec(self.cdb.cui2context_vec[cui]))
if cui in self.cdb.cui2context_vec_short and len(cntx_vecs_short) > 0:
sim2 = np.dot(unitvec(cntx_short), unitvec(self.cdb.cui2context_vec_short[cui]))
if sim2 > 0 and abs(sim - sim2) > 0.1:
sim = (sim + sim2) / 2
if name is not None:
if cui in self.cdb.cui2pref_name:
if name == self.cdb.cui2pref_name[cui]:
sim = min(1, sim + 0.1)
return sim
else:
return -1