Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
#Create co-occurrence matrix
if tag not in self.tagsToDiscard:
word_windows = list(range( max(0, len(block_of_word_obj)-windowsSize), len(block_of_word_obj) ))
for w in word_windows:
if block_of_word_obj[w][0] not in self.tagsToDiscard:
self.addCooccur(block_of_word_obj[w][2], term_obj)
#Generate candidate keyphrase list
candidate = [ (tag, word, term_obj) ]
cand = composed_word(candidate)
self.addOrUpdateComposedWord(cand)
word_windows = list(range( max(0, len(block_of_word_obj)-(n-1)), len(block_of_word_obj) ))[::-1]
for w in word_windows:
candidate.append(block_of_word_obj[w])
self.freq_ns[len(candidate)] += 1.
cand = composed_word(candidate[::-1])
self.addOrUpdateComposedWord(cand)
# Add term to the block of words' buffer
block_of_word_obj.append( (tag, word, term_obj) )
if len(block_of_word_obj) > 0:
sentence_obj_aux.append( block_of_word_obj )
if len(sentence_obj_aux) > 0:
self.sentences_obj.append(sentence_obj_aux)
if len(block_of_word_obj) > 0:
sentence_obj_aux.append( block_of_word_obj )
if len(sentence_obj_aux) > 0:
self.sentences_obj.append(sentence_obj_aux)
def build_candidate(self, candidate_string):
    """Build a ``composed_word`` for an arbitrary candidate string.

    The string is lower-cased and tokenized with ``web_tokenizer`` and
    ``split_contractions``. Empty tokens and multi-character tokens that
    start with an apostrophe are dropped. Each remaining token is turned
    into a ``(tag, word, term_obj)`` triple, where ``tag`` comes from
    ``self.getTag(word, position)`` and ``term_obj`` from
    ``self.getTerm(word, save_non_seen=False)``.

    Args:
        candidate_string: Text of the candidate keyphrase.

    Returns:
        ``composed_word(None)`` when no token resolves to a term with a
        non-zero ``tf`` (this includes the case where no tokens survive
        filtering); otherwise ``composed_word`` built from the full list
        of triples, including those whose term slot is ``None``.
    """
    tokens = [
        w
        for w in split_contractions(web_tokenizer(candidate_string.lower()))
        if not (w.startswith("'") and len(w) > 1) and len(w) > 0
    ]

    candidate_terms = []
    for i, word in enumerate(tokens):
        tag = self.getTag(word, i)
        term_obj = self.getTerm(word, save_non_seen=False)
        # A term whose tf is 0 is represented as None in the triple
        # (presumably it was never seen in the analysed text -- confirm
        # against getTerm).
        if term_obj.tf == 0:
            term_obj = None
        candidate_terms.append((tag, word, term_obj))

    # Identity check against None; also true for an empty token list.
    if all(term is None for _, _, term in candidate_terms):
        return composed_word(None)
    return composed_word(candidate_terms)
block_of_word_obj = []
else:
tag = self.getTag(word, pos_sent)
term_obj = self.getTerm(word)
term_obj.addOccur(tag, sentence_id, pos_sent, pos_text)
pos_text += 1
#Create co-occurrence matrix
if tag not in self.tagsToDiscard:
word_windows = list(range( max(0, len(block_of_word_obj)-windowsSize), len(block_of_word_obj) ))
for w in word_windows:
if block_of_word_obj[w][0] not in self.tagsToDiscard:
self.addCooccur(block_of_word_obj[w][2], term_obj)
#Generate candidate keyphrase list
candidate = [ (tag, word, term_obj) ]
cand = composed_word(candidate)
self.addOrUpdateComposedWord(cand)
word_windows = list(range( max(0, len(block_of_word_obj)-(n-1)), len(block_of_word_obj) ))[::-1]
for w in word_windows:
candidate.append(block_of_word_obj[w])
self.freq_ns[len(candidate)] += 1.
cand = composed_word(candidate[::-1])
self.addOrUpdateComposedWord(cand)
# Add term to the block of words' buffer
block_of_word_obj.append( (tag, word, term_obj) )
if len(block_of_word_obj) > 0:
sentence_obj_aux.append( block_of_word_obj )
if len(sentence_obj_aux) > 0:
self.sentences_obj.append(sentence_obj_aux)
def build_candidate(self, candidate_string):
    """Build a ``composed_word`` for an arbitrary candidate string.

    The string is lower-cased and tokenized with ``web_tokenizer`` and
    ``split_contractions``. Empty tokens and multi-character tokens that
    start with an apostrophe are dropped. Each remaining token is turned
    into a ``(tag, word, term_obj)`` triple, where ``tag`` comes from
    ``self.getTag(word, position)`` and ``term_obj`` from
    ``self.getTerm(word, save_non_seen=False)``.

    Args:
        candidate_string: Text of the candidate keyphrase.

    Returns:
        ``composed_word(None)`` when no token resolves to a term with a
        non-zero ``tf`` (this includes the case where no tokens survive
        filtering); otherwise ``composed_word`` built from the full list
        of triples, including those whose term slot is ``None``.
    """
    tokens = [
        w
        for w in split_contractions(web_tokenizer(candidate_string.lower()))
        if not (w.startswith("'") and len(w) > 1) and len(w) > 0
    ]

    candidate_terms = []
    for i, word in enumerate(tokens):
        tag = self.getTag(word, i)
        term_obj = self.getTerm(word, save_non_seen=False)
        # A term whose tf is 0 is represented as None in the triple
        # (presumably it was never seen in the analysed text -- confirm
        # against getTerm).
        if term_obj.tf == 0:
            term_obj = None
        candidate_terms.append((tag, word, term_obj))

    # Identity check against None; also true for an empty token list.
    if all(term is None for _, _, term in candidate_terms):
        return composed_word(None)
    return composed_word(candidate_terms)