from nltk.corpus import wordnet as wn

def convert_to_wordnet_pos(senseval_pos):
    if senseval_pos == 'VERB':
        return wn.VERB
    elif senseval_pos == 'NOUN':
        return wn.NOUN
    elif senseval_pos == 'ADV':
        return wn.ADV
    elif senseval_pos == 'ADJ':
        return wn.ADJ
    else:
        return None
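
# Quick sanity check of the mapping above: the wn.* POS constants are
# single-character strings, so the conversion can be verified directly.
assert convert_to_wordnet_pos('NOUN') == wn.NOUN == 'n'
assert convert_to_wordnet_pos('VERB') == wn.VERB == 'v'
assert convert_to_wordnet_pos('X') is None  # unknown tags map to None
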
from fuzzywuzzy import fuzz  # needed by fuzz.partial_ratio below; rapidfuzz offers the same API

def phrases_aligned(x, y):  # function name assumed; the original header was truncated
    """Check whether phrases x and y are aligned.

    :param x: the first argument
    :param y: the second argument
    :return: Whether they are aligned
    """
    global nlp
    # Allow partial matching: a perfect partial ratio means one phrase contains the other
    if fuzz.partial_ratio(' ' + x + ' ', ' ' + y + ' ') == 100:
        return True
    x_words = [w for w in x.split() if not nlp.is_stop(w)]
    y_words = [w for w in y.split() if not nlp.is_stop(w)]
    if len(x_words) == 0 or len(y_words) == 0:
        return False
    x_synonyms = [{lemma.lower().replace('_', ' ') for synset in wn.synsets(w) for lemma in synset.lemma_names()}
                  for w in x_words]
    y_synonyms = [{lemma.lower().replace('_', ' ') for synset in wn.synsets(w) for lemma in synset.lemma_names()}
                  for w in y_words]
    # One word on each side - check whether the two synonym sets intersect
    if len(x_synonyms) == 1 and len(y_synonyms) == 1 and \
            len([w for w in x_synonyms[0].intersection(y_synonyms[0]) if not nlp.is_stop(w)]) > 0:
        return True
    # More than one word - align words from x with words from y
    intersections = [len([w for w in s1.intersection(s2) if not nlp.is_stop(w)])
                     for s1 in x_synonyms for s2 in y_synonyms]
    if len([intersection_len for intersection_len in intersections if intersection_len > 0]) >= \
            0.75 * max(len(x_synonyms), len(y_synonyms)):
        return True
    # The original snippet fell through and returned None here; make the result explicitly boolean
    return False
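
# Hypothetical usage of the alignment check above (illustrative inputs;
# assumes the global nlp object exposes an is_stop() helper). For
# 'large dog' vs 'big dog', two of the four word pairs intersect
# ('large'/'big' share a WordNet synset, 'dog'/'dog' trivially),
# meeting the 0.75 * max(2, 2) = 1.5 threshold.
print(phrases_aligned('large dog', 'big dog'))    # True
print(phrases_aligned('large dog', 'small cat'))  # False
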
@classmethod  # create() takes cls, so it is presumably decorated as a classmethod
def create(cls, word1, word2):
    """Return a new class instance if word1 and word2 are valid
    words; otherwise return None.
    """
    # do some checking before creating a _WordPair instance
    if word1 in TermFrequency._FILTER or word2 in TermFrequency._FILTER:
        return None
    word1 = wordnet.morphy(word1, wordnet.NOUN)
    word2 = wordnet.morphy(word2, wordnet.NOUN)
    if word1 is None or word2 is None:
        return None
    # return the new instance
    return cls(word1, word2)
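
# For reference, wordnet.morphy normalizes inflected forms to a base noun
# and returns None for words WordNet does not know, which is what makes
# the validity check above work:
from nltk.corpus import wordnet
print(wordnet.morphy('dogs', wordnet.NOUN))        # 'dog'
print(wordnet.morphy('qwertyuiop', wordnet.NOUN))  # None -> create() returns None
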
def getWordnetPos(tag):
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    elif tag.startswith('S'):
        return wordnet.ADJ  # satellite adjectives are treated as plain adjectives
    else:
        return None
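
# A minimal sketch of how a tag mapper like getWordnetPos is typically
# combined with NLTK's tagger and lemmatizer (assumes the punkt and
# averaged_perceptron_tagger data packages are installed):
import nltk
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
for word, tag in nltk.pos_tag(nltk.word_tokenize('The striped bats were hanging')):
    wn_pos = getWordnetPos(tag)
    print(lemmatizer.lemmatize(word, wn_pos) if wn_pos else word)
# -> The striped bat be hang  (modulo tagger versions)
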
def __init__(self):
    self.synsets_list = list(wn.all_synsets())
    # Synset.offset is a method in modern NLTK, so it has to be called
    self.synset_to_id = {s: s.offset() for s in self.synsets_list}
    self.brown_ic = wordnet_ic.ic('ic-brown.dat')
    self.sem_hub = read_json_file("semantic-hub.txt")
    self.sem_hub = {data['offset']: data for data in self.sem_hub}

def synsets_mapping(self, term):
    return wn.synsets(term, pos=wn.NOUN)
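
# The Brown information-content counts loaded in __init__ are typically
# used with the IC-based similarity measures; a minimal sketch:
from nltk.corpus import wordnet as wn, wordnet_ic

brown_ic = wordnet_ic.ic('ic-brown.dat')  # requires the wordnet_ic nltk data package
dog, cat = wn.synset('dog.n.01'), wn.synset('cat.n.01')
print(dog.res_similarity(cat, brown_ic))  # Resnik similarity, roughly 7.9 with Brown counts
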
def penn_to_wn_tags(pos_tag):
    if pos_tag.startswith('J'):
        return wn.ADJ
    elif pos_tag.startswith('V'):
        return wn.VERB
    elif pos_tag.startswith('N'):
        return wn.NOUN
    elif pos_tag.startswith('R'):
        return wn.ADV
    else:
        return None
from nltk.corpus import wordnet as wn
import numpy as np

all_nouns = list(wn.all_synsets('n'))
# get mapping of synset id to index
id2index = {synset.name(): i for i, synset in enumerate(all_nouns)}
# get hypernym relations as (child_index, parent_index) pairs
hypernyms = []
for synset in all_nouns:
    for h in synset.hypernyms() + synset.instance_hypernyms():
        hypernyms.append([id2index[synset.name()], id2index[h.name()]])
hypernyms = np.array(hypernyms)
# save hypernyms
import h5py
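
# A minimal sketch of the elided save step; the file name and dataset
# name are assumptions, not part of the original snippet.
with h5py.File('hypernyms.h5', 'w') as f:
    # (num_edges, 2) int array of [child_index, parent_index] pairs
    f.create_dataset('hypernyms', data=hypernyms)
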
syn1 = self._synsets.get(term1)  # reconstructed: the matching lookup for term1 was cut off above
syn2 = self._synsets.get(term2)
if all(syn is None for syn in (syn1, syn2)):
    syn1, syn2 = self._get_synsets(term1, term2)
# If one or both synsets were not found in WordNet (a missing synset is
# None rather than a Synset object), cache whatever was found and score 0.
if syn1 is None or syn2 is None:
    if syn1 is not None:
        self._synsets[term1] = syn1
    if syn2 is not None:
        self._synsets[term2] = syn2
    return 0
score = wn.wup_similarity(syn1, syn2)
if score is None:
    score = 0
self._synset_pairs[sorted_terms] = score
return score
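
# For reference, wn.wup_similarity scores two synsets by the depth of their
# lowest common hypernym; it returns None when the synsets share no root,
# which is why the method above coerces None to 0.
from nltk.corpus import wordnet as wn
print(wn.wup_similarity(wn.synset('dog.n.01'), wn.synset('cat.n.01')))  # ~0.857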