Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _handle_lexunit_elt(self, elt, ignorekeys):
"""
Load full info for a lexical unit from its xml file.
This should only be called when accessing corpus annotations
(which are not included in frame files).
"""
luinfo = self._load_xml_attributes(AttrDict(), elt)
luinfo['_type'] = 'lu'
luinfo['definition'] = ""
luinfo['subCorpus'] = PrettyList()
luinfo['lexemes'] = PrettyList() # multiword LUs have multiple lexemes
luinfo['semTypes'] = PrettyList() # an LU can have multiple semtypes
for k in ignorekeys:
if k in luinfo:
del luinfo[k]
for sub in elt:
if sub.tag.endswith('header'):
continue # not used
elif sub.tag.endswith('valences'):
continue # not used
elif sub.tag.endswith('definition') and 'definition' not in ignorekeys:
luinfo['definition'] = self._strip_tags(sub.text)
elif sub.tag.endswith('subCorpus') and 'subCorpus' not in ignorekeys:
sc = self._handle_lusubcorpus_elt(sub)
if sc is not None:
rels = self._frel_idx.values()
# filter by 'frame2'
if frame2 is not None:
if frame is None:
raise FramenetError("frame_relations(frame=None, frame2=) is not allowed")
if not isinstance(frame2, int):
if isinstance(frame2, dict):
frame2 = frame2.ID
else:
frame2 = self.frame_by_name(frame2).ID
if frame==frame2:
raise FramenetError("The two frame arguments to frame_relations() must be different frames")
rels = [rel for rel in rels if rel.superFrame.ID==frame2 or rel.subFrame.ID==frame2]
return PrettyList(sorted(rels,
key=lambda frel: (frel.type.ID, frel.superFrameName, frel.subFrameName)))
frinfo['semTypes'] = []
for k in ignorekeys:
if k in frinfo:
del frinfo[k]
for sub in elt:
if sub.tag.endswith('definition') and 'definition' not in ignorekeys:
frinfo['definition'] = self._strip_tags(sub.text)
elif sub.tag.endswith('FE') and 'FE' not in ignorekeys:
feinfo = self._handle_fe_elt(sub)
frinfo['FE'][feinfo.name] = feinfo
feinfo['frame'] = frinfo # backpointer
elif sub.tag.endswith('FEcoreSet') and 'FEcoreSet' not in ignorekeys:
coreset = self._handle_fecoreset_elt(sub)
# assumes all FEs have been loaded before coresets
frinfo['FEcoreSets'].append(PrettyList(frinfo['FE'][fe.name] for fe in coreset))
elif sub.tag.endswith('lexUnit') and 'lexUnit' not in ignorekeys:
luentry = self._handle_framelexunit_elt(sub)
if luentry['status'] in self._bad_statuses:
# problematic LU entry; ignore it
continue
luentry['frame'] = frinfo
luentry['subCorpus'] = Future((lambda lu: lambda: self._lu_file(lu))(luentry))
frinfo['lexUnit'][luentry.name] = luentry
if not self._lu_idx:
self._buildluindex()
self._lu_idx[luentry.ID] = luentry
elif sub.tag.endswith('semType') and 'semTypes' not in ignorekeys:
semtypeinfo = self._load_xml_attributes(AttrDict(), sub)
frinfo['semTypes'].append(self.semtype(semtypeinfo.ID))
frinfo['frameRelations'] = self.frame_relations(frame=frinfo)
scon - subordinating conjunction
:type name: str
:return: A list of selected (or all) lexical units
:rtype: list of LU objects (dicts). See the lu() function for info
about the specifics of LU objects.
"""
try:
luIDs = list(self._lu_idx.keys())
except AttributeError:
self._buildluindex()
luIDs = list(self._lu_idx.keys())
if name is not None:
return PrettyList(self.lu(luID) for luID,luName in self.lu_ids_and_names(name).items())
else:
return PrettyLazyMap(self.lu, luIDs)
def _buildrelationindex(self):
#print('building relation index...', file=sys.stderr)
freltypes = PrettyList(x for x in XMLCorpusView(self.abspath("frRelation.xml"),
'frameRelations/frameRelationType',
self._handle_framerelationtype_elt))
self._freltyp_idx = {}
self._frel_idx = {}
self._frel_f_idx = defaultdict(set)
self._ferel_idx = {}
for freltyp in freltypes:
self._freltyp_idx[freltyp.ID] = freltyp
for frel in freltyp.frameRelations:
supF = frel.superFrame = frel[freltyp.superFrameName] = Future((lambda fID: lambda: self.frame_by_id(fID))(frel.supID))
subF = frel.subFrame = frel[freltyp.subFrameName] = Future((lambda fID: lambda: self.frame_by_id(fID))(frel.subID))
self._frel_idx[frel.ID] = frel
self._frel_f_idx[frel.supID].add(frel.ID)
self._frel_f_idx[frel.subID].add(frel.ID)
for ferel in frel.feRelations:
def _handle_semtype_elt(self, elt, tagspec=None):
    """
    Build a semantic-type record from an XML element.

    The element's own attributes are loaded into a fresh AttrDict. Each
    child element is then handled according to whether it carries text:
    a child with text supplies the definition, a child without text is
    recorded (attributes only) as the supertype.

    :param elt: the XML element to convert
    :param tagspec: not referenced by this method
    :return: the populated record; 'superType' stays None if no textless
        child is present, and 'subTypes' starts out as an empty PrettyList
    """
    record = self._load_xml_attributes(AttrDict(), elt)
    record['_type'] = 'semtype'
    record['superType'] = None
    record['subTypes'] = PrettyList()

    for child in elt:
        if child.text is None:
            # Only the child's attributes are stored here: the supertype
            # itself may not have been loaded yet.
            parent_attrs = self._load_xml_attributes(AttrDict(), child)
            record['superType'] = parent_attrs
            continue
        record['definition'] = self._strip_tags(child.text)

    return record
the "lemma" part can be made up of either a single lexeme
(e.g. 'run') or multiple lexemes (e.g. 'a little').
Note: if you are going to be doing a lot of this type of
searching, you'd want to build an index that maps from lemmas to
frames because each time frames_by_lemma() is called, it has to
search through ALL of the frame XML files in the db.
>>> from nltk.corpus import framenet as fn
>>> fn.frames_by_lemma(r'(?i)a little')
[<frame ID=189 name=Quanitity>, <frame ID=2001 name=Degree>]
:return: A list of frame objects.
:rtype: list(AttrDict)
"""
return PrettyList(f for f in self.frames() if any(re.search(pat, luName) for luName in f.lexUnit))
relation_type = type
if not self._frel_idx:
self._buildrelationindex()
rels = None
if relation_type is not None:
if not isinstance(relation_type, dict):
type = [rt for rt in self.frame_relation_types() if rt.name==type][0]
assert isinstance(type,dict)
# lookup by 'frame'
if frame is not None:
if isinstance(frame,dict) and 'frameRelations' in frame:
rels = PrettyList(frame.frameRelations)
else:
if not isinstance(frame, int):
if isinstance(frame, dict):
frame = frame.ID
else:
frame = self.frame_by_name(frame).ID
rels = [self._frel_idx[frelID] for frelID in self._frel_f_idx[frame]]
# filter by 'type'
if type is not None:
rels = [rel for rel in rels if rel.type is type]
elif type is not None:
# lookup by 'type'
rels = type.frameRelations
else:
rels = self._frel_idx.values()
def _handle_framelexunit_elt(self, elt):
    """
    Build a lexical-unit record from a lexical-unit element found in a
    frame's xml file.

    :param elt: the XML element describing the lexical unit
    :return: the record produced by ``_load_xml_attributes``, extended with
        'definition', 'sentenceCount', 'lexemes' and 'semTypes' entries
    """
    entry = AttrDict()
    entry['_type'] = 'lu'
    entry = self._load_xml_attributes(entry, elt)

    # Defaults, kept when the corresponding child element is absent.
    entry["definition"] = ""
    entry["sentenceCount"] = PrettyDict()
    entry['lexemes'] = PrettyList()  # a multiword LU contributes several lexemes
    entry['semTypes'] = PrettyList()  # more than one semtype may be attached

    for child in elt:
        tag = child.tag
        if tag.endswith('definition'):
            entry['definition'] = self._strip_tags(child.text)
        elif tag.endswith('sentenceCount'):
            counts = self._load_xml_attributes(PrettyDict(), child)
            entry['sentenceCount'] = counts
        elif tag.endswith('lexeme'):
            lexeme = self._load_xml_attributes(PrettyDict(), child)
            entry['lexemes'].append(lexeme)
        elif tag.endswith('semType'):
            # The child only carries a reference; resolve it through
            # self.semtype using its ID.
            ref = self._load_xml_attributes(PrettyDict(), child)
            entry['semTypes'].append(self.semtype(ref.ID))

    return entry
def _handle_framerelationtype_elt(self, elt, *args):
    """
    Build a frame-relation-type record, together with the frame relations
    (and their FE relations) nested beneath it.

    :param elt: the XML element for the relation type
    :param args: extra positional arguments are accepted and ignored
    :return: the record, whose 'frameRelations' entry lists every child
        whose tag ends in 'frameRelation', in document order
    """
    reltype = self._load_xml_attributes(AttrDict(), elt)
    reltype['_type'] = 'framerelationtype'
    reltype['frameRelations'] = PrettyList()

    for child in elt:
        if not child.tag.endswith('frameRelation'):
            continue
        relation = self._handle_framerelation_elt(child)
        # Backpointers: the relation and each of its FE relations refer
        # back to the type record being built.
        relation['type'] = reltype
        for fe_relation in relation.feRelations:
            fe_relation['type'] = reltype
        reltype['frameRelations'].append(relation)

    return reltype