Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _handle_lulayer_elt(self, elt):
    """Build a layer record from a layer element of an annotation set.

    The element's own attributes are loaded with
    ``_load_xml_attributes``, ``'_type'`` is set to ``'lulayer'``, and
    ``'label'`` collects the loaded attributes of every child whose tag
    ends with ``'label'``.  A child whose attributes load as ``None``
    is left out.
    """
    layer = self._load_xml_attributes(AttrDict(), elt)
    layer['_type'] = 'lulayer'
    layer['label'] = []
    for child in elt:
        # Only 'label' children contribute to the layer.
        if not child.tag.endswith('label'):
            continue
        label_attrs = self._load_xml_attributes(AttrDict(), child)
        if label_attrs is None:
            continue
        layer['label'].append(label_attrs)
    return layer
coreset = self._handle_fecoreset_elt(sub)
# assumes all FEs have been loaded before coresets
frinfo['FEcoreSets'].append(PrettyList(frinfo['FE'][fe.name] for fe in coreset))
elif sub.tag.endswith('lexUnit') and 'lexUnit' not in ignorekeys:
luentry = self._handle_framelexunit_elt(sub)
if luentry['status'] in self._bad_statuses:
# problematic LU entry; ignore it
continue
luentry['frame'] = frinfo
luentry['subCorpus'] = Future((lambda lu: lambda: self._lu_file(lu))(luentry))
frinfo['lexUnit'][luentry.name] = luentry
if not self._lu_idx:
self._buildluindex()
self._lu_idx[luentry.ID] = luentry
elif sub.tag.endswith('semType') and 'semTypes' not in ignorekeys:
semtypeinfo = self._load_xml_attributes(AttrDict(), sub)
frinfo['semTypes'].append(self.semtype(semtypeinfo.ID))
frinfo['frameRelations'] = self.frame_relations(frame=frinfo)
# resolve 'requires' and 'excludes' links between FEs of this frame
for fe in frinfo.FE.values():
if fe.requiresFE:
name, ID = fe.requiresFE.name, fe.requiresFE.ID
fe.requiresFE = frinfo.FE[name]
assert fe.requiresFE.ID==ID
if fe.excludesFE:
name, ID = fe.excludesFE.name, fe.excludesFE.ID
fe.excludesFE = frinfo.FE[name]
assert fe.excludesFE.ID==ID
return frinfo
def _loadsemtypes(self):
"""Create the semantic types index."""
self._semtypes = AttrDict()
semtypeXML = [x for x in XMLCorpusView(self.abspath("semTypes.xml"),
'semTypes/semType',
self._handle_semtype_elt)]
for st in semtypeXML:
n = st['name']
a = st['abbrev']
i = st['ID']
# Both name and abbrev should be able to retrieve the
# ID. The ID will retrieve the semantic type dict itself.
self._semtypes[n] = i
self._semtypes[a] = i
self._semtypes[i] = st
# now that all individual semtype XML is loaded, we can link them together
roots = []
for st in self.semtypes():
if st.superType:
def _handle_fulltextindex_elt(self, elt, tagspec=None):
    """
    Extract corpus/document info from a "corpus" element of the
    fulltextIndex.xml file.

    The corpus information is "flattened" onto its documents: every
    child whose tag ends with 'document' is loaded and then given
    three extra attributes:

    - ``corpname``: the name of the enclosing corpus element
    - ``corpid``: the ID of the enclosing corpus element
    - ``filename``: "<corpname>__<docname>.xml", where the document
      name is the document's ``name`` attribute if it has one and
      its ``description`` otherwise

    ``tagspec`` is accepted but not used.  Returns the list of
    document records in element order.
    """
    corpus = self._load_xml_attributes(AttrDict(), elt)
    corpname, corpid = corpus.name, corpus.ID
    documents = []
    for child in elt:
        if not child.tag.endswith('document'):
            continue
        doc = self._load_xml_attributes(AttrDict(), child)
        # Fall back to the description when the document has no name.
        docname = doc.name if 'name' in doc else doc.description
        doc.filename = "{0}__{1}.xml".format(corpname, docname)
        doc.corpname = corpname
        doc.corpid = corpid
        documents.append(doc)
    return documents
def _handle_elt(self, elt, tagspec=None):
    """Load the attributes of ``elt`` into a fresh ``AttrDict`` and return
    the result of ``_load_xml_attributes``.

    ``tagspec`` is accepted but not used.
    """
    attrs = AttrDict()
    return self._load_xml_attributes(attrs, elt)
def _handle_framerelationtype_elt(self, elt, *args):
    """Load a frame-relation-type element together with its child
    frame-relation elements.

    The returned record has ``'_type'`` set to ``'framerelationtype'``
    and a ``'frameRelations'`` ``PrettyList`` holding one entry per
    child whose tag ends with ``'frameRelation'``.  Each frame relation,
    and each item in its ``feRelations``, gets a ``'type'`` backpointer
    to the returned record.  Extra positional arguments are ignored.
    """
    reltype = self._load_xml_attributes(AttrDict(), elt)
    reltype['_type'] = 'framerelationtype'
    reltype['frameRelations'] = PrettyList()
    for child in elt:
        if not child.tag.endswith('frameRelation'):
            continue
        frame_rel = self._handle_framerelation_elt(child)
        # Backpointers from the relation and its FE relations to this type.
        frame_rel['type'] = reltype
        for fe_rel in frame_rel.feRelations:
            fe_rel['type'] = reltype
        reltype['frameRelations'].append(frame_rel)
    return reltype
def _handle_fulltext_sentence_elt(self, elt):
    """Load a 'sentence' element of a full-text annotation file.

    The returned record carries the element's attributes plus:

    - ``'_type'``: the string "sentence"
    - ``'text'``: the tag-stripped text of a child whose tag ends with
      'text' (the empty string if there is no such child)
    - ``'annotationSet'``: a list with one entry per child whose tag
      ends with 'annotationSet', each loaded by
      ``_handle_fulltextannotationset_elt``
    """
    sentence = self._load_xml_attributes(AttrDict(), elt)
    sentence['_type'] = "sentence"
    sentence['annotationSet'] = []
    sentence['text'] = ""
    for child in elt:
        tag = child.tag
        if tag.endswith('text'):
            sentence['text'] = self._strip_tags(child.text)
            continue
        if tag.endswith('annotationSet'):
            annoset = self._handle_fulltextannotationset_elt(child)
            sentence['annotationSet'].append(annoset)
    return sentence
def _handle_fulltextlayer_elt(self, elt):
    """Load a 'layer' element of a full-text annotation file.

    The returned record carries the element's attributes, ``'_type'``
    set to ``'layer'``, and a ``'label'`` list holding the loaded
    attributes of every child whose tag ends with ``'label'``.
    """
    layer = self._load_xml_attributes(AttrDict(), elt)
    layer['_type'] = 'layer'
    layer['label'] = [
        self._load_xml_attributes(AttrDict(), child)
        for child in elt
        if child.tag.endswith('label')
    ]
    return layer
def _handle_fulltextannotation_elt(self, elt):
    """Load the full annotation info for a document from the root
    element of its xml file.

    Only children whose tag ends with 'sentence' are used; each is
    loaded with ``_handle_fulltext_sentence_elt``.  Any other child,
    including the 'header' element, is ignored.  The returned record
    has ``'_type'`` set to ``'fulltextannotation'`` and the loaded
    sentences, in element order, under ``'sentence'``.
    """
    annotation = AttrDict()
    annotation['_type'] = 'fulltextannotation'
    # 'header' children never match the 'sentence' suffix, so they are skipped.
    annotation['sentence'] = [
        self._handle_fulltext_sentence_elt(child)
        for child in elt
        if child.tag.endswith('sentence')
    ]
    return annotation