Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
@GateNlpPr
def run(doc, **kwargs):
    """Tokenize ``doc.text`` into "Token" annotations.

    Every maximal run of characters that are neither whitespace nor one of
    ``, . ! ?`` becomes one "Token" annotation in the "PythonTokenizeFunc"
    annotation set, carrying its zero-based index in the "tokennr" feature.
    The set is cleared before the tokens are added, and the number of tokens
    is stored in the document feature "nr_tokens".

    Args:
        doc: document to annotate; this code uses its ``text`` attribute and
            its ``get_annotations`` / ``set_feature`` methods.
        **kwargs: accepted but not used.
    """
    token_set = doc.get_annotations("PythonTokenizeFunc")
    token_set.clear()

    # Match the tokens themselves rather than the gaps between separator
    # matches. With the gap approach, Python 3.7+ reports an empty
    # end-of-text separator match right after a non-empty one, which
    # produced a zero-length trailing token (and an inflated count) whenever
    # the text ended in a separator.
    spans = [match.span() for match in re.finditer(r"[^\s,.!?]+", doc.text)]
    for token_nr, (start, end) in enumerate(spans):
        token_set.add(start, end, "Token", {"tokennr": token_nr})

    doc.set_feature("nr_tokens", len(spans))
@GateNlpPr
def run(doc, **kwargs):
    """Apply a fixed series of modifications to ``doc`` and print its changelog.

    Steps, in order: report the received keyword arguments on stderr, call
    ``helpermodule.helperfunc()``, replace the contents of the
    "PythonModifyFunc" annotation set with a single "Type1" annotation over
    offsets 1-4, set and then clear the document features, set "feat1" and
    "feat2" (writing "feat1" twice), and finally print ``doc.changelog``
    to stdout.

    NOTE(review): the sequence looks like a smoke test for change logging;
    confirm against the caller before reordering anything.

    Args:
        doc: document to modify.
        **kwargs: only echoed in the stderr message.
    """
    banner = "We are running on a doc! kwargs={}".format(kwargs)
    print(banner, file=sys.stderr)
    helpermodule.helperfunc()

    annotations = doc.get_annotations("PythonModifyFunc")
    annotations.clear()
    type1_features = {"f1": 12, "f2": "val2"}
    annotations.add(1, 4, "Type1", type1_features)

    # This feature is set and then immediately wiped by clear_features().
    doc.set_feature("FEAT", "VAL")
    doc.clear_features()

    # "feat1" is deliberately written twice; the second value (13) wins.
    for feature_name, feature_value in (
        ("feat1", 12),
        ("feat2", "asdf"),
        ("feat1", 13),
    ):
        doc.set_feature(feature_name, feature_value)

    print("changelog", doc.changelog)
def __call__(self, doc, **kwargs):
    """Process one document with ``self.nlp`` and update the running counters.

    The target annotation set is named by the optional
    ``outputAnnotationSet`` keyword argument (default: the set named "").
    That set is cleared, ``apply_nlp`` is run on the document with it as
    ``setname``, and then ``self.tokens_total`` grows by ``len(doc)`` and
    ``self.nr_docs`` by one.

    Args:
        doc: the document to process.
        **kwargs: may contain "outputAnnotationSet"; other keys are ignored.

    Returns:
        The same ``doc`` object that was passed in.
    """
    # print("PROCESSING: ",doc.get_feature("gate.plugin.python.docName"))
    target_set_name = kwargs.get("outputAnnotationSet", "")

    # Start from an empty set so earlier annotations do not linger.
    doc.get_annotations(target_set_name).clear()
    apply_nlp(self.nlp, doc, setname=target_set_name)

    # NOTE(review): the counter is called tokens_total but is incremented by
    # len(doc) — confirm what len() measures for this document type.
    self.tokens_total += len(doc)
    self.nr_docs += 1
    return doc