Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
self["id"] = wiki_document.id,
self["url"] = wiki_document.url
self["text"] = wiki_document.text
def setAnnotations(self, annotations):
    """Store ``annotations`` on this mapping under the ``"annotations"`` key.

    Any value already stored under that key is overwritten. Returns ``None``.
    """
    self["annotations"] = annotations
def __str__(self):
    """Return this object serialised with ``json.dumps`` plus a trailing newline."""
    return json.dumps(self) + "\n"
"""
An extended version of the WikiExtractor. Output is in JSON format and annotations
are added for links in the article. See README.md for more information about the
JSON format.
"""
class AnnotatedWikiExtractor (wikiextractor.WikiExtractor):
def __init__(self):
    """Initialise the extractor.

    Sets the ``prefix`` attribute of the ``wikiextractor`` module to the
    empty string, then runs the base ``WikiExtractor`` initialiser.
    """
    # Assigned on the module rather than on the instance, so the value is
    # shared by everything in this process that reads ``wikiextractor.prefix``.
    wikiextractor.prefix = ''
    wikiextractor.WikiExtractor.__init__(self)
def extract(self, wiki_document):
annotations = []
#Extract the article using the general WikiExtractor:
wiki_document = wikiextractor.WikiExtractor.extract(self, wiki_document)
if not wiki_document: return None
#This int is used to keep track of the difference between the original article with <a href="..">
#links and the new article that only contains the label of the link.
deltaStringLength = 0
</a>
self["id"] = wiki_document.id,
self["url"] = wiki_document.url
self["text"] = wiki_document.text
def setAnnotations(self, annotations):
    """Store ``annotations`` on this mapping under the ``"annotations"`` key.

    Any value already stored under that key is overwritten. Returns ``None``.
    """
    self["annotations"] = annotations
def __str__(self):
    """Return this object serialised with ``json.dumps`` plus a trailing newline."""
    return json.dumps(self) + "\n"
"""
An extended version of the WikiExtractor. Output is in JSON format and annotations
are added for links in the article. See README.md for more information about the
JSON format.
"""
class AnnotatedWikiExtractor (wikiextractor.WikiExtractor):
def __init__(self):
    """Initialise the extractor.

    Sets the ``prefix`` attribute of the ``wikiextractor`` module to the
    English Wikipedia article base URL, then runs the base ``WikiExtractor``
    initialiser.
    """
    # Assigned on the module rather than on the instance, so the value is
    # shared by everything in this process that reads ``wikiextractor.prefix``.
    wikiextractor.prefix = 'http://en.wikipedia.org/wiki/'
    wikiextractor.WikiExtractor.__init__(self)
def extract(self, wiki_document):
annotations = []
#Extract the article using the general WikiExtractor:
wiki_document = wikiextractor.WikiExtractor.extract(self, wiki_document)
if not wiki_document: return None
#This int is used to keep track of the difference between the original article with <a href="..">
#links and the new article that only contains the label of the link.
deltaStringLength = 0
</a>