Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def documents_from_file(filename):
    """Lazily yield one Document per line of a bz2-compressed JSON-lines file.

    The file is opened in text mode and decoded as UTF-8. Each line is parsed
    with ``json.loads`` and the result is handed to ``Document.from_json``.
    A line that cannot be turned into a document is logged with
    ``logging.warning`` and skipped, so one bad record does not end the
    iteration.

    Args:
        filename: Path of the bz2-compressed file to read.

    Yields:
        Whatever ``Document.from_json`` returns for each readable line.
    """
    with bz2.open(filename, "rt", encoding="UTF-8") as f:
        for line in f:
            try:
                # Decode before touching Document so a JSON syntax error is
                # reported as such.
                payload = json.loads(line)
                doc = Document.from_json(payload)
            except (ValueError, KeyError, TypeError) as e:
                # ValueError covers json.JSONDecodeError. KeyError/TypeError
                # cover lines that are valid JSON but not a usable record
                # (e.g. no "docId" key, or a top-level null/list/number).
                logging.warning("Error while reading document (%s); skipping", e)
                continue
            # Yield outside the try block: an exception raised into the
            # generator by its consumer must not be mistaken for a read error.
            yield doc
def from_json(cls, json_doc):
    """Build a Document from an already-decoded JSON mapping.

    Reads the required ``"docId"`` entry and the optional ``"pages"`` entry
    (treated as empty when absent), converting every page entry with
    ``Page.from_json``.

    NOTE(review): ``cls`` is not used; the result is always constructed via
    ``Document`` directly. Presumably this is a classmethod whose decorator
    is outside the visible snippet -- confirm.

    Args:
        json_doc: Mapping produced by decoding one JSON document.

    Returns:
        A ``Document`` created with ``doc_id`` and ``pages`` keyword arguments.

    Raises:
        KeyError: If ``json_doc`` has no ``"docId"`` key.
    """
    # Look up the identifier first so a missing "docId" fails before any
    # page is parsed.
    identifier = json_doc["docId"]
    parsed_pages = []
    for raw_page in json_doc.get("pages", []):
        parsed_pages.append(Page.from_json(raw_page))
    return Document(doc_id=identifier, pages=parsed_pages)