Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def main(path, coef, top):
documents_with_text = dict()
documents_without_text = dict()
documents_zero_abs = dict()
documents_empty_body = dict()
cnt = 0
collection = Collection('Cord19AbstractCollection', path)
articles = collection.__next__()
# iterate through raw collection
for (i, d) in enumerate(articles):
article = Cord19Article(d.raw)
# documents with empty abstract
if len(article.abstract()) == 0:
documents_zero_abs.setdefault(article.cord_uid(), [])
documents_zero_abs[article.cord_uid()].append(article.metadata()["doi"])
else:
# document does not have text
if not article.is_full_text():
documents_without_text.setdefault(article.cord_uid(), [])
documents_without_text[article.cord_uid()].append(article.metadata()["doi"])
documents_without_text[article.cord_uid()].append(len(article.title()))
documents_without_text[article.cord_uid()].append(len(article.abstract()))
# document whose text body is empty
elif len(article.body()) == 0:
documents_empty_body.setdefault(article.cord_uid(), [])
documents_empty_body[article.cord_uid()].append(article.metadata()["doi"])
# normal document and we save for analysis later