"""Return a parse call to a record without spalsh page url."""
spider = brown_spider.BrownSpider()
body = """
{
"items": {
"docs": [
{
"json_uri": "https://repository.library.brown.edu/api/pub/items/bdr:11303/"
}
]
}
}
"""
response = fake_response_from_string(body)
jsonresponse = json.loads(response.body_as_unicode())
jsonrecord = jsonresponse["items"]["docs"][0]
response.meta["jsonrecord"] = jsonrecord
parsed_item = next(spider.parse(response))
assert parsed_item
assert parsed_item.record
return parsed_item.record
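
# The snippets here rely on a `fake_response_from_string` test helper. Below is
# a minimal sketch of what such a helper could look like, assuming Scrapy's
# `TextResponse`; the signature and defaults of the actual hepcrawl helper may
# differ.
from scrapy.http import Request, TextResponse


def fake_response_from_string_sketch(text, url='http://www.example.com'):
    """Build a fake Scrapy response wrapping ``text`` (sketch, not the real helper)."""
    request = Request(url=url)
    # Attaching the request makes `response.meta` usable, which the snippet
    # above needs in order to pass `jsonrecord` to the spider.
    return TextResponse(url=url, request=request, body=text, encoding='utf-8')
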
body = """
<article xmlns:xlink="http://www.w3.org/1999/xlink">
<title>References</title>
<label>5.</label>R.V. Krishnan, G. Panneerselvam, P. Manikandan M.P. Antony, K. Nagarajan, <source>J. Nucl. Radiochem. Sci., 10.1, 19–26 (2009).
<label>44.</label>L. Cronin, P. Sojka, A. Lefebvre, <source>SAE Technical Paper, DOI: 10.4271/852086, (1985)
<label>3.</label>T. Aliyev, Т. Belyaev, S. Gallagher Simulation in ANSYS flow to the gas purification section of the multicomponent gas mixture through the dust cyclone CKBN GP-628. <source>Mechanical engineering, Moscow, №10, (2014).
</article>
"""
response = fake_response_from_string(body)
node = get_node(spider, "//article", response)[0]
parsed_item = spider.parse_node(response, node)
assert parsed_item
assert parsed_item.record
return parsed_item.record
def test_not_published_record():
"""Not-published paper should result in nothing."""
spider = iop_spider.IOPSpider()
body = """
<article>
2015
03
</article>
"""
response = fake_response_from_string(body)
node = get_node(spider, "Article", response)
spider.pdf_files = get_test_suite_path(
'responses',
'iop',
'pdf',
)
records = spider.parse_node(response, node)
assert records is None
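
# `get_test_suite_path` above resolves a location inside the test suite's
# `responses` tree. A minimal sketch of such a helper is shown below, under the
# assumption that the test module sits at the suite root; the real hepcrawl
# helper may locate that root differently.
import os


def get_test_suite_path_sketch(*path_chunks):
    """Join ``path_chunks`` onto this test module's directory (sketch only)."""
    suite_root = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(suite_root, *path_chunks)
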
spider = edp_spider.EDPSpider()
body = """
<article xmlns:xlink="http://www.w3.org/1999/xlink">
aa14485-102010A%26A...516A..97N
Dielectronic recombination of argon-like ions
</article>
"""
response = fake_response_from_string(body)
node = get_node(spider, "//article", response)[0]
parsed_item = spider.parse_node(response, node)
assert parsed_item
assert parsed_item.record
record = parsed_item.record
assert "related_article_doi" in record
assert record["related_article_doi"][0][
"value"] == "10.1051/0004-6361/201014485"
"""Test MSc thesis skipping.
Return a HEPrecord for a Master's thesis (should be None as we don't
want them)."""
spider = infn_spider.InfnSpider()
body = """
Tipo
Magister
"""
response = fake_response_from_string(body)
record = spider.scrape_splash(response)
assert record is None
def non_thesis():
"""Return a heprecord for a Master's thesis (should be None as we don't
want them)."""
spider = mit_spider.MITSpider()
body = """
dc.description.degree
M.Sc.
en_US
"""
response = fake_response_from_string(body)
return spider.build_item(response)
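
# Hypothetical consumer of the fixture above. In a real pytest module the
# `non_thesis` function would carry a `@pytest.fixture` decorator (not shown in
# this snippet); the test name is an assumption, while the expectation follows
# from the docstring: Master's theses are not wanted.
def test_non_thesis(non_thesis):
    """Building an item for a Master's thesis should yield None."""
    assert non_thesis is None
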
def package_rich(tarbzfile):
"""Extract tar.gz package with 'rich' XML file."""
spider = edp_spider.EDPSpider()
response = fake_response_from_string(text="", url="file://" + tarbzfile)
return next(spider.handle_package_file(response))
def test_collections_review():
"""Test collections when doctype is review. JATS format."""
spider = edp_spider.EDPSpider()
body = """
<article xmlns:xlink="http://www.w3.org/1999/xlink">
</article>
"""
response = fake_response_from_string(body)
node = get_node(spider, "//article", response)[0]
parsed_item = spider.parse_node(response, node)
assert parsed_item
assert parsed_item.record
record = parsed_item.record
assert "collections" in record
assert record["collections"] == [{'primary': 'HEP'}, {'primary': 'Review'}]
"""Parse the node in the listing without author, date, or url. Should
take straight to `build_item` and build the HEPRecord.
"""
spider = magic_spider.MagicSpider()
body = """
<table style="margin-left: 20px; width: 920px;" class="list">
<tbody><tr class="odd">
<td><a>Limits to the violation of...</a></td>
</tr>
</tbody></table>
"""
response = fake_response_from_string(body)
node = get_node(spider, spider.itertag, text=body)
parsed_item = next(spider.parse_node(response, node))
assert parsed_item
assert parsed_item.record
record = parsed_item.record
assert isinstance(record, hepcrawl.items.HEPRecord)
assert "date" not in record
assert "authors" not in record
def test_no_aff():
"""Test the result of calling `scrape_for_pdf` without author
affiliation. Should be a HEPRecord."""
spider = magic_spider.MagicSpider()
body = """
<div id="content">
<h3 class="pub_title">Limits to the violation of Lorentz...</h3>
<p class="author">Daniel Garrido Terrats</p>
</div>
"""
response = fake_response_from_string(body)
parsed_item = spider.scrape_for_pdf(response).next()
assert parsed_item
assert parsed_item.record
record = parsed_item.record
assert isinstance(record, hepcrawl.items.HEPRecord)
assert "date" not in record
assert "affiliations" not in record["authors"]