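The snippets below all rely on the same crawl helpers (`CeleryMonitor`, `celery_app`, `get_crawler_instance`), which are not shown here. In the hepcrawl test suite they would typically be imported along these lines; the module paths are an assumption and may differ in your checkout:

# Assumed imports for the test snippets below (adjust paths as needed).
from hepcrawl.testlib.celery_monitor import CeleryMonitor
from hepcrawl.testlib.tasks import app as celery_app
from hepcrawl.testlib.utils import get_crawler_instance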
def test_wsp_ftp_crawl_twice(expected_results, settings, cleanup):
    crawler = get_crawler_instance(
        settings.get('CRAWLER_HOST_URL'),
    )

    # Schedule the WSP spider and wait for the crawl to report back
    # through Celery, collecting the crawl results.
    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=20,
        events_limit=1,
        crawler_instance=crawler,
        project=settings.get('CRAWLER_PROJECT'),
        spider='WSP',
        settings={},
        **settings.get('CRAWLER_ARGUMENTS')
    )

    assert len(crawl_results) == 1

    crawl_result = crawl_results[0]
    # Pull the harvested records out of the crawl result.
    gotten_results = [
        result['record'] for result in crawl_result['results_data']
    ]
def test_arxiv(
    expected_results,
    config,
    spider,
):
    crawler = get_crawler_instance(config['CRAWLER_HOST_URL'])

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=1,
        crawler_instance=crawler,
        project=config['CRAWLER_PROJECT'],
        spider=spider,
        settings={},
        **config['CRAWLER_ARGUMENTS']
    )

    assert len(crawl_results) == 1

    crawl_result = crawl_results[0]
    gotten_results = [
        result['record'] for result in crawl_result['results_data']
    ]
def test_pos_conference_paper_record_and_proceedings_record(
    expected_results,
    config,
):
    crawler = get_crawler_instance(config['CRAWLER_HOST_URL'])

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=2,
        crawler_instance=crawler,
        project=config['CRAWLER_PROJECT'],
        spider='pos',
        settings={},
        **config['CRAWLER_ARGUMENTS']
    )

    assert len(crawl_results) == 1

    crawl_result = crawl_results[0]
    gotten_results = [
        result['record'] for result in crawl_result['results_data']
    ]
def test_wsp(expected_results, settings, cleanup):
    crawler = get_crawler_instance(
        settings.get('CRAWLER_HOST_URL'),
    )

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=1,
        crawler_instance=crawler,
        project=settings.get('CRAWLER_PROJECT'),
        spider='WSP',
        settings={},
        **settings.get('CRAWLER_ARGUMENTS')
    )

    assert len(crawl_results) == 1

    crawl_result = crawl_results[0]
    gotten_results = [
        result['record'] for result in crawl_result['results_data']
    ]
def test_desy_crawl_twice(expected_results, settings, cleanup):
    crawler = get_crawler_instance(
        settings.get('CRAWLER_HOST_URL')
    )

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=1,
        crawler_instance=crawler,
        project=settings.get('CRAWLER_PROJECT'),
        spider='desy',
        settings={},
        **settings.get('CRAWLER_ARGUMENTS')
    )

    assert len(crawl_results) == 1

    crawl_result = crawl_results[0]
    gotten_records = [
        result['record'] for result in crawl_result['results_data']
    ]
def test_cds(
    expected_results,
    config,
    spider,
):
    crawler = get_crawler_instance(config['CRAWLER_HOST_URL'])

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=1,
        crawler_instance=crawler,
        project=config['CRAWLER_PROJECT'],
        spider=spider,
        settings={},
        **config['CRAWLER_ARGUMENTS']
    )

    assert len(crawl_results) == 1

    crawl_result = crawl_results[0]
    gotten_results = [
        result['record'] for result in crawl_result['results_data']
    ]
def test_desy(
    expected_results,
    settings,
    cleanup,
):
    crawler = get_crawler_instance(
        settings.get('CRAWLER_HOST_URL')
    )

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=1,
        crawler_instance=crawler,
        project=settings.get('CRAWLER_PROJECT'),
        spider='desy',
        settings={},
        **settings.get('CRAWLER_ARGUMENTS')
    )

    crawl_result = crawl_results[0]
    gotten_records = [
        result['record'] for result in crawl_result['results_data']
    ]
def test_desy_broken_xml(get_local_settings_for_broken, cleanup):
    settings = get_local_settings_for_broken
    crawler = get_crawler_instance(
        settings.get('CRAWLER_HOST_URL')
    )

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=2,
        crawler_instance=crawler,
        project=settings.get('CRAWLER_PROJECT'),
        spider='desy',
        settings={},
        **settings.get('CRAWLER_ARGUMENTS')
    )

    crawl_result = crawl_results[0]
    result_records = crawl_result['results_data']

    assert not crawl_result['errors']
    assert len(result_records) == 1
    res = result_records[0]
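Each test also depends on pytest fixtures (`settings`, `config`, `expected_results`, `cleanup`, `spider`, `get_local_settings_for_broken`) defined elsewhere in the suite. A minimal sketch of what a `settings`-style fixture could look like; the keys mirror those used above, while the host, project, and argument values are purely illustrative assumptions:

import pytest


@pytest.fixture
def settings():
    # Illustrative values only: point at the scrapyd host and project that
    # serve the spiders, and pass any spider arguments the crawl needs.
    return {
        'CRAWLER_HOST_URL': 'http://scrapyd:6800',
        'CRAWLER_PROJECT': 'hepcrawl',
        'CRAWLER_ARGUMENTS': {
            'ftp_host': 'ftp_server',    # hypothetical FTP host
            'ftp_netrc': '/etc/netrc',   # hypothetical netrc path
        },
    }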