# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_pipeline_report_limit_in_range(self):
    """An in-range report_limit caps the number of results in the report."""
    source = os.path.join(self.data_dir, 'report_limit_structure.csv')
    checker = Pipeline(source, processors=('structure',),
                       report_limit=1, options={})
    result, report = checker.run()
    generated = report.generate()
    self.assertEqual(len(generated['results']), 1)
def test_messytables_source_two(self):
    """A UTF-16LE encoded CSV from the messytables test corpus loads over HTTP."""
    source_url = 'https://raw.githubusercontent.com/okfn/messytables/master/horror/utf-16le_encoded.csv'
    checker = Pipeline(source_url)
    result, report = checker.run()
    self.assertTrue(checker.data)
def test_pipeline_row_limit_out_range(self):
    """A row_limit above ROW_LIMIT_MAX is clamped back down to the maximum."""
    ceiling = Pipeline.ROW_LIMIT_MAX
    source = os.path.join(self.data_dir, 'valid.csv')
    checker = Pipeline(source, row_limit=ceiling + 1)
    # Both the pipeline itself and its first processor must see the clamp.
    self.assertEqual(checker.row_limit, ceiling)
    self.assertEqual(checker.pipeline[0].row_limit, ceiling)
def test_pipeline_report_stream_none(self):
    """Passing report_stream=None is accepted and validation still succeeds."""
    source = os.path.join(self.data_dir, 'valid.csv')
    checker = Pipeline(source, processors=('schema',),
                       report_stream=None, options={})
    result, report = checker.run()
    self.assertTrue(result)
def test_from_url(self):
    """A pipeline built from a URL source fetches and retains the data."""
    checker = Pipeline(self.data_url)
    result, report = checker.run()
    self.assertTrue(checker.data)
def test_pipeline_ignore_duplicate_columns_false(self):
    """With default settings, duplicate column headers fail structure checks."""
    source = os.path.join(self.data_dir, 'duplicate_columns.csv')
    checker = Pipeline(source, processors=('structure',))
    result, report = checker.run()
    self.assertFalse(result)
def test_pipeline_error_report_when_invalid_excel_error(self):
    """An unreadable Excel file fails fast with a single invalid_excel_error result."""
    source = os.path.join(self.data_dir, 'hmt', 'invalid_excel.xlsx')
    checker = Pipeline(source, fail_fast=True, format='excel')
    result, report = checker.run()
    results = report.generate()['results']
    self.assertFalse(result)
    self.assertEqual(len(results), 1)
    self.assertEqual(results[0]['result_id'], 'invalid_excel_error')
def test_messytables_source_three(self):
    """A sparse CSV with column errors from the messytables corpus still loads."""
    source_url = 'https://raw.githubusercontent.com/okfn/messytables/master/horror/sparse_with_column_errors.csv'
    checker = Pipeline(source_url)
    result, report = checker.run()
    self.assertTrue(checker.data)
def test_gla_source_two(self):
    """A real-world GLA spending report loads but fails validation."""
    source = os.path.join(self.data_dir, 'jungle', 'gla-250-report-2014-15-P07.csv')
    checker = Pipeline(source)
    result, report = checker.run()
    # The file is readable (data present) yet non-compliant (result False).
    self.assertFalse(result)
    self.assertTrue(checker.data)
def check_database_content(self):
    """Check that the database content is compliant with the datapackage.

    Runs a ``schema`` validation pipeline over every locally-present
    resource declared in the datapackage.

    Raises:
        ValueError: if any resource fails schema validation; the message
            lists the offending file and the validator's result messages.
    """
    self.run()
    for resource in self.datapackage.resources:
        resource_path = resource.local_data_path
        # Resources without a local file cannot be validated here; skip.
        if not os.path.exists(resource_path):
            continue
        options = {'schema': {'schema': resource.descriptor['schema']}}
        pipe = pipeline.Pipeline(resource_path, processors=['schema'],
                                 options=options)
        result, report = pipe.run()
        if result is False:
            issues = [res['result_message']
                      for res in report.generate()['results']]
            # Fixed: the original implicit string concatenation produced
            # '"datapackage.json".Errors:' with no separating space.
            msg = ('The file {0} is not compliant with the schema '
                   'you declared for it in "datapackage.json". '
                   'Errors: {1}').format(resource_path, ';'.join(issues))
            raise ValueError(msg)