Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_inspector_warnings_table_and_error_limit():
    """Expect a table-limit and an error-limit warning when both limits are 1."""
    limited_inspector = Inspector(table_limit=1, error_limit=1)
    result = limited_inspector.inspect(
        'data/datapackages/invalid/datapackage.json',
        preset='datapackage',
    )
    messages = result['warnings']
    # Exactly two warnings, in this order: tables first, then errors.
    assert len(messages) == 2
    assert 'table(s) limit' in messages[0]
    assert 'error(s) limit' in messages[1]
def test_inspector_datapackage_valid(log, dp_path):
    """Inspecting the package at ``dp_path`` must yield an empty log."""
    result = Inspector().inspect(dp_path)
    logged = log(result)
    assert logged == []
def test_inspector_catch_all_open_exceptions(log):
    """Inspecting ``data/latin1.csv`` as utf-8 must log one ``source-error``."""
    expected = [(1, None, None, 'source-error')]
    result = Inspector().inspect('data/latin1.csv', encoding='utf-8')
    assert log(result) == expected
def test_nested_presets_set_default_preset():
    """A nested source entry that names no preset is valid and warning-free."""
    # The single entry deliberately carries only a 'source' key.
    sources = [{'source': 'data/datapackages/valid/datapackage.json'}]
    result = Inspector(infer_schema=True).inspect(sources, preset='nested')
    assert result['valid']
    assert result['warnings'] == []
def test_inspector_table_invalid(log):
    """Inspecting ``data/invalid.csv`` must log the seven expected entries."""
    expected = [
        (1, None, 3, 'blank-header'),
        (1, None, 4, 'duplicate-header'),
        (1, 2, 3, 'missing-value'),
        (1, 2, 4, 'missing-value'),
        (1, 3, None, 'duplicate-row'),
        (1, 4, None, 'blank-row'),
        (1, 5, 5, 'extra-value'),
    ]
    result = Inspector(infer_schema=True).inspect('data/invalid.csv')
    assert log(result) == expected
def test_inspector_no_headers():
    """With ``headers=None`` the first table has 3 rows and one extra-value error."""
    result = Inspector().inspect('data/invalid_no_headers.csv', headers=None)
    first_table = result['tables'][0]
    assert first_table['row-count'] == 3
    assert first_table['error-count'] == 1
    assert first_table['errors'][0]['code'] == 'extra-value'
# Fragment of a larger routine: `datapackage` and `start` are defined before
# this point and are not visible here.
# Step 1: collect one table entry per resource that looks like CSV.
tables = []
for resource in datapackage.resources:
# A resource is treated as tabular when its descriptor declares format 'csv'
# or mediatype 'text/csv', or when its local data path ends with 'csv'.
# NOTE(review): .endswith is called on local_data_path directly - confirm it
# is always a string for resources that reach this third check.
is_tabular = resource.descriptor.get('format', None) == 'csv' \
or resource.descriptor.get('mediatype', None) == 'text/csv' \
or resource.local_data_path.endswith('csv')
if is_tabular:
# Prefer the remote path; fall back to the local one when it is falsy.
path = resource.remote_data_path or resource.local_data_path
# Each entry carries the source path, a Stream over it with headers=1,
# a Schema built from the resource descriptor's 'schema', and empty extras.
tables.append({
'source': path,
'stream': Stream(path, headers=1),
'schema': Schema(resource.descriptor['schema']),
'extra': {}
})
# Step 2: inspect every collected table and gather the per-table reports.
inspector = Inspector()
reports = []
errors = []
for table in tables:
# Calls Inspector's double-underscore method `__inspect_table` through its
# name-mangled attribute, i.e. a private method accessed from outside the class.
report = inspector._Inspector__inspect_table(table)
errors.extend(report['errors'])
reports.append(report)
# Stop timer
stop = datetime.datetime.now()
# Keep only the first 1000 collected errors.
errors = errors[:1000]
# Step 3: build the summary report (the literal continues past this fragment).
# 'time' is the elapsed seconds between `start` and `stop`, rounded to 3 places.
# 'valid' is True when there are no reports, otherwise True only if every
# per-table report is valid.
# 'error-count' sums the per-report error lists, not the truncated `errors`.
report = {
'time': round((stop - start).total_seconds(), 3),
'valid': True if len(reports) == 0 else all(report['valid'] for report in reports),
'table-count': len(tables),
'error-count': sum(len(report['errors']) for report in reports),
# Fragment of a function body: `data`, `tables` and `warnings` are defined
# before this point and are not visible here.
# presumably `data` is a decoded CKAN search response - TODO confirm.
# Walk every resource of every package under data['result']['results'].
for package in data['result']['results']:
for resource in package['resources']:
# Only resources whose URL ends with '.csv' become tables.
if resource['url'].endswith('.csv'):
# Each entry streams the resource URL with headers=1, supplies no schema,
# and records the package title, resource name and organization name
# under 'extra'.
tables.append({
'source': resource['url'],
'stream': Stream(resource['url'], headers=1),
'schema': None,
'extra': {
'dataset': package['title'],
'resource': resource['name'],
'publisher': package['organization']['name']
},
})
# Hand back the warnings and the collected tables as a pair.
return warnings, tables
# Register the custom `ckan_preset`, inspect the site below with the 'ckan'
# preset, and pretty-print the resulting report.
portal_url = 'http://data.surrey.ca'
inspector = Inspector(custom_presets=[ckan_preset])
report = inspector.inspect(portal_url, preset='ckan')
pprint(report)
from pprint import pprint
from goodtables import Inspector
# Inspect two data packages in one run with the 'datapackages' preset and
# pretty-print the combined report.
sources = [
    {'source': 'data/datapackages/valid/datapackage.json'},
    {'source': 'data/datapackages/invalid/datapackage.json'},
]
inspector = Inspector()
report = inspector.inspect(sources, preset='datapackages')
pprint(report)