How to use the goodtables.validate function in goodtables

To help you get started, we've selected a few goodtables.validate examples based on popular ways it is used in public projects.

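Before the project examples, here is a minimal sketch of calling validate directly (assuming goodtables 2.x is installed; the CSV path is just a placeholder):

from goodtables import validate

# The report is a plain dict: 'valid' is the overall verdict,
# 'error-count' and 'warnings' summarize problems, and 'tables'
# holds per-table details.
report = validate('data/invalid.csv')
print(report['valid'], report['error-count'])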

From frictionlessdata/goodtables-py (tests/test_validate.py):
def test_validate_warnings_table_limit():
    source = 'data/datapackages/invalid/datapackage.json'
    report = validate(source, preset='datapackage', table_limit=1)
    assert len(report['warnings']) == 1
    assert 'table(s) limit' in report['warnings'][0]

From frictionlessdata/goodtables-py (tests/presets/test_table.py):
def test_validate_table_invalid_row_limit(log):
    report = validate('data/invalid.csv', row_limit=2, infer_schema=True)
    assert log(report) == [
        (1, None, 3, 'blank-header'),
        (1, None, 4, 'duplicate-header'),
        (1, 2, 3, 'missing-value'),
        (1, 2, 4, 'missing-value'),
    ]

From frictionlessdata/goodtables-py (tests/test_validate.py):
def test_validate_infer_datapackage_dict(log):
    with open('data/datapackages/invalid/datapackage.json') as file:
        report = validate(json.load(file))
        assert report['error-count'] == 2

From frictionlessdata/goodtables-py (tests/checks/test_maximum_constraint.py):
def test_check_maximum_constraint(log):
    source = [
        ['row', 'score'],
        [2, 1],
        [3, 2],
        [4, 3],
        [5, 4],
        [6],
    ]
    schema = {'fields': [
        {'name': 'row', 'type': 'integer'},
        {'name': 'score', 'type': 'integer', 'constraints': {'maximum': 2}}
    ]}
    report = validate(source, schema=schema, checks=[
        'maximum-constraint',
    ])
    assert log(report) == [
        (1, 4, 2, 'maximum-constraint'),
        (1, 5, 2, 'maximum-constraint'),
    ]

From frictionlessdata/goodtables-py (tests/contrib/checks/test_deviated_value.py):
def test_check_deviated_value_not_enough_data(log):
    source = [
        ['temperature'],
        [1],
    ]
    report = validate(source, checks=[
        {'deviated-value': {'column': 'temperature'}},
    ])
    assert log(report) == []

From frictionlessdata/goodtables-py (tests/presets/test_datapackage.py):
def test_check_file_integrity(log):
    source = deepcopy(DESCRIPTOR)
    report = validate(source)
    assert log(report) == []

From frictionlessdata/goodtables-py (tests/test_validate.py):
'data':  [
                    ['id1', 'id2'],
                    ['a', '1'],
                    ['a', '1'],
                ],
                'schema': {
                    'fields': [
                        {'name': 'id1'},
                        {'name': 'id2'},
                    ],
                    'primaryKey': ['id1', 'id2']
                }
            }
        ],
    }
    report = validate(descriptor, skip_checks=['duplicate-row'])
    assert log(report) == [
        (1, 3, 1, 'unique-constraint'),
    ]

From catalyst-cooperative/pudl (results/datapkg-msha/pudl_msha_pkg.py):
microsecond=0).isoformat() + 'Z'
    # Have to set this to 'data-package' rather than 'tabular-data-package'
    # due to a DataHub.io bug
    pkg.descriptor['profile'] = 'data-package'
    pkg.commit()

    # save the datapackage
    print("Validating pudl-msha data package")
    if not pkg.valid:
        print("PUDL MSHA DATA PACKAGE IS NOT VALID.")
        return 1
    pkg.save(os.path.join(output_dir, 'datapackage.json'))

    # Validate some of the data...
    print("Validating pudl-msha data")
    report = goodtables.validate(os.path.join(
        output_dir, 'datapackage.json'), row_limit=args.row_limit)
    if not report['valid']:
        print("PUDL MSHA DATA TABLES FAILED TO VALIDATE")
        pprint(report)
        return 1

    shutil.copyfile(os.path.join(input_dir, "README.md"),
                    os.path.join(output_dir, "README.md"))
    shutil.copyfile(os.path.join(input_dir, sys.argv[0]),
                    os.path.join(output_dir, "scripts", sys.argv[0]))
    return 0
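
When a report comes back invalid, as in the script above, you can drill into per-table details rather than pretty-printing the whole dict. A minimal sketch, assuming the goodtables 2.x report layout in which each entry of report['tables'] carries an 'errors' list:

for table in report['tables']:
    for error in table['errors']:
        # Each error dict includes at least a 'code' and a human-readable 'message'.
        print(table['source'], error['code'], error['message'])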

From ezwelty/goodtables-pandas-py (goodtables_pandas/validate.py):
def validate(source, return_tables=False):
    # Start clock
    start = datetime.datetime.now()
    # Initialize report
    checks = [
        'blank-header',
        'duplicate-header',
        'non-matching-header',
        'extra-header',
        'missing-header'
    ]
    report = goodtables.validate(
        source=source, preset='datapackage', checks=checks,
        table_limit=math.inf, row_limit=0)
    # Remove row_limit warnings
    report['warnings'] = [w for w in report['warnings']
                          if re.match(r'row\(s\) limit', w)]
    # Retrieve descriptor
    package = datapackage.Package(source).descriptor
    resources = package.get('resources', [])
    names = [resource['name'] for resource in resources]
    # Expand descriptor
    for resource, name in zip(resources, names):
        schema = resource['schema']
        if 'primaryKey' in schema:
            schema['primaryKey'] = _as_list(schema['primaryKey'])
        if 'uniqueKeys' in schema:
            schema['uniqueKeys'] = [_as_list(k) for k in schema['uniqueKeys']]
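
The excerpt above relies on an _as_list helper that is not shown. A hypothetical version with the behavior the call sites imply (an assumption, not the project's actual code) would normalize a lone key into a list:

def _as_list(value):
    # Hypothetical helper: wrap a single field name so that both
    # 'id' and ['id1', 'id2'] come back as a list of field names.
    return value if isinstance(value, list) else [value]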

From catalyst-cooperative/pudl (src/pudl/output/export.py):
}

    # Use that descriptor to instantiate a Package object
    data_pkg = datapackage.Package(pkg_descriptor)

    # Validate the data package descriptor before going any further
    if not data_pkg.valid:
        logger.warning(f"""
            Invalid tabular data package: {data_pkg.descriptor["name"]}
            Errors: {data_pkg.errors}""")

    data_pkg.save(pkg_json)

    if not dry_run:
        # Validate the data within the package using goodtables:
        report = goodtables.validate(pkg_json, row_limit=100_000)
        if not report['valid']:
            logger.warning("Data package data validation failed.")

    return data_pkg