Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
* If ``tables`` are provided or they have been loaded, check
all the data types for the table correspond to each column and
all the data types exist on the table.
* Validate that there is no circular relationship in the metadata.
* Check that all the tables have at most one parent.
Args:
tables (bool, dict):
If a dict of table is passed, validate that the columns and
dtypes match the metadata. If ``True`` is passed, load the
tables from the Metadata instead. If ``None``, omit the data
validation. Defaults to ``None``.
"""
tables_meta = self._metadata.get('tables')
if not tables_meta:
raise MetadataError('"tables" entry not found in Metadata.')
if tables and not isinstance(tables, dict):
tables = self.load_tables()
for table_name, table_meta in tables_meta.items():
if tables:
table = tables.get(table_name)
if table is None:
raise MetadataError('Table `{}` not found in tables'.format(table_name))
else:
table = None
self._validate_table(table_name, table_meta, table)
self._validate_circular_relationships(table_name)
pk_field = table_meta['fields'].get(primary_key)
if not pk_field:
raise MetadataError('Primary key is not an existing field.')
if pk_field['type'] != 'id':
raise MetadataError('Primary key is not of type `id`.')
if table_data is not None:
for column in table_data:
try:
dtype = dtypes.pop(column)
table_data[column].dropna().astype(dtype)
except KeyError:
message = 'Unexpected column in table `{}`: `{}`'.format(table_name, column)
raise MetadataError(message) from None
except ValueError as ve:
message = 'Invalid values found in column `{}` of table `{}`: `{}`'.format(
column, table_name, ve)
raise MetadataError(message) from None
# assert all dtypes are in data
if dtypes:
raise MetadataError(
'Missing columns on table {}: {}.'.format(table_name, list(dtypes.keys()))
)
MetadataError:
If there is any error in the metadata or the data does not
match the metadata description.
"""
dtypes = self.get_dtypes(table_name, ids=True)
# Primary key field exists and its type is 'id'
primary_key = table_meta.get('primary_key')
if primary_key:
pk_field = table_meta['fields'].get(primary_key)
if not pk_field:
raise MetadataError('Primary key is not an existing field.')
if pk_field['type'] != 'id':
raise MetadataError('Primary key is not of type `id`.')
if table_data is not None:
for column in table_data:
try:
dtype = dtypes.pop(column)
table_data[column].dropna().astype(dtype)
except KeyError:
message = 'Unexpected column in table `{}`: `{}`'.format(table_name, column)
raise MetadataError(message) from None
except ValueError as ve:
message = 'Invalid values found in column `{}` of table `{}`: `{}`'.format(
column, table_name, ve)
raise MetadataError(message) from None
# assert all dtypes are in data
if dtypes:
on the metadata.
Raises:
MetadataError:
If there is any error in the metadata or the data does not
match the metadata description.
"""
dtypes = self.get_dtypes(table_name, ids=True)
# Primary key field exists and its type is 'id'
primary_key = table_meta.get('primary_key')
if primary_key:
pk_field = table_meta['fields'].get(primary_key)
if not pk_field:
raise MetadataError('Primary key is not an existing field.')
if pk_field['type'] != 'id':
raise MetadataError('Primary key is not of type `id`.')
if table_data is not None:
for column in table_data:
try:
dtype = dtypes.pop(column)
table_data[column].dropna().astype(dtype)
except KeyError:
message = 'Unexpected column in table `{}`: `{}`'.format(table_name, column)
raise MetadataError(message) from None
except ValueError as ve:
message = 'Invalid values found in column `{}` of table `{}`: `{}`'.format(
column, table_name, ve)
raise MetadataError(message) from None
def _validate_circular_relationships(self, parent, children=None):
    """Validate that there is no circular relationship in the metadata.

    Walk the descendants of ``parent``, as reported by ``self.get_children``,
    and fail if ``parent`` is found among them.

    Args:
        parent (str):
            Name of the table whose descendants are checked.
        children (iterable):
            Table names to start the walk from. If ``None``, the children
            of ``parent`` are used. Defaults to ``None``.

    Raises:
        MetadataError:
            If ``parent`` appears among its own descendants.
    """
    if children is None:
        children = self.get_children(parent)

    # Every table is expanded at most once. Without this bookkeeping a cycle
    # between descendants that does not include ``parent`` would be followed
    # forever, and shared descendants would be re-walked once per path.
    visited = set()
    pending = list(children)
    while pending:
        table = pending.pop()
        if table == parent:
            raise MetadataError('Circular relationship found for table "{}"'.format(parent))

        if table in visited:
            continue

        visited.add(table)
        pending.extend(self.get_children(table))
field_subtype = field.get('subtype')
dtype = self._DTYPES.get((field_type, field_subtype))
if not dtype:
raise MetadataError(
'Invalid type and subtype combination for field {}: ({}, {})'.format(
name, field_type, field_subtype)
)
if ids and field_type == 'id':
if (name != table_meta.get('primary_key')) and not field.get('ref'):
for child_table in self.get_children(table_name):
if name == self.get_foreign_key(table_name, child_table):
break
else:
raise MetadataError(
'id field `{}` is neither a primary or a foreign key'.format(name))
if ids or (field_type != 'id'):
dtypes[name] = dtype
return dtypes