Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _pf_validation(pf, columns, index, categories, filters):
"""Validate user options against metadata in dataset

columns, index and categories must be in the list of columns available
(both data columns and path-based partitioning - subject to possible
renaming, if pandas metadata is present). The output index will
be inferred from any available pandas metadata, if not given.

Steps visible in this excerpt:

1. ``categories`` is checked against ``pf.columns``, and ``columns``
   against ``pf.columns`` extended with ``list(pf.cats or [])``, both via
   ``fastparquet.util.check_column_names``.
2. A tuple ``columns`` is converted to a list.
3. The values of every ``pf.fmd.key_value_metadata`` entry whose key is
   ``"pandas"`` are collected into ``pandas_md``.
4. Index names, column names and an identity name mapping are derived
   from ``pf`` itself (see the review note on nesting below).

NOTE(review): this excerpt has lost its indentation and appears to stop
before the end of the function -- ``index`` and ``filters`` are not used
and nothing is returned in the lines shown. Confirm against the full
source before relying on this description.
"""
# Imported at call time rather than at module level.
from fastparquet.util import check_column_names
# Requested categories are checked against the data columns only.
check_column_names(pf.columns, categories)
# Requested columns are checked against the data columns plus the entries
# of pf.cats; `or []` tolerates a falsy pf.cats.
check_column_names(pf.columns + list(pf.cats or []), columns)
if isinstance(columns, tuple):
# ensure they tokenize the same (a tuple and a list of the same names
# would otherwise be treated as different inputs)
columns = list(columns)
# Collect the value of every key/value metadata entry stored under the
# "pandas" key; an empty list means no such metadata was found.
if pf.fmd.key_value_metadata:
pandas_md = [x.value for x in pf.fmd.key_value_metadata if x.key == "pandas"]
else:
pandas_md = []
if len(pandas_md) == 0:
# Fall back to the storage information
# NOTE(review): with indentation stripped it is unclear how many of the
# following lines belong to this branch -- presumably all of them; confirm.
index_names = pf._get_index()
# Normalise a single (non-list) index result to a one-element list.
if not isinstance(index_names, list):
index_names = [index_names]
# NOTE(review): unlike the check above, pf.cats is not guarded with
# `or []` here, so a None pf.cats would make list() raise TypeError.
column_names = pf.columns + list(pf.cats)
# Identity mapping: each name maps to itself.
storage_name_mapping = {k: k for k in column_names}
def _pf_validation(pf, columns, index, categories, filters):
"""Validate user options against metadata in dataset
columns, index and categories must be in the list of columns available
(both data columns and path-based partitioning - subject to possible
renaming, if pandas metadata is present). The output index will
be inferred from any available pandas metadata, if not given.
"""
from fastparquet.util import check_column_names
check_column_names(pf.columns, categories)
check_column_names(pf.columns + list(pf.cats or []), columns)
if isinstance(columns, tuple):
# ensure they tokenize the same
columns = list(columns)
if pf.fmd.key_value_metadata:
pandas_md = [x.value for x in pf.fmd.key_value_metadata if x.key == "pandas"]
else:
pandas_md = []
if len(pandas_md) == 0:
# Fall back to the storage information
index_names = pf._get_index()
if not isinstance(index_names, list):
index_names = [index_names]
column_names = pf.columns + list(pf.cats)