Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _paths_to_cats(paths, scheme):
"""Extract out fields and labels from directory names"""
# paths:  iterable of "/"-separated path strings.
# scheme: "hive" selects key/value extraction via ex_from_sep("/").findall;
#         any other value falls back to positional "dir0", "dir1", ... keys.
# NOTE(review): only the beginning of this function is visible in this
# excerpt; the return value and the body of the final check are not shown.
# can be factored out in fastparquet
# NOTE(review): groupby_types is imported but not used in the visible lines.
from fastparquet.util import ex_from_sep, val_to_num, groupby_types
# cats holds, per partition key, the set of values after val_to_num;
# raw_cats holds the set of untouched strings for the same key.
# OrderedDict is not imported here -- presumably a module-level import.
cats = OrderedDict()
raw_cats = OrderedDict()
for path in paths:
# NOTE(review): the argument is constant, so this looks loop-invariant
# and could probably be hoisted above the loop -- confirm ex_from_sep
# has no per-call state first.
s = ex_from_sep("/")
if scheme == "hive":
# findall yields (key, val) pairs found in the path (presumably one
# per "key=val" directory component -- confirm against ex_from_sep).
partitions = s.findall(path)
for key, val in partitions:
cats.setdefault(key, set()).add(val_to_num(val))
raw_cats.setdefault(key, set()).add(val)
else:
# Every component except the last one (dropped by [:-1]) is treated as
# a partition value, keyed by its position in the path.
for i, val in enumerate(path.split("/")[:-1]):
key = "dir%i" % i
cats.setdefault(key, set()).add(val_to_num(val))
raw_cats.setdefault(key, set()).add(val)
for key, v in cats.items():
# Check that no partition names map to the same value after
# transformation by val_to_num
# Both containers are sets, so a size mismatch means at least two distinct
# raw strings collapsed to one converted value.
raw = raw_cats[key]
if len(v) != len(raw):
def _paths_to_cats(paths, scheme):
"""Extract out fields and labels from directory names"""
# can be factored out in fastparquet
cats = OrderedDict()
raw_cats = OrderedDict()
for path in paths:
s = ex_from_sep("/")
if scheme == "hive":
partitions = s.findall(path)
for (key, val) in partitions:
cats.setdefault(key, set()).add(val_to_num(val))
raw_cats.setdefault(key, set()).add(val)
else:
for (i, val) in enumerate(path.split("/")[:-1]):
key = "dir%i" % i
cats.setdefault(key, set()).add(val_to_num(val))
raw_cats.setdefault(key, set()).add(val)
for (key, v) in cats.items():
# Check that no partition names map to the same value after
# transformation by val_to_num
raw = raw_cats[key]
if len(v) != len(raw):