Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def dtype_from_column(col):
import pandas as pd
col_dtype = col.dtype
# TODO add more basic types here
if col_dtype in (np.int32, np.int64, np.uint32, np.uint64, np.float, np.double,
np.uint8):
return ColumnInfo(col_dtype)
# TODO this seems kind of brittle
if col_dtype.base == np.dtype('M8[ns]'):
if col_dtype == np.dtype('datetime64[ns]'):
return ColumnInfo(col_dtype)
elif hasattr(col_dtype, 'tz'):
raise ValueError("datetime with tz not yet supported")
else:
raise ValueError("unsupported datetime subtype ({})".format(type(col_dtype)))
# Pandas 1.0 has StringDtype extension type
if col_dtype.name == 'string':
return ColumnInfo(unicode_dtype)
if col_dtype == 'bool':
return ColumnInfo(np.uint8, repr=np.dtype('bool'))
if col_dtype in (np.int32, np.int64, np.uint32, np.uint64, np.float, np.double,
np.uint8):
return ColumnInfo(col_dtype)
# TODO this seems kind of brittle
if col_dtype.base == np.dtype('M8[ns]'):
if col_dtype == np.dtype('datetime64[ns]'):
return ColumnInfo(col_dtype)
elif hasattr(col_dtype, 'tz'):
raise ValueError("datetime with tz not yet supported")
else:
raise ValueError("unsupported datetime subtype ({})".format(type(col_dtype)))
# Pandas 1.0 has StringDtype extension type
if col_dtype.name == 'string':
return ColumnInfo(unicode_dtype)
if col_dtype == 'bool':
return ColumnInfo(np.uint8, repr=np.dtype('bool'))
if col_dtype == np.dtype("O"):
# Note: this does a full scan of the column... not sure what else to do here
# because Pandas allows mixed string column types (and actually has
# problems w/ allowing non-string types in object columns)
inferred_dtype = pd.api.types.infer_dtype(col)
if inferred_dtype == 'bytes':
return ColumnInfo(np.bytes_)
elif inferred_dtype == 'string':
# TODO we need to make sure this is actually convertible
return ColumnInfo(unicode_dtype)
# TODO this seems kind of brittle
if col_dtype.base == np.dtype('M8[ns]'):
if col_dtype == np.dtype('datetime64[ns]'):
return ColumnInfo(col_dtype)
elif hasattr(col_dtype, 'tz'):
raise ValueError("datetime with tz not yet supported")
else:
raise ValueError("unsupported datetime subtype ({})".format(type(col_dtype)))
# Pandas 1.0 has StringDtype extension type
if col_dtype.name == 'string':
return ColumnInfo(unicode_dtype)
if col_dtype == 'bool':
return ColumnInfo(np.uint8, repr=np.dtype('bool'))
if col_dtype == np.dtype("O"):
# Note: this does a full scan of the column... not sure what else to do here
# because Pandas allows mixed string column types (and actually has
# problems w/ allowing non-string types in object columns)
inferred_dtype = pd.api.types.infer_dtype(col)
if inferred_dtype == 'bytes':
return ColumnInfo(np.bytes_)
elif inferred_dtype == 'string':
# TODO we need to make sure this is actually convertible
return ColumnInfo(unicode_dtype)
elif inferred_dtype == 'mixed':
raise ValueError(
def dtype_from_column(col):
    """Return the ``ColumnInfo`` describing how a pandas column is stored.

    The column's ``dtype`` is checked against the supported cases in order:
    fixed-width numeric dtypes, timezone-naive ``datetime64[ns]``, the pandas
    ``string`` extension dtype, ``bool`` and finally ``object`` columns whose
    values are inferred to be all bytes or all strings.

    Parameters
    ----------
    col
        A pandas column exposing ``dtype``. Its ``name`` attribute, when
        present, is used in the mixed-dtype error message.

    Returns
    -------
    ColumnInfo
        Built from the column dtype itself for numeric and datetime columns,
        from ``unicode_dtype`` for string columns, from ``np.bytes_`` for
        bytes columns, and from ``np.uint8`` with ``repr=np.dtype('bool')``
        for boolean columns.

    Raises
    ------
    ValueError
        If the column is a timezone-aware or otherwise unsupported datetime,
        an object column with mixed value types, or any other unhandled dtype.
    """
    import pandas as pd

    col_dtype = col.dtype

    # TODO add more basic types here
    # np.float64 replaces the `np.float` alias (removed in NumPy 1.24); the
    # alias and `np.double` both compared equal to the float64 dtype only.
    if col_dtype in (np.int32, np.int64, np.uint32, np.uint64, np.float64,
                     np.uint8):
        return ColumnInfo(col_dtype)

    # TODO this seems kind of brittle
    if col_dtype.base == np.dtype('M8[ns]'):
        if col_dtype == np.dtype('datetime64[ns]'):
            return ColumnInfo(col_dtype)
        elif hasattr(col_dtype, 'tz'):
            raise ValueError("datetime with tz not yet supported")
        else:
            raise ValueError(
                "unsupported datetime subtype ({})".format(type(col_dtype))
            )

    # Pandas 1.0 has StringDtype extension type
    if col_dtype.name == 'string':
        return ColumnInfo(unicode_dtype)

    if col_dtype == 'bool':
        return ColumnInfo(np.uint8, repr=np.dtype('bool'))

    if col_dtype == np.dtype("O"):
        # Note: this does a full scan of the column... not sure what else to do here
        #       because Pandas allows mixed string column types (and actually has
        #       problems w/ allowing non-string types in object columns)
        inferred_dtype = pd.api.types.infer_dtype(col)

        if inferred_dtype == 'bytes':
            return ColumnInfo(np.bytes_)
        elif inferred_dtype == 'string':
            # TODO we need to make sure this is actually convertible
            return ColumnInfo(unicode_dtype)
        elif inferred_dtype == 'mixed':
            # Fill the placeholder with the column name so the offending
            # column can be identified from the message.
            raise ValueError(
                "Column '{}' has mixed value dtype and cannot yet be stored as a TileDB attribute".format(
                    getattr(col, 'name', None)
                )
            )

    # Any other dtype (and any other inferred object kind) is unsupported.
    raise ValueError(
        "Unhandled column type: '{}'".format(
            col_dtype
        )
    )