Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Excerpt (the enclosing `def` is not visible): translate the pandas dtype name
# held in `dtype` into an upper-case SQL column type, raising UnsupportedType
# for anything unrecognised.
# The target SQL dialect is not named in these lines - presumably Redshift; confirm.
if dtype == "int32":
return "INTEGER"
elif dtype == "int64":
return "BIGINT"
elif dtype == "float32":
return "FLOAT4"
elif dtype == "float64":
return "FLOAT8"
elif dtype == "bool":
return "BOOLEAN"
# NOTE(review): for a str `dtype` the isinstance check is always true once the
# equality holds; it only matters if a non-str object can compare equal to "object".
elif dtype == "object" and isinstance(dtype, str):
return "VARCHAR(256)"
# "datetime64" is 10 characters long, so this is a prefix match
# (e.g. it also accepts "datetime64[ns]").
elif dtype[:10] == "datetime64":
return "TIMESTAMP"
else:
# NOTE(review): the concatenation below only works when `dtype` is a str.
raise UnsupportedType("Unsupported Pandas type: " + dtype)
# Excerpt: continuation of an if/elif chain whose opening branches and enclosing
# `def` are not visible. The visible branches translate a Spark type name in
# `dtype` (per the error message below) into an upper-case SQL column type.
# The target SQL dialect is not named in these lines - presumably Redshift; confirm.
elif dtype == "float":
return "FLOAT4"
elif dtype == "double":
return "FLOAT8"
elif dtype in ("bool", "boolean"):
return "BOOLEAN"
elif dtype == "timestamp":
return "TIMESTAMP"
elif dtype == "date":
return "DATE"
elif dtype == "string":
return "VARCHAR(256)"
# Decimal types are passed through: spaces removed and upper-cased,
# e.g. "decimal(10, 2)" becomes "DECIMAL(10,2)".
elif dtype.startswith("decimal"):
return dtype.replace(" ", "").upper()
else:
# NOTE(review): the concatenation below only works when `dtype` is a str.
raise UnsupportedType("Unsupported Spark type: " + dtype)
# Excerpt (the enclosing `def` is not visible): translate the pandas dtype name
# held in `dtype` into a lower-case type name, raising UnsupportedType for
# anything unrecognised.
# The target engine is not named in these lines - TODO confirm against the caller.
if dtype == "int32":
return "int"
elif dtype == "int64":
return "bigint"
elif dtype == "float32":
return "float"
elif dtype == "float64":
return "double"
elif dtype == "bool":
return "boolean"
# NOTE(review): for a str `dtype` the isinstance check is always true once the
# equality holds; it only matters if a non-str object can compare equal to "object".
elif dtype == "object" and isinstance(dtype, str):
return "string"
# Prefix match on the 10-character name "datetime64".
# NOTE(review): datetime columns map to "string" here, not to a timestamp type -
# confirm this is intended.
elif dtype[:10] == "datetime64":
return "string"
else:
# NOTE(review): the concatenation below only works when `dtype` is a str.
raise UnsupportedType("Unsupported Pandas type: " + dtype)
# Excerpt: continuation of an if/elif chain whose opening branches and enclosing
# `def` are not visible. The visible branches translate an Athena type name in
# `dtype` (per the error message below) into a short type label.
# The labels look like pandas/Python type names - presumably consumed when
# building a DataFrame; confirm against the caller.
# "float", "double" and "real" all collapse to the same 64-bit float label.
elif dtype in ("float", "double", "real"):
return "float64"
elif dtype == "boolean":
return "bool"
elif dtype in ("string", "char", "varchar"):
return "str"
elif dtype in ("timestamp", "timestamp with time zone"):
return "datetime64"
elif dtype == "date":
return "date"
elif dtype == "array":
return "list"
elif dtype == "decimal":
return "decimal"
else:
raise UnsupportedType(f"Unsupported Athena type: {dtype}")
# Excerpt: tail of a function whose `def` line is not visible. `schema_built`,
# `pyarrow_schema`, `cast_columns`, `partition_cols`, `data_types` and `logger`
# are all defined outside these lines.
# It walks the (name, dtype) pairs of `pyarrow_schema`, picks a type for each
# column, and files the result under either the regular columns (`schema_built`)
# or the partition columns (`partition_cols_types`).
partition_cols_types = {}
for name, dtype in pyarrow_schema:
# An explicit entry in `cast_columns` takes precedence over type inference.
if (cast_columns is not None) and (name in cast_columns.keys()):
if name in partition_cols:
partition_cols_types[name] = cast_columns[name]
else:
schema_built.append((name, cast_columns[name]))
else:
# No explicit cast: derive the type from the PyArrow dtype and re-raise
# conversion failures with the offending column name in the message.
# NOTE(review): both re-raises build a new exception without `from`, so the
# original is kept only as implicit context.
try:
athena_type = data_types.pyarrow2athena(dtype)
except UndetectedType:
raise UndetectedType(f"We can't infer the data type from an entire null object column ({name}). "
f"Please consider pass the type of this column explicitly using the cast "
f"columns argument")
except UnsupportedType:
raise UnsupportedType(f"Unsupported Pyarrow type for column {name}: {dtype}")
if name in partition_cols:
partition_cols_types[name] = athena_type
else:
schema_built.append((name, athena_type))
# Emit partition columns in the order given by `partition_cols`, not schema order.
# NOTE(review): a name in `partition_cols` that never appeared in `pyarrow_schema`
# has no entry in `partition_cols_types`, so this lookup would raise KeyError.
partition_cols_schema_built = [(name, partition_cols_types[name]) for name in partition_cols]
logger.debug(f"schema_built:\n{schema_built}")
logger.debug(f"partition_cols_schema_built:\n{partition_cols_schema_built}")
return schema_built, partition_cols_schema_built
# Excerpt: continuation of an if/elif chain whose opening branches and enclosing
# `def` are not visible. The visible branches translate an upper-case Redshift
# type name in `dtype_str` (per the error message below) into a lower-case type
# name; aliases of the same type share a branch.
# The target engine is not named in these lines - TODO confirm against the caller.
# NOTE(review): comparisons are against upper-case literals, so `dtype_str` is
# presumably normalised to upper case before this point - verify.
elif dtype_str in ("BIGINT", "INT8"):
return "bigint"
elif dtype_str in ("REAL", "FLOAT4"):
return "float"
elif dtype_str in ("DOUBLE PRECISION", "FLOAT8", "FLOAT"):
return "double"
elif dtype_str in ("BOOLEAN", "BOOL"):
return "boolean"
elif dtype_str in ("VARCHAR", "CHARACTER VARYING", "NVARCHAR", "TEXT"):
return "string"
elif dtype_str == "DATE":
return "date"
elif dtype_str == "TIMESTAMP":
return "timestamp"
else:
raise UnsupportedType(f"Unsupported Redshift type: {dtype_str}")
# Excerpt: continuation of an if/elif chain whose opening branches and enclosing
# `def` are not visible. The visible branches translate an Athena type name in
# `dtype` (per the error message below) into a type label.
# Labels such as "timestamp[ns]" and "date32" look like PyArrow type names -
# presumably the target is PyArrow; confirm against the caller.
elif dtype == "bigint":
return "int64"
elif dtype == "float":
return "float32"
elif dtype == "double":
return "float64"
elif dtype in ("boolean", "bool"):
return "bool"
# The nested types "array", "row" and "map" share the "string" label with the text types.
elif dtype in ("string", "char", "varchar", "array", "row", "map"):
return "string"
elif dtype == "timestamp":
return "timestamp[ns]"
elif dtype == "date":
return "date32"
else:
raise UnsupportedType(f"Unsupported Athena type: {dtype}")
# Excerpt (the enclosing `def` is not visible): translate the pandas dtype name
# held in `dtype` into a lower-case type name, raising UnsupportedType for
# anything unrecognised.
# The target engine is not named in these lines - TODO confirm against the caller.
if dtype == "int32":
return "int"
# Accepts both spellings: lower-case "int64" and capitalised "Int64".
elif dtype in ("int64", "Int64"):
return "bigint"
elif dtype == "float32":
return "float"
elif dtype == "float64":
return "double"
elif dtype == "bool":
return "boolean"
elif dtype == "object":
return "string"
# Prefix match, so parameterised names such as "datetime64[ns]" are accepted too.
elif dtype.startswith("datetime64"):
return "timestamp"
else:
raise UnsupportedType(f"Unsupported Pandas type: {dtype}")