Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_from_dtype(self, data):
    """Run the base ``test_from_dtype`` check, expecting failure for strings.

    When the Arrow dtype of ``data`` is a string type the test is marked as
    an expected failure via ``pytest.xfail``; otherwise the shared
    ``BaseConstructorsTests`` implementation is executed.
    """
    has_string_dtype = pa.types.is_string(data.dtype.arrow_dtype)
    if has_string_dtype:
        # pytest.xfail raises, so the base test below is not reached.
        pytest.xfail(
            "String construction is failing as Pandas wants to pass the FletcherChunkedDtype to NumPy"
        )
    BaseConstructorsTests.test_from_dtype(self, data)
def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
    """Check numeric reductions on a series for non-numeric Arrow dtypes.

    Integer, floating and decimal Arrow dtypes are skipped; every other
    dtype is delegated to ``BaseNoReduceTests.test_reduce_series_numeric``.
    """
    pa_dtype = data.dtype.arrow_dtype
    supports_reductions = (
        pa.types.is_integer(pa_dtype)
        or pa.types.is_floating(pa_dtype)
        or pa.types.is_decimal(pa_dtype)
    )
    if not supports_reductions:
        BaseNoReduceTests.test_reduce_series_numeric(
            self, data, all_numeric_reductions, skipna
        )
        return
    # pytest.skip raises, ending the test here for numeric dtypes.
    pytest.skip("Numeric arrays implement reductions, so don't raise")
elif (
pa.types.is_integer(pa_type)
and pd.api.types.is_integer(value)
or pa.types.is_floating(pa_type)
and pd.api.types.is_float(value)
or pa.types.is_boolean(pa_type)
and pd.api.types.is_bool(value)
or pa.types.is_timestamp(pa_type)
and not isinstance(value, (bytes, str))
and (
pd.api.types.is_datetime64_dtype(value)
or isinstance(value, datetime.datetime)
)
):
return value
elif pa.types.is_date(pa_type):
if isinstance(value, str):
return datetime.datetime.strptime(value, "%Y-%m-%d").date()
elif isinstance(value, bytes):
value = value.decode("utf-8")
return datetime.datetime.strptime(value, "%Y-%m-%d").date()
elif isinstance(value, datetime.date):
if isinstance(value, datetime.datetime):
raise TypeError(
f"Unexpected type for predicate: Column {column_name!r} is an "
f"Arrow date ({pa_type}), but predicate value has type {type(value)}. "
f"Use a Python 'datetime.date' object instead."
)
else:
return value
predicate_value_dtype = pd.Series(value).dtype
raise TypeError(
if isinstance(typ, Schema) or types.is_struct(typ):
for field in typ:
path = (field.name,)
yield path, next(counter)
for sub, c in _traverse(field.type, counter):
yield path + sub, c
elif _is_map(typ):
for sub_c in _traverse(typ.value_type, counter):
yield sub_c
elif types.is_list(typ):
# Skip one index for list type, since this can never be selected
# directly
next(counter)
for sub_c in _traverse(typ.value_type, counter):
yield sub_c
elif types.is_union(typ):
# Union types not supported, just skip the indexes
for dtype in typ:
next(counter)
for sub_c in _traverse(dtype, counter):
pass
def any(self, skipna: bool = False, **kwargs) -> Optional[bool]:
    """Compute whether any boolean value is True.

    Parameters
    ----------
    skipna
        Forwarded unchanged to ``any_op``.
    **kwargs
        Accepted but not used by this method.

    Returns
    -------
    Optional[bool]
        The result of ``any_op`` applied to ``self.data``.

    Raises
    ------
    TypeError
        If ``self.data`` does not have an Arrow boolean type.
    """
    if not pa.types.is_boolean(self.data.type):
        # Name this reduction ("any") in the message so the error points the
        # caller at the operation that was actually requested.
        raise TypeError("Can only execute any on boolean arrays")
    return any_op(self.data, skipna=skipna)
pandas type identifier, e.g. ``"list[int8]"``.
t_np: string
numpy type identifier, e.g. ``"object"``.
metadata: Union[None, Dict[String, Any]]
metadata associated with the type, e.g. information about categoricals.
Returns
-------
type_tuple: Tuple[pyarrow.Type, string, string, Union[None, Dict[String, Any]]]
tuple of ``t_pa``, ``t_pd``, ``t_np``, ``metadata`` for normalized type
"""
if pa.types.is_signed_integer(t_pa):
return pa.int64(), "int64", "int64", None
elif pa.types.is_unsigned_integer(t_pa):
return pa.uint64(), "uint64", "uint64", None
elif pa.types.is_floating(t_pa):
return pa.float64(), "float64", "float64", None
elif pa.types.is_list(t_pa):
t_pa2, t_pd2, t_np2, metadata2 = normalize_type(
t_pa.value_type, t_pd[len("list[") : -1], None, None
)
return pa.list_(t_pa2), "list[{}]".format(t_pd2), "object", None
elif pa.types.is_dictionary(t_pa):
# downcast to dictionary content, `t_pd` is useless in that case
if ARROW_LARGER_EQ_0141:
return normalize_type(t_pa.value_type, t_np, t_np, None)
else:
return normalize_type(t_pa.dictionary.type, t_np, t_np, None)
else:
return t_pa, t_pd, t_np, metadata
def _is_numeric(arrow_dtype: pa.DataType) -> bool:
    """Return whether ``arrow_dtype`` is an integer, floating or decimal type."""
    # Checks run in the same order as a short-circuiting ``or`` chain.
    if pa.types.is_integer(arrow_dtype):
        return True
    if pa.types.is_floating(arrow_dtype):
        return True
    return pa.types.is_decimal(arrow_dtype)
def _is_numeric(arrow_dtype: pa.DataType) -> bool:
    """Tell whether the given Arrow type is integer, floating or decimal."""
    if pa.types.is_integer(arrow_dtype) or pa.types.is_floating(arrow_dtype):
        return True
    # Neither integer nor floating: the decimal check decides the result.
    return pa.types.is_decimal(arrow_dtype)
-------
value: Any
normalized value, with a type that matches the index dtype
Raises
------
ValueError
If dtype of the index was not set or derived.
NotImplementedError
If the dtype cannot be handled.
"""
if dtype is None:
raise ValueError(
"Cannot normalize index values as long as dtype is not set"
)
elif pa.types.is_string(dtype):
if isinstance(value, bytes):
return value.decode("utf-8")
else:
return str(value)
elif pa.types.is_binary(dtype):
if isinstance(value, bytes):
return value
else:
return str(value).encode("utf-8")
elif pa.types.is_date(dtype):
return pd.Timestamp(value).date()
elif pa.types.is_temporal(dtype):
return pd.Timestamp(value).to_datetime64()
elif pa.types.is_integer(dtype):
return int(value)
elif pa.types.is_floating(dtype):
def _timelike_to_arrow_encoding(value, pa_type):
# Date32 columns are encoded as days since 1970
if pa.types.is_date32(pa_type):
if isinstance(value, datetime.date):
return value.toordinal() - EPOCH_ORDINAL
elif pa.types.is_temporal(pa_type) and not ARROW_LARGER_EQ_0141:
unit = pa_type.unit
if unit == "ns":
conversion_factor = 1
elif unit == "us":
conversion_factor = 10 ** 3
elif unit == "ms":
conversion_factor = 10 ** 6
elif unit == "s":
conversion_factor = 10 ** 9
else:
raise TypeError(
"Unkwnown timestamp resolution encoudtered `{}`".format(unit)
)