How to use the pyarrow.types module in pyarrow

To help you get started, we’ve selected a few pyarrow examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github xhochy / fletcher / tests / test_pandas_extension.py View on Github external
def test_from_dtype(self, data):
        """Delegate to the base ``test_from_dtype`` check, xfailing on string data.

        When the Arrow dtype behind ``data`` is a string type, the test is
        marked as an expected failure with the message below instead of
        running the base check.
        """
        arrow_dtype = data.dtype.arrow_dtype
        is_string_backed = pa.types.is_string(arrow_dtype)
        if is_string_backed:
            pytest.xfail(
                "String construction is failing as Pandas wants to pass the FletcherChunkedDtype to NumPy"
            )
        BaseConstructorsTests.test_from_dtype(self, data)
github xhochy / fletcher / tests / test_pandas_extension.py View on Github external
def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
        """Run the base "no reduce" check only for non-numeric Arrow dtypes.

        For integer, floating point and decimal Arrow dtypes the test is
        skipped, because (per the skip message) numeric arrays implement
        reductions and therefore do not raise.
        """
        dtype = data.dtype.arrow_dtype
        is_numeric = (
            pa.types.is_integer(dtype)
            or pa.types.is_floating(dtype)
            or pa.types.is_decimal(dtype)
        )
        if not is_numeric:
            BaseNoReduceTests.test_reduce_series_numeric(
                self, data, all_numeric_reductions, skipna
            )
            return
        pytest.skip("Numeric arrays implement reductions, so don't raise")
github JDASoftwareGroup / kartothek / kartothek / serialization / _parquet.py View on Github external
elif (
        pa.types.is_integer(pa_type)
        and pd.api.types.is_integer(value)
        or pa.types.is_floating(pa_type)
        and pd.api.types.is_float(value)
        or pa.types.is_boolean(pa_type)
        and pd.api.types.is_bool(value)
        or pa.types.is_timestamp(pa_type)
        and not isinstance(value, (bytes, str))
        and (
            pd.api.types.is_datetime64_dtype(value)
            or isinstance(value, datetime.datetime)
        )
    ):
        return value
    elif pa.types.is_date(pa_type):
        if isinstance(value, str):
            return datetime.datetime.strptime(value, "%Y-%m-%d").date()
        elif isinstance(value, bytes):
            value = value.decode("utf-8")
            return datetime.datetime.strptime(value, "%Y-%m-%d").date()
        elif isinstance(value, datetime.date):
            if isinstance(value, datetime.datetime):
                raise TypeError(
                    f"Unexpected type for predicate: Column {column_name!r} is an "
                    f"Arrow date ({pa_type}), but predicate value has type {type(value)}. "
                    f"Use a Python 'datetime.date' object instead."
                )
            else:
                return value
    predicate_value_dtype = pd.Series(value).dtype
    raise TypeError(
github apache / arrow / python / pyarrow / orc.py View on Github external
if isinstance(typ, Schema) or types.is_struct(typ):
        for field in typ:
            path = (field.name,)
            yield path, next(counter)
            for sub, c in _traverse(field.type, counter):
                yield path + sub, c
    elif _is_map(typ):
        for sub_c in _traverse(typ.value_type, counter):
            yield sub_c
    elif types.is_list(typ):
        # Skip one index for list type, since this can never be selected
        # directly
        next(counter)
        for sub_c in _traverse(typ.value_type, counter):
            yield sub_c
    elif types.is_union(typ):
        # Union types not supported, just skip the indexes
        for dtype in typ:
            next(counter)
            for sub_c in _traverse(dtype, counter):
                pass
github xhochy / fletcher / fletcher / base.py View on Github external
def any(self, skipna: bool = False, **kwargs) -> Optional[bool]:
        """Compute whether any boolean value is True.

        Parameters
        ----------
        skipna : bool, default False
            Forwarded unchanged to ``any_op``.
        **kwargs
            Accepted for signature compatibility; not used by this method.

        Returns
        -------
        Optional[bool]
            The result of ``any_op`` applied to the underlying Arrow data.

        Raises
        ------
        TypeError
            If the underlying Arrow data is not of boolean type.
        """
        if not pa.types.is_boolean(self.data.type):
            # Name the operation actually being attempted so the error is
            # not confused with the sibling ``all`` reduction.
            raise TypeError("Can only execute any on boolean arrays")
        return any_op(self.data, skipna=skipna)
github JDASoftwareGroup / kartothek / kartothek / core / common_metadata.py View on Github external
pandas type identifier, e.g. ``"list[int8]"``.
    t_np: string
        numpy type identifier, e.g. ``"object"``.
    metadata: Union[None, Dict[String, Any]]
        metadata associated with the type, e.g. information about categoricals.

    Returns
    -------
    type_tuple: Tuple[pyarrow.Type, string, string, Union[None, Dict[String, Any]]]
        tuple of ``t_pa``, ``t_pd``, ``t_np``, ``metadata`` for normalized type
    """
    if pa.types.is_signed_integer(t_pa):
        return pa.int64(), "int64", "int64", None
    elif pa.types.is_unsigned_integer(t_pa):
        return pa.uint64(), "uint64", "uint64", None
    elif pa.types.is_floating(t_pa):
        return pa.float64(), "float64", "float64", None
    elif pa.types.is_list(t_pa):
        t_pa2, t_pd2, t_np2, metadata2 = normalize_type(
            t_pa.value_type, t_pd[len("list[") : -1], None, None
        )
        return pa.list_(t_pa2), "list[{}]".format(t_pd2), "object", None
    elif pa.types.is_dictionary(t_pa):
        # downcast to dictionary content, `t_pd` is useless in that case
        if ARROW_LARGER_EQ_0141:
            return normalize_type(t_pa.value_type, t_np, t_np, None)
        else:
            return normalize_type(t_pa.dictionary.type, t_np, t_np, None)
    else:
        return t_pa, t_pd, t_np, metadata
github xhochy / fletcher / fletcher / base.py View on Github external
def _is_numeric(arrow_dtype: pa.DataType) -> bool:
    """Return whether ``arrow_dtype`` is an integer, floating point or decimal type."""
    arrow_types = pa.types
    if arrow_types.is_integer(arrow_dtype):
        return True
    if arrow_types.is_floating(arrow_dtype):
        return True
    return arrow_types.is_decimal(arrow_dtype)
github xhochy / fletcher / fletcher / base.py View on Github external
def _is_numeric(arrow_dtype: pa.DataType) -> bool:
    """Return whether ``arrow_dtype`` is an integer, floating point or decimal type."""
    arrow_types = pa.types
    if arrow_types.is_integer(arrow_dtype):
        return True
    if arrow_types.is_floating(arrow_dtype):
        return True
    return arrow_types.is_decimal(arrow_dtype)
github JDASoftwareGroup / kartothek / kartothek / core / index.py View on Github external
-------
        value: Any
            normalized value, with a type that matches the index dtype

        Raises
        ------
        ValueError
            If dtype of the index was not set or derived.
        NotImplementedError
            If the dtype cannot be handled.
        """
        if dtype is None:
            raise ValueError(
                "Cannot normalize index values as long as dtype is not set"
            )
        elif pa.types.is_string(dtype):
            if isinstance(value, bytes):
                return value.decode("utf-8")
            else:
                return str(value)
        elif pa.types.is_binary(dtype):
            if isinstance(value, bytes):
                return value
            else:
                return str(value).encode("utf-8")
        elif pa.types.is_date(dtype):
            return pd.Timestamp(value).date()
        elif pa.types.is_temporal(dtype):
            return pd.Timestamp(value).to_datetime64()
        elif pa.types.is_integer(dtype):
            return int(value)
        elif pa.types.is_floating(dtype):
github JDASoftwareGroup / kartothek / kartothek / serialization / _parquet.py View on Github external
def _timelike_to_arrow_encoding(value, pa_type):
    # Date32 columns are encoded as days since 1970
    if pa.types.is_date32(pa_type):
        if isinstance(value, datetime.date):
            return value.toordinal() - EPOCH_ORDINAL
    elif pa.types.is_temporal(pa_type) and not ARROW_LARGER_EQ_0141:
        unit = pa_type.unit
        if unit == "ns":
            conversion_factor = 1
        elif unit == "us":
            conversion_factor = 10 ** 3
        elif unit == "ms":
            conversion_factor = 10 ** 6
        elif unit == "s":
            conversion_factor = 10 ** 9
        else:
            raise TypeError(
                "Unkwnown timestamp resolution encoudtered `{}`".format(unit)
            )