How to use the pyarrow.int32 function in pyarrow

To help you get started, we’ve selected a few pyarrow.int32 examples, based on popular ways the function is used in public projects.

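pyarrow.int32() takes no arguments and returns the DataType object for signed 32-bit integers; that type is then passed to array constructors, fields, schemas, or casts. A minimal sketch (the names below are illustrative, not taken from the projects shown on this page):

import pyarrow as pa

int32_type = pa.int32()                          # DataType: signed 32-bit integer
arr = pa.array([1, 2, None], type=int32_type)    # build an Int32Array directly
field = pa.field("id", pa.int32())               # declare an int32 column in a schema
schema = pa.schema([field])
narrowed = pa.array([1, 2, 3]).cast(pa.int32())  # plain Python ints default to int64; cast down to int32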

github snowflakedb / snowflake-connector-python / test / pandas / test_unit_arrow_chunk_iterator.py
        return datetime.time(hour, minute, second, milisec)

    def expected_data_transform_int32(data):
        # lower four digits are fractional seconds (scale 4); converted to microseconds below
        milisec = data % (10**4)
        milisec *= 10**2
        data //= 10**4
        second = data % 60
        data //= 60
        minute = data % 60
        hour = data // 60
        return datetime.time(hour, minute, second, milisec)

    iterate_over_test_chunk([pyarrow.int64(), pyarrow.int64()],
                            column_meta_int64, time_generator_int64, expected_data_transform_int64)

    iterate_over_test_chunk([pyarrow.int32(), pyarrow.int32()],
                            column_meta_int32, time_generator_int32, expected_data_transform_int32)

github hellonarrativ / spectrify / tests / test_parquet.py
            'unit_test_table',
            self.sa_meta,
            sa.Column('real_col', sa.REAL),
            sa.Column('bigint_col', sa.BIGINT),
            sa.Column('int_col', sa.INTEGER),
            sa.Column('smallint_col', sa.SMALLINT),
            sa.Column('bool_col', sa.BOOLEAN),
            sa.Column('str_col', sa.VARCHAR),
            sa.Column('timestamp_col', sa.TIMESTAMP),
            sa.Column('plaintext_col', sa.TEXT),
        )

        self.expected_datatypes = [
            pa.float32(),
            pa.int64(),
            pa.int32(),
            pa.int16(),
            pa.bool_(),
            pa.string(),
            pa.timestamp('ns'),
            pa.string(),
        ]

github tensorflow / io / tests / test_arrow_eager.py
def get_arrow_type(self, dt, is_list):
    """get_arrow_type"""
    if dt == dtypes.bool:
      arrow_type = pa.bool_()
    elif dt == dtypes.int8:
      arrow_type = pa.int8()
    elif dt == dtypes.int16:
      arrow_type = pa.int16()
    elif dt == dtypes.int32:
      arrow_type = pa.int32()
    elif dt == dtypes.int64:
      arrow_type = pa.int64()
    elif dt == dtypes.uint8:
      arrow_type = pa.uint8()
    elif dt == dtypes.uint16:
      arrow_type = pa.uint16()
    elif dt == dtypes.uint32:
      arrow_type = pa.uint32()
    elif dt == dtypes.uint64:
      arrow_type = pa.uint64()
    elif dt == dtypes.float16:
      arrow_type = pa.float16()
    elif dt == dtypes.float32:
      arrow_type = pa.float32()
    elif dt == dtypes.float64:
      arrow_type = pa.float64()

github omnisci / pymapd / tests / test_integration.py
            varchar_ VARCHAR(40),
            text_ TEXT,
            time_ TIME,
            timestamp_ TIMESTAMP,
            date_ DATE
        );''')
        # skipping decimal for now
        c.execute(create)

        names = ['boolean_', 'smallint_', 'int_', 'bigint_',
                 'float_', 'double_', 'varchar_', 'text_',
                 'time_', 'timestamp_', 'date_']

        columns = [pa.array([True, False, None], type=pa.bool_()),
                   pa.array([1, 0, None]).cast(pa.int16()),
                   pa.array([1, 0, None]).cast(pa.int32()),
                   pa.array([1, 0, None]),
                   pa.array([1.0, 1.1, None]).cast(pa.float32()),
                   pa.array([1.0, 1.1, None]),
                   # no fixed-width string
                   pa.array(['a', 'b', None]),
                   pa.array(['a', 'b', None]),
                   (pa.array([1, 2, None]).cast(pa.int32())
                    .cast(pa.time32('s'))),
                   pa.array([datetime.datetime(2016, 1, 1, 12, 12, 12),
                             datetime.datetime(2017, 1, 1), None]),
                   pa.array([datetime.date(2016, 1, 1),
                             datetime.date(2017, 1, 1), None])]
        table = pa.Table.from_arrays(columns, names=names)
        con.load_table_arrow("all_types", table)
        c.execute('drop table if exists all_types;')

github andrewgross / json2parquet / tests / test_client.py
def test_convert_json():
    """
    Test converting a JSON file to Parquet
    """
    schema = pa.schema([
        pa.field("foo", pa.int32()),
        pa.field("bar", pa.int64())
    ])

    input_path = "{}/tests/fixtures/simple_json.txt".format(os.getcwd())
    expected_file = "{}/tests/fixtures/simple.parquet".format(os.getcwd())
    with tempfile.NamedTemporaryFile() as f:
        output_file = f.name
        client.convert_json(input_path, output_file, schema)
        output = pq.ParquetFile(output_file)
        expected = pq.ParquetFile(expected_file)
        assert output.metadata.num_columns == expected.metadata.num_columns
        assert output.metadata.num_rows == expected.metadata.num_rows
        assert output.schema.equals(expected.schema)
        assert output.read_row_group(0).to_pydict() == expected.read_row_group(0).to_pydict()
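The same pattern — declaring pa.int32() in a schema and checking that it survives a round trip — can be reproduced without the JSON fixture. A minimal sketch using only pyarrow and pyarrow.parquet (the file name is invented for illustration):

import pyarrow as pa
import pyarrow.parquet as pq

schema = pa.schema([pa.field("foo", pa.int32()), pa.field("bar", pa.int64())])
table = pa.table({"foo": [1, 2], "bar": [10, 20]}, schema=schema)
pq.write_table(table, "roundtrip.parquet")
read_back = pq.read_table("roundtrip.parquet")
assert read_back.column("foo").type == pa.int32()   # int32 is preserved through Parquet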

github allwefantasy / pyjava / python / pyjava / datatype / types.py
def to_arrow_type(dt):
    """ Convert Spark data type to pyarrow type
    """
    import pyarrow as pa
    if type(dt) == BooleanType:
        arrow_type = pa.bool_()
    elif type(dt) == ByteType:
        arrow_type = pa.int8()
    elif type(dt) == ShortType:
        arrow_type = pa.int16()
    elif type(dt) == IntegerType:
        arrow_type = pa.int32()
    elif type(dt) == LongType:
        arrow_type = pa.int64()
    elif type(dt) == FloatType:
        arrow_type = pa.float32()
    elif type(dt) == DoubleType:
        arrow_type = pa.float64()
    elif type(dt) == DecimalType:
        arrow_type = pa.decimal128(dt.precision, dt.scale)
    elif type(dt) == StringType:
        arrow_type = pa.string()
    elif type(dt) == BinaryType:
        arrow_type = pa.binary()
    elif type(dt) == DateType:
        arrow_type = pa.date32()
    elif type(dt) == TimestampType:
        # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read

github xhochy / fletcher / fletcher / base.py
    pa.string().id: str,
    # Use any list type here, only LIST is important
    pa.list_(pa.string()).id: list,
    pa.duration("ns").id: datetime.timedelta,
}

_string_type_map = {"date64[ms]": pa.date64(), "string": pa.string()}

_examples = {
    pa.null(): pa.array([None, None], type=pa.null()),
    pa.bool_(): pa.array([None, True], type=pa.bool_()),
    pa.int8(): pa.array([None, -1], type=pa.int8()),
    pa.uint8(): pa.array([None, 1], type=pa.uint8()),
    pa.int16(): pa.array([None, -1], type=pa.int16()),
    pa.uint16(): pa.array([None, 1], type=pa.uint16()),
    pa.int32(): pa.array([None, -1], type=pa.int32()),
    pa.uint32(): pa.array([None, 1], type=pa.uint32()),
    pa.int64(): pa.array([None, -1], type=pa.int64()),
    pa.uint64(): pa.array([None, 1], type=pa.uint64()),
    pa.float16(): pa.array([None, np.float16(-0.1)], type=pa.float16()),
    pa.float32(): pa.array([None, -0.1], type=pa.float32()),
    pa.float64(): pa.array([None, -0.1], type=pa.float64()),
    pa.date32(): pa.array([None, datetime.date(2010, 9, 8)], type=pa.date32()),
    pa.date64(): pa.array([None, datetime.date(2010, 9, 8)], type=pa.date64()),
    pa.timestamp("s"): pa.array(
        [None, datetime.datetime(2013, 12, 11, 10, 9, 8)], type=pa.timestamp("s")
    ),
    pa.timestamp("ms"): pa.array(
        [None, datetime.datetime(2013, 12, 11, 10, 9, 8, 1000)], type=pa.timestamp("ms")
    ),
    pa.timestamp("us"): pa.array(
        [None, datetime.datetime(2013, 12, 11, 10, 9, 8, 7)], type=pa.timestamp("us")

github apache / arrow / python / pyarrow / jvm.py
    Parameters
    ----------
    jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$Int

    Returns
    -------
    typ: pyarrow.DataType
    """
    if jvm_type.isSigned:
        if jvm_type.bitWidth == 8:
            return pa.int8()
        elif jvm_type.bitWidth == 16:
            return pa.int16()
        elif jvm_type.bitWidth == 32:
            return pa.int32()
        elif jvm_type.bitWidth == 64:
            return pa.int64()
    else:
        if jvm_type.bitWidth == 8:
            return pa.uint8()
        elif jvm_type.bitWidth == 16:
            return pa.uint16()
        elif jvm_type.bitWidth == 32:
            return pa.uint32()
        elif jvm_type.bitWidth == 64:
            return pa.uint64()

github andrewgross / json2parquet / json2parquet / client.py
                    _converted_col.append(pd.to_datetime(t, format=date_format))
                except pd._libs.tslib.OutOfBoundsDatetime:
                    _converted_col.append(pd.Timestamp.max)
            array_data.append(pa.Array.from_pandas(pd.to_datetime(_converted_col), type=pa.timestamp('ns')))
        elif column.type.id == pa.date32().id:
            _converted_col = map(_date_converter, _col)
            array_data.append(pa.array(_converted_col, type=pa.date32()))
        # Float types are ambiguous for conversions, need to specify the exact type
        elif column.type.id == pa.float64().id:
            array_data.append(pa.array(_col, type=pa.float64()))
        elif column.type.id == pa.float32().id:
            # Python doesn't have a native float32 type
            # and PyArrow cannot cast float64 -> float32
            _col = pd.to_numeric(_col, downcast='float')
            array_data.append(pa.Array.from_pandas(_col, type=pa.float32()))
        elif column.type.id == pa.int32().id:
            # PyArrow 0.8.0 can cast int64 -> int32
            _col64 = pa.array(_col, type=pa.int64())
            array_data.append(_col64.cast(pa.int32()))
        elif column.type.id == pa.bool_().id:
            _col = map(_boolean_converter, _col)
            array_data.append(pa.array(_col, type=column.type))
        else:
            array_data.append(pa.array(_col, type=column.type))
        if isinstance(field_aliases, dict):
            schema_names.append(field_aliases.get(column.name, column.name))
        else:
            schema_names.append(column.name)
    return pa.RecordBatch.from_arrays(array_data, schema_names)
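The int64-to-int32 branch above can also be exercised in isolation; a minimal sketch of the cast (the column name is invented for illustration):

import pyarrow as pa

col64 = pa.array([1, 0, None], type=pa.int64())
col32 = col64.cast(pa.int32())                  # safe cast; raises ArrowInvalid if a value overflows int32
batch = pa.RecordBatch.from_arrays([col32], ["int_col"])
assert batch.column(0).type == pa.int32()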