How to use the pymapd._pandas_loaders module in pymapd

To help you get started, we’ve selected a few pymapd examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

GitHub: omnisci / pymapd / tests / test_loaders.py (View on GitHub)
data = pd.DataFrame({
            "boolean_": [True, False],
            "smallint_": np.array([0, 1], dtype=np.int16),
            "int_": np.array([0, 1], dtype=np.int32),
            "bigint_": np.array([0, 1], dtype=np.int64),
            "float_": np.array([0, 1], dtype=np.float32),
            "double_": np.array([0, 1], dtype=np.float64),
            "varchar_": ["a", "b"],
            "text_": ['a', 'b'],
            "time_": [datetime.time(0, 11, 59), datetime.time(13)],
            "timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017")],
            "date_": [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)],
        }, columns=['boolean_', 'smallint_', 'int_', 'bigint_', 'float_',
                    'double_', 'varchar_', 'text_', 'time_', 'timestamp_',
                    'date_'])
        result = _pandas_loaders.build_input_columnar(data,
                                                      preserve_index=False)

        nulls = [False, False]
        expected = [
            TColumn(TColumnData(int_col=[True, False]), nulls=nulls),
            TColumn(TColumnData(int_col=np.array([0, 1], dtype=np.int16)), nulls=nulls),  # noqa
            TColumn(TColumnData(int_col=np.array([0, 1], dtype=np.int32)), nulls=nulls),  # noqa
            TColumn(TColumnData(int_col=np.array([0, 1], dtype=np.int64)), nulls=nulls),  # noqa
            TColumn(TColumnData(real_col=np.array([0, 1], dtype=np.float32)), nulls=nulls),  # noqa
            TColumn(TColumnData(real_col=np.array([0, 1], dtype=np.float64)), nulls=nulls),  # noqa
            TColumn(TColumnData(str_col=['a', 'b']), nulls=nulls),
            TColumn(TColumnData(str_col=['a', 'b']), nulls=nulls),
            TColumn(TColumnData(int_col=[719, 46800]), nulls=nulls),
            TColumn(TColumnData(int_col=[1451606400, 1483228800]), nulls=nulls),  # noqa
            TColumn(TColumnData(int_col=[1451606400, 1483228800]), nulls=nulls)
        ]
GitHub: omnisci / pymapd / tests / test_loaders.py (View on GitHub)
# unreliable since if there is no number outside the int32
            # bounds in a column with nulls then we will be assuming int
            "int_": np.array([0, 1, None], dtype=np.object),
            "bigint_": np.array([0, 9223372036854775807, None],
                                dtype=np.object),
            "double_": np.array([0, 1, None], dtype=np.float64),
            "varchar_": ["a", "b", None],
            "text_": ['a', 'b', None],
            "time_": [datetime.time(0, 11, 59), datetime.time(13), None],
            "timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017"), None],
            "date_": [datetime.date(1001, 1, 1), datetime.date(2017, 1, 1),
                      None],
        }, columns=['boolean_', 'int_', 'bigint_',
                    'double_', 'varchar_', 'text_', 'time_', 'timestamp_',
                    'date_'])
        result = _pandas_loaders.build_input_columnar(data,
                                                      preserve_index=False)

        nulls = [False, False, True]
        bool_na = -128
        int_na = -2147483648
        bigint_na = -9223372036854775808
        ns_na = -9223372037
        double_na = 0

        expected = [
            TColumn(TColumnData(int_col=[1, 0, bool_na]), nulls=nulls),
            TColumn(TColumnData(int_col=np.array([0, 1, int_na], dtype=np.int32)), nulls=nulls),  # noqa
            TColumn(TColumnData(int_col=np.array([0, 9223372036854775807, bigint_na], dtype=np.int64)), nulls=nulls),  # noqa
            TColumn(TColumnData(real_col=np.array([0, 1, double_na], dtype=np.float64)), nulls=nulls),  # noqa
            TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
            TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
GitHub: omnisci / pymapd / pymapd / connection.py (View on GitHub)
# Validate that there are the same number of columns in the table
            # as there are in the dataframe. No point trying to load the data
            # if this is not the case
            if len(table_details) != len(data.columns):
                raise ValueError('Number of columns in dataframe ({}) does not \
                                  match number of columns in OmniSci table \
                                  ({})'.format(len(data.columns),
                                               len(table_details)))

            col_names = [i[0] for i in table_details] if \
                col_names_from_schema \
                else list(data)

            col_types = [(i[1], i[4]) for i in table_details]

            input_cols = _pandas_loaders.build_input_columnar(
                data,
                preserve_index=preserve_index,
                chunk_size_bytes=chunk_size_bytes,
                col_types=col_types,
                col_names=col_names
            )
        else:
            raise TypeError("Unknown type {}".format(type(data)))
        for cols in input_cols:
            self._client.load_table_binary_columnar(self._session, table_name,
                                                    cols)