Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# No-null fixture: two rows for every column type exercised here.
column_order = [
    'boolean_', 'smallint_', 'int_', 'bigint_', 'float_', 'double_',
    'varchar_', 'text_', 'time_', 'timestamp_', 'date_',
]
frame_values = dict(
    boolean_=[True, False],
    smallint_=np.array([0, 1], dtype=np.int16),
    int_=np.array([0, 1], dtype=np.int32),
    bigint_=np.array([0, 1], dtype=np.int64),
    float_=np.array([0, 1], dtype=np.float32),
    double_=np.array([0, 1], dtype=np.float64),
    varchar_=['a', 'b'],
    text_=['a', 'b'],
    time_=[datetime.time(0, 11, 59), datetime.time(13)],
    timestamp_=[pd.Timestamp('2016'), pd.Timestamp('2017')],
    date_=[datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)],
)
# The explicit column list pins the column order of the frame.
data = pd.DataFrame(frame_values, columns=column_order)
result = _pandas_loaders.build_input_columnar(data, preserve_index=False)

# Nothing is missing, so every expected column shares one all-False mask.
nulls = [False] * 2

# (TColumnData field, values) for each column, in dataframe column order.
expected_payloads = [
    ('int_col', [True, False]),
    ('int_col', np.array([0, 1], dtype=np.int16)),
    ('int_col', np.array([0, 1], dtype=np.int32)),
    ('int_col', np.array([0, 1], dtype=np.int64)),
    ('real_col', np.array([0, 1], dtype=np.float32)),
    ('real_col', np.array([0, 1], dtype=np.float64)),
    ('str_col', ['a', 'b']),
    ('str_col', ['a', 'b']),
    # 00:11:59 and 13:00:00 expressed as seconds since midnight.
    ('int_col', [719, 46800]),
    # 2016-01-01 and 2017-01-01 expressed as Unix seconds.
    ('int_col', [1451606400, 1483228800]),
    ('int_col', [1451606400, 1483228800]),
]
expected = [
    TColumn(TColumnData(**{field: values}), nulls=nulls)
    for field, values in expected_payloads
]
# unreliable since if there is no number outside the int32
# bounds in a column with nulls then we will be assuming int
"int_": np.array([0, 1, None], dtype=np.object),
"bigint_": np.array([0, 9223372036854775807, None],
dtype=np.object),
"double_": np.array([0, 1, None], dtype=np.float64),
"varchar_": ["a", "b", None],
"text_": ['a', 'b', None],
"time_": [datetime.time(0, 11, 59), datetime.time(13), None],
"timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017"), None],
"date_": [datetime.date(1001, 1, 1), datetime.date(2017, 1, 1),
None],
}, columns=['boolean_', 'int_', 'bigint_',
'double_', 'varchar_', 'text_', 'time_', 'timestamp_',
'date_'])
result = _pandas_loaders.build_input_columnar(data,
preserve_index=False)
nulls = [False, False, True]
bool_na = -128
int_na = -2147483648
bigint_na = -9223372036854775808
ns_na = -9223372037
double_na = 0
expected = [
TColumn(TColumnData(int_col=[1, 0, bool_na]), nulls=nulls),
TColumn(TColumnData(int_col=np.array([0, 1, int_na], dtype=np.int32)), nulls=nulls), # noqa
TColumn(TColumnData(int_col=np.array([0, 9223372036854775807, bigint_na], dtype=np.int64)), nulls=nulls), # noqa
TColumn(TColumnData(real_col=np.array([0, 1, double_na], dtype=np.float64)), nulls=nulls), # noqa
TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
TColumn(TColumnData(str_col=['a', 'b', '']), nulls=nulls),
# Validate that there are the same number of columns in the table
# as there are in the dataframe. No point trying to load the data
# if this is not the case
# NOTE(review): the backslash continuations below sit inside the string
# literal, so any leading whitespace on the continuation lines becomes
# part of the error message.
if len(table_details) != len(data.columns):
raise ValueError('Number of columns in dataframe ({}) does not \
match number of columns in OmniSci table \
({})'.format(len(data.columns),
len(table_details)))
# Column names: the first field of each table_details entry when
# col_names_from_schema is truthy, otherwise whatever iterating `data`
# yields (presumably the dataframe's column labels - confirm).
col_names = [i[0] for i in table_details] if \
col_names_from_schema \
else list(data)
# Pairs the fields at index 1 and 4 of each table_details entry; their
# meaning is not visible in this excerpt - TODO confirm against the
# table-details schema.
col_types = [(i[1], i[4]) for i in table_details]
# Convert the dataframe into the iterable of column payloads that is
# loaded below.
input_cols = _pandas_loaders.build_input_columnar(
data,
preserve_index=preserve_index,
chunk_size_bytes=chunk_size_bytes,
col_types=col_types,
col_names=col_names
)
# Pairs with a condition above this excerpt; any `data` that did not
# match it is rejected as an unknown type.
else:
raise TypeError("Unknown type {}".format(type(data)))
# Each element of input_cols is passed to the client in its own
# load_table_binary_columnar call (presumably one call per chunk,
# given chunk_size_bytes - confirm).
for cols in input_cols:
self._client.load_table_binary_columnar(self._session, table_name,
cols)