Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_numpy_constructor_multi_types(numpy):
    """Converting a mixed-type Frame to numpy yields one common dtype.

    The frame holds int8, bool8, int32 and float64 columns; the resulting
    numpy array is expected to be float64 throughout.
    """
    columns = [
        [1, 5, 10],
        [True, False, False],
        [30498, 1349810, -134308],
        [1.454, 4.9e-23, 10000000],
    ]
    frame = dt.Frame(columns)
    expected_stypes = (stype.int8, stype.bool8, stype.int32, stype.float64)
    assert frame.stypes == expected_stypes

    arr = numpy.array(frame)
    assert arr.dtype == numpy.dtype("float64")

    # The array is transposed before comparing so that each inner list
    # lines up with one source column.
    expected_columns = [
        [1.0, 5.0, 10.0],
        [1.0, 0, 0],
        [30498, 1349810, -134308],
        [1.454, 4.9e-23, 1e7],
    ]
    assert arr.T.tolist() == expected_columns

    # The explicit conversion method must agree with numpy.array().
    assert (frame.to_numpy() == arr).all()
def test_rbind_views0():
    """Row-bind a plain Frame onto a row-slice ("view") of another Frame."""
    # view + data: the target is rows 3..6 of a ten-row frame.
    sliced = dt.Frame({"d": range(10), "s": list("abcdefghij")})[3:7, :]
    extra = dt.Frame({"d": [-1, -2], "s": ["the", "end"]})
    sliced.rbind(extra)

    expected = dt.Frame(
        [
            [3, 4, 5, 6, -1, -2],
            ["d", "e", "f", "g", "the", "end"],
        ],
        names=["d", "s"],
        stypes=[stype.int32, stype.str32],
    )
    assert_equals(sliced, expected)
def test_stype_instantiate():
    """Check every accepted way of constructing an ``stype``.

    Two groups of checks are performed:

    * every enum member round-trips through itself and through its own
      ``value``, ``name`` and ``code``;
    * Python builtin types and string aliases resolve to the expected member.

    Each check is performed exactly once; verbatim repeats of the same
    assertions added run time without adding coverage.
    """
    from datatable import stype

    for st in stype:
        assert stype(st) is st
        assert stype(st.value) is st
        assert stype(st.name) is st
        assert stype(st.code) is st

    # (expected member, every spelling that must resolve to it)
    aliases = [
        (stype.bool8, [bool, "b1", "bool", "boolean"]),
        (stype.int8, ["int8"]),
        (stype.int16, ["int16"]),
        (stype.int32, ["int32"]),
        (stype.int64, [int, "int", "integer", "int64"]),
        (stype.float32, ["float32"]),
        (stype.float64, [float, "real", "float", "float64"]),
        (stype.str32, ["str32"]),
        (stype.str64, [str, "str", "str64"]),
        (stype.obj64, [object, "obj", "object"]),
    ]
    for expected, spellings in aliases:
        for spelling in spellings:
            # The message identifies which alias failed, since the loop
            # hides the literal from the assertion's source line.
            assert stype(spelling) is expected, (
                "stype(%r) should be %r" % (spelling, expected))
def test_issue998():
    """Read a large CSV single-threaded and verify shape, types and sums."""
    # The file is 1.46GB in size; no smaller file was found that exhibits
    # the problem. The issue only appeared in single-threaded mode, hence
    # nthreads=1 and the slow read (about 8s on the author's laptop).
    src = find_file("h2o-3", "bigdata", "laptop", "higgs_head_2M.csv")

    # Expected per-column sums, in column order.
    column_sums = [
        1058818.0, 1981919.6107614636, 701.7858121241807,
        -195.48500674014213, 1996390.3476011853, -1759.5364254778178,
        1980743.446578741, -1108.7512905876065, 1712.947751407064,
        2003064.4534490108, 1985100.3810670376, 1190.8404791812281,
        384.00605312064, 1998592.0739881992, 1984490.1900614202,
        2033.9754767678387, -1028.0810855487362, 2001341.0813384056,
        1971311.3271338642, -943.92552991907, -1079.3848229270661,
        1996588.295421958, 2068619.2163415626, 2049516.5437491536,
        2100795.4839400873, 2019540.6562294513, 1946283.046177674,
        2066298.020782411, 1919714.12131235,
    ]

    frame = dt.fread(src, nthreads=1, fill=True, na_strings=["-999"])
    ncols = frame.ncols
    assert frame.shape == (2000000, 29)
    assert frame.names == tuple("C%d" % i for i in range(ncols))
    assert frame.stypes == (dt.stype.float64,) * ncols

    # to_list() returns one single-element list per column.
    expected = [[total] for total in column_sums]
    assert same_iterables(frame.sum().to_list(), expected)
def test_dt_properties(dt0):
    """Verify the basic shape, name and type properties of ``dt0``."""
    assert isinstance(dt0, dt.Frame)
    frame_integrity_check(dt0)

    n_rows, n_cols = 4, 7
    assert dt0.nrows == n_rows
    assert dt0.ncols == n_cols
    assert dt0.shape == (n_rows, n_cols)
    assert dt0.names == tuple("ABCDEFG")

    expected_ltypes = (
        ltype.int, ltype.bool, ltype.bool, ltype.real,
        ltype.bool, ltype.bool, ltype.str,
    )
    assert dt0.ltypes == expected_ltypes

    expected_stypes = (
        stype.int8, stype.bool8, stype.bool8, stype.float64,
        stype.bool8, stype.bool8, stype.str32,
    )
    assert dt0.stypes == expected_stypes

    # Lower bound on the reported object size.
    assert sys.getsizeof(dt0) > 500
def test_stype_instantiate_bad():
    """Constructing an ``stype`` from an invalid argument raises TValueError."""
    from datatable import stype

    invalid_args = (-1, 0, ["i", "4"], 1.5, True)
    for bad in invalid_args:
        with pytest.raises(TValueError):
            # print() is only reached if the constructor fails to raise,
            # in which case it shows what was produced instead.
            print(stype(bad))
def transform(self, X: dt.Frame):
    """Return a one-column pandas frame flagging rows that match ``self.memo``.

    Only ``self.time_column`` of ``X`` is used. Its values are parsed with
    ``self.datetime_formats[self.time_column]``, reduced to (year, day of
    year) and left-joined against ``self.memo``.

    :param X: datatable Frame containing ``self.time_column``.
    :return: pandas DataFrame with a single integer column ``is_holiday``
        (1 where the row's (year, doy) is found in ``self.memo``, else 0).

    NOTE(review): ``self.memo`` is presumably a pandas DataFrame keyed by
    ``year`` and ``doy`` that is built elsewhere -- confirm against the
    code that populates it.
    """
    time_col = self.time_column
    X = X[:, time_col]
    fmt = self.datetime_formats[time_col]

    # X now has exactly one column, so its first ltype is the time column's.
    if X.ltypes[0] != dt.ltype.str:
        # Non-string input is only supported for these purely numeric formats.
        assert fmt in ["%Y%m%d", "%Y%m%d%H%M"]
        # f[0] is the (only) time column: cast to int64, then to string.
        X[:, time_col] = dt.stype.str32(dt.stype.int64(dt.f[0]))
    X.replace(['', 'None'], None)
    X = X.to_pandas()

    # Keep the parsed timestamps in their own Series instead of writing them
    # back with ``X.loc[:, col] = ...``: on pandas >= 2.0 that assignment is
    # done in place, leaving an object-dtype column on which ``.dt`` fails.
    timestamps = pd.to_datetime(X[time_col], format=fmt)
    keys = pd.DataFrame({
        'year': timestamps.dt.year,
        'doy': timestamps.dt.dayofyear,
    })

    feat = 'is_holiday'
    # assign() returns a copy, so self.memo is never modified and cannot be
    # left with a stray column if the merge below raises.
    flagged = self.memo.assign(**{feat: 1})
    merged = keys.merge(flagged, how='left', on=['year', 'doy']).fillna(0)
    return merged[[feat]].astype(int)
def __getitem__(self, item):
    """Build a column-selector expression for ``item``.

    Integers, strings and slices are accepted directly. Otherwise ``item``
    must be one of ``bool``, ``int``, ``float``, ``str``, ``object``,
    ``None``, or an ``stype``/``ltype`` instance; anything else raises
    datatable's ``TypeError``.
    """
    if not isinstance(item, (int, str, slice)):
        # Imported lazily; note that this TypeError is datatable's own
        # exception class and shadows the builtin within this method.
        from datatable import TypeError, stype, ltype

        is_type_selector = (
            item in [bool, int, float, str, object, None]
            or isinstance(item, (stype, ltype))
        )
        if not is_type_selector:
            message = ("Column selector should be an integer, string, "
                       "or slice, not %r" % type(item))
            raise TypeError(message)
    return Expr(OpCodes.COL, (item,), (self._id,))