How to use the datatable.stype function in datatable

To help you get started, we’ve selected a few datatable examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github h2oai / datatable / tests / test_dt.py View on Github external
def test_numpy_constructor_multi_types(numpy):
    """Check that a frame with mixed column types converts to one numpy dtype.

    The frame holds int8, bool8, int32 and float64 columns; the resulting
    numpy array is expected to be float64 throughout.
    """
    columns = [[1, 5, 10],
               [True, False, False],
               [30498, 1349810, -134308],
               [1.454, 4.9e-23, 10000000]]
    frame = dt.Frame(columns)
    expected_stypes = (stype.int8, stype.bool8, stype.int32, stype.float64)
    assert frame.stypes == expected_stypes

    arr = numpy.array(frame)
    assert arr.dtype == numpy.dtype("float64")

    # The array is transposed back so that each inner list is one column.
    expected_columns = [[1.0, 5.0, 10.0],
                        [1.0, 0, 0],
                        [30498, 1349810, -134308],
                        [1.454, 4.9e-23, 1e7]]
    assert arr.T.tolist() == expected_columns
    assert (frame.to_numpy() == arr).all()
github h2oai / datatable / tests / munging / test_rbind.py View on Github external
def test_rbind_views0():
    """Row-bind a regular frame onto a frame obtained by slicing rows 3..6."""
    # view + data
    target = dt.Frame({"d": range(10), "s": list("abcdefghij")})
    target = target[3:7, :]
    extra = dt.Frame({"d": [-1, -2], "s": ["the", "end"]})
    target.rbind(extra)

    expected = dt.Frame(
        [[3, 4, 5, 6, -1, -2],
         ["d", "e", "f", "g", "the", "end"]],
        names=["d", "s"],
        stypes=[stype.int32, stype.str32],
    )
    assert_equals(target, expected)
github h2oai / datatable / tests / test_types.py View on Github external
def test_stype_instantiate():
    """Check that `stype(...)` resolves members, python types and string aliases."""
    from datatable import stype

    # Every member must round-trip through itself and its value/name/code.
    for member in stype:
        for key in (member, member.value, member.name, member.code):
            assert stype(key) is member

    # Python builtin types and string aliases, paired with the expected stype.
    aliases = [
        (bool, stype.bool8),
        ("b1", stype.bool8),
        ("bool", stype.bool8),
        ("boolean", stype.bool8),
        (int, stype.int64),
        ("int", stype.int64),
        ("integer", stype.int64),
        ("int8", stype.int8),
        ("int16", stype.int16),
        ("int32", stype.int32),
        ("int64", stype.int64),
        (float, stype.float64),
        ("real", stype.float64),
        ("float", stype.float64),
        ("float32", stype.float32),
        ("float64", stype.float64),
    ]
    for key, expected in aliases:
        assert stype(key) is expected
github h2oai / datatable / tests / test_types.py View on Github external
assert stype("boolean") is stype.bool8
    assert stype(int) is stype.int64
    assert stype("int") is stype.int64
    assert stype("integer") is stype.int64
    assert stype("int8") is stype.int8
    assert stype("int16") is stype.int16
    assert stype("int32") is stype.int32
    assert stype("int64") is stype.int64
    assert stype(float) is stype.float64
    assert stype("real") is stype.float64
    assert stype("float") is stype.float64
    assert stype("float32") is stype.float32
    assert stype("float64") is stype.float64
    assert stype(str) is stype.str64
    assert stype("str") is stype.str64
    assert stype("str32") is stype.str32
    assert stype("str64") is stype.str64
    assert stype(object) is stype.obj64
    assert stype("obj") is stype.obj64
    assert stype("object") is stype.obj64
github h2oai / datatable / tests / test_types.py View on Github external
from datatable import stype
    for st in stype:
        assert stype(st) is st
        assert stype(st.value) is st
        assert stype(st.name) is st
        assert stype(st.code) is st
    assert stype(bool) is stype.bool8
    assert stype("b1") is stype.bool8
    assert stype("bool") is stype.bool8
    assert stype("boolean") is stype.bool8
    assert stype(int) is stype.int64
    assert stype("int") is stype.int64
    assert stype("integer") is stype.int64
    assert stype("int8") is stype.int8
    assert stype("int16") is stype.int16
    assert stype("int32") is stype.int32
    assert stype("int64") is stype.int64
    assert stype(float) is stype.float64
    assert stype("real") is stype.float64
    assert stype("float") is stype.float64
    assert stype("float32") is stype.float32
    assert stype("float64") is stype.float64
    assert stype(str) is stype.str64
    assert stype("str") is stype.str64
    assert stype("str32") is stype.str32
    assert stype("str64") is stype.str64
    assert stype(object) is stype.obj64
    assert stype("obj") is stype.obj64
    assert stype("object") is stype.obj64
github h2oai / datatable / tests / fread / test_fread_issues.py View on Github external
def test_issue998():
    """Read a large CSV single-threaded and verify shape, names, types and sums."""
    # The file is 1.46GB in size; no smaller file was found that exhibits the
    # problem. The issue only appeared in single-threaded mode, so the file
    # has to be read slowly (about 8s on the original author's laptop).
    path = find_file("h2o-3", "bigdata", "laptop", "higgs_head_2M.csv")
    frame = dt.fread(path, nthreads=1, fill=True, na_strings=["-999"])

    assert frame.shape == (2000000, 29)
    assert frame.names == tuple("C%d" % i for i in range(frame.ncols))
    assert frame.stypes == (dt.stype.float64,) * frame.ncols

    # Expected per-column sums, one single-element list per column.
    expected_sums = [
        [1058818.0], [1981919.6107614636], [701.7858121241807],
        [-195.48500674014213], [1996390.3476011853], [-1759.5364254778178],
        [1980743.446578741], [-1108.7512905876065], [1712.947751407064],
        [2003064.4534490108], [1985100.3810670376], [1190.8404791812281],
        [384.00605312064], [1998592.0739881992], [1984490.1900614202],
        [2033.9754767678387], [-1028.0810855487362], [2001341.0813384056],
        [1971311.3271338642], [-943.92552991907], [-1079.3848229270661],
        [1996588.295421958], [2068619.2163415626], [2049516.5437491536],
        [2100795.4839400873], [2019540.6562294513], [1946283.046177674],
        [2066298.020782411], [1919714.12131235],
    ]
    assert same_iterables(frame.sum().to_list(), expected_sums)
github h2oai / datatable / tests / test_dt.py View on Github external
def test_dt_properties(dt0):
    """Verify the basic shape/name/type properties of the `dt0` fixture frame."""
    assert isinstance(dt0, dt.Frame)
    frame_integrity_check(dt0)

    nrows, ncols = 4, 7
    assert dt0.nrows == nrows
    assert dt0.ncols == ncols
    assert dt0.shape == (nrows, ncols)
    assert dt0.names == ("A", "B", "C", "D", "E", "F", "G")

    expected_ltypes = (
        ltype.int, ltype.bool, ltype.bool, ltype.real,
        ltype.bool, ltype.bool, ltype.str,
    )
    assert dt0.ltypes == expected_ltypes

    expected_stypes = (
        stype.int8, stype.bool8, stype.bool8, stype.float64,
        stype.bool8, stype.bool8, stype.str32,
    )
    assert dt0.stypes == expected_stypes

    assert sys.getsizeof(dt0) > 500
github h2oai / datatable / tests / test_types.py View on Github external
def test_stype_instantiate_bad():
    """Check that `stype(...)` raises TValueError for unsupported arguments."""
    from datatable import stype

    invalid_args = (-1, 0, ["i", "4"], 1.5, True)
    for bad in invalid_args:
        with pytest.raises(TValueError):
            # Printing surfaces the unexpectedly-created value if no error occurs.
            print(stype(bad))
github h2oai / driverlessai-recipes / transformers / augmentation / france_bank_holidays.py View on Github external
def transform(self, X: dt.Frame):
        """Flag each row of the time column as holiday (1) or not (0).

        The time column is parsed with the format stored in
        ``self.datetime_formats`` and reduced to (year, day-of-year), which is
        left-joined against ``self.memo``.

        ``self.memo`` is expected to be a pandas DataFrame with ``year`` and
        ``doy`` columns; rows with a match get 1, all others 0.

        :param X: datatable Frame containing ``self.time_column``.
        :return: pandas DataFrame with a single integer column ``is_holiday``.
        """
        col = self.time_column
        X = X[:, col]
        if X.ltypes[0] != dt.ltype.str:
            # Non-string time columns are only supported for purely numeric
            # formats; they are cast to int64 and then to string for parsing.
            assert self.datetime_formats[col] in ["%Y%m%d", "%Y%m%d%H%M"]
            X[:, col] = dt.stype.str32(dt.stype.int64(dt.f[0]))
        X.replace(['', 'None'], None)
        X = X.to_pandas()
        # Assign the column directly rather than through `.loc[:, col]`:
        # since pandas 2.0 a full-column `.loc` assignment writes in place and
        # keeps the existing object dtype, which breaks the `.dt` accessor.
        X[col] = pd.to_datetime(X[col], format=self.datetime_formats[col])

        X['year'] = X[col].dt.year
        X['doy'] = X[col].dt.dayofyear
        X.drop(col, axis=1, inplace=True)
        feat = 'is_holiday'
        # Build the flagged lookup table as a copy so that `self.memo` is never
        # left with a stray column if the merge below raises.
        flagged = self.memo.assign(**{feat: 1})
        X = X.merge(flagged, how='left', on=['year', 'doy']).fillna(0)
        return X[[feat]].astype(int)
github h2oai / datatable / datatable / expr / expr.py View on Github external
def __getitem__(self, item):
        """Build a column-selection expression for ``item``.

        Integers, strings and slices are accepted directly. Any other selector
        must be one of the python types ``bool``/``int``/``float``/``str``/
        ``object``, ``None``, or an ``stype``/``ltype`` instance; otherwise
        datatable's ``TypeError`` is raised.
        """
        if isinstance(item, (int, str, slice)):
            return Expr(OpCodes.COL, (item,), (self._id,))

        # Imported lazily, only on the less common selector path.
        from datatable import TypeError, stype, ltype
        type_selectors = [bool, int, float, str, object, None]
        if item not in type_selectors and not isinstance(item, (stype, ltype)):
            raise TypeError("Column selector should be an integer, string, "
                            "or slice, not %r" % type(item))
        return Expr(OpCodes.COL, (item,), (self._id,))