How to use the datatable.stype.str32 function in datatable

To help you get started, we’ve selected a few datatable examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github h2oai / datatable / tests / models / test_ftrl.py View on Github external
df_train = dt.Frame(["cucumber", None, "shift", "sky", "day", "orange",
                         "ocean"])
    df_target = dt.Frame(["green", "red", "red", "blue", "green", None,
                          "blue"])
    ft.interactions = [["C0", "C0"]]
    ft.fit(df_train, df_target)

    ft_pickled = pickle.dumps(ft)
    ft_unpickled = pickle.loads(ft_pickled)
    frame_integrity_check(ft_unpickled.model)
    assert ft_unpickled.model.stypes == (stype.float32,) * 6
    assert_equals(ft.model, ft_unpickled.model)
    assert (ft_unpickled.feature_importances.names ==
            ('feature_name', 'feature_importance',))
    assert (ft_unpickled.feature_importances.stypes ==
            (stype.str32, stype.float32))
    assert_equals(ft.feature_importances, ft_unpickled.feature_importances)
    assert ft.params == ft_unpickled.params
    assert_equals(ft.labels, ft_unpickled.labels)
    assert ft.colnames == ft_unpickled.colnames
    assert ft.interactions == ft_unpickled.interactions

    # Predict
    target = ft.predict(df_train)
    target_unpickled = ft_unpickled.predict(df_train)
    assert_equals(ft.model, ft_unpickled.model)
    assert_equals(target, target_unpickled)

    # Fit and predict
    ft.fit(df_train, df_target)
    target = ft.predict(df_train)
    ft_unpickled.fit(df_train, df_target)
github h2oai / datatable / tests / test_join.py View on Github external
def test_join_and_select_g_col():
    """Selecting a g-column through a join must not be confused with an f-column.

    Regression test for issue #1352.
    """
    left = dt.Frame(a=[0, 2, 3], b=[3, 4, 2])
    right = dt.Frame(b=[2, 4], c=["foo", "bar"])
    right.key = "b"
    joined = left[:, g.c, join(right)]
    frame_integrity_check(joined)
    assert joined.shape == (3, 1)
    assert joined.stypes == (stype.str32,)
    # NOTE: the resulting column name (expected to be "c") is not asserted
    # here because that check is not working yet.
    assert joined.to_list() == [[None, "bar", "foo"]]
github h2oai / datatable / tests / test_dt_create.py View on Github external
def test_create_as_str32():
    """A Frame built from mixed values with stype=str32 holds their string forms."""
    values = [1, 2.7, "foo", None, (3, 4)]
    as_strings = ["1", "2.7", "foo", None, "(3, 4)"]
    frame = dt.Frame(values, stype=stype.str32)
    frame_integrity_check(frame)
    assert frame.stypes == (stype.str32, )
    assert frame.shape == (5, 1)
    # None stays a missing value; every other item is stringified.
    assert frame.to_list() == [as_strings]
github h2oai / datatable / tests / test_dt_sort.py View on Github external
                                stype.float64, stype.str32, stype.str64])
def test_sort_view_all_stypes(st):
    def random_bool():
        return random.choice([True, True, False, False, None])

    def random_int():
        if random.random() < 0.1: return None
        return random.randint(-100000, 1999873)

    def random_real():
        if random.random() < 0.1: return None
        return random.normalvariate(5, 10)

    def random_str():
        if random.random() < 0.1: return None
        return random_string(random.randint(1, 20))
github h2oai / driverlessai-recipes / recipes / amazon.py View on Github external
def transform(self, X: dt.Frame):
        """Return the first column of ``X`` cast to ``str32``."""
        first_col_as_str = dt.stype.str32(dt.f[0])
        return X[:, first_col_as_str]
github h2oai / driverlessai-recipes / transformers / hierarchical / log_scale_target_encoding.py View on Github external
def transform(self, X: dt.Frame):
        """Return log of the single column of ``X``, cast to int32 and then str32.

        ``X`` must have exactly one column.
        """
        assert X.ncols == 1
        log_value = dt.log(dt.f[0])
        log_as_int = dt.stype.int32(log_value)
        log_as_str = dt.stype.str32(log_as_int)
        return X[:, log_as_str]
github h2oai / driverlessai-recipes / transformers / augmentation / is_ramadan.py View on Github external
def transform(self, X: dt.Frame):
        """Build the ``is_ramadan`` indicator for the time column of ``X``.

        Only ``self.time_column`` is kept from ``X``. Its values are parsed
        with the format stored in ``self.datetime_formats`` and reduced to a
        (year, day-of-year) pair, which is left-joined against ``self.memo``.

        Returns a pandas DataFrame with the single integer column
        ``is_ramadan``: 1 where the (year, doy) pair is present in
        ``self.memo``, 0 otherwise.

        NOTE(review): ``self.memo`` is presumably a table of Ramadan days
        keyed by ``year`` and ``doy`` -- confirm against where it is built.
        """
        X = X[:, self.time_column]
        if X[:, self.time_column].ltypes[0] != dt.ltype.str:
            # A non-string time column is only accepted for the purely numeric
            # formats; it is cast to int64 and then to str32 so that
            # ``pd.to_datetime`` can parse it with the explicit format below.
            assert self.datetime_formats[self.time_column] in ["%Y%m%d", "%Y%m%d%H%M"]
            X[:, self.time_column] = dt.stype.str32(dt.stype.int64(dt.f[0]))
        X.replace(['', 'None'], None)
        X = X.to_pandas()
        # Plain column assignment swaps in the parsed datetime column.
        # ``X.loc[:, col] = ...`` is written in place on pandas >= 2.0, which
        # keeps the column as ``object`` and makes the ``.dt`` accessor below
        # raise AttributeError.
        X[self.time_column] = pd.to_datetime(X[self.time_column],
                                             format=self.datetime_formats[self.time_column])
        X['year'] = X[self.time_column].dt.year
        X['doy'] = X[self.time_column].dt.dayofyear
        X.drop(self.time_column, axis=1, inplace=True)
        feat = 'is_ramadan'
        # Merge against a tagged copy of the lookup table instead of adding and
        # then dropping a column on ``self.memo``: the shared table is never
        # left modified if the merge raises.
        tagged_memo = self.memo.assign(**{feat: 1})
        X = X.merge(tagged_memo, how='left', on=['year', 'doy']).fillna(0)
        X = X[[feat]].astype(int)
        return X
github h2oai / driverlessai-recipes / transformers / timeseries / augmentation / singapore_public_holidays.py View on Github external
def transform(self, X: dt.Frame):
        """Build the ``is_holiday`` indicator for the time column of ``X``.

        Only ``self.time_column`` is kept from ``X``. Its values are parsed
        with the format stored in ``self.datetime_formats`` and reduced to a
        (year, day-of-year) pair, which is left-joined against ``self.memo``.

        Returns a pandas DataFrame with the single integer column
        ``is_holiday``: 1 where the (year, doy) pair is present in
        ``self.memo``, 0 otherwise.

        NOTE(review): ``self.memo`` is presumably a table of public holidays
        keyed by ``year`` and ``doy`` -- confirm against where it is built.
        """
        X = X[:, self.time_column]
        if X[:, self.time_column].ltypes[0] != dt.ltype.str:
            # A non-string time column is only accepted for the purely numeric
            # formats; it is cast to int64 and then to str32 so that
            # ``pd.to_datetime`` can parse it with the explicit format below.
            assert self.datetime_formats[self.time_column] in ["%Y%m%d", "%Y%m%d%H%M"]
            X[:, self.time_column] = dt.stype.str32(dt.stype.int64(dt.f[0]))
        X.replace(['', 'None'], None)
        X = X.to_pandas()
        # Plain column assignment swaps in the parsed datetime column.
        # ``X.loc[:, col] = ...`` is written in place on pandas >= 2.0, which
        # keeps the column as ``object`` and makes the ``.dt`` accessor below
        # raise AttributeError.
        X[self.time_column] = pd.to_datetime(X[self.time_column],
                                             format=self.datetime_formats[self.time_column])

        X['year'] = X[self.time_column].dt.year
        X['doy'] = X[self.time_column].dt.dayofyear
        X.drop(self.time_column, axis=1, inplace=True)
        feat = 'is_holiday'
        # Merge against a tagged copy of the lookup table instead of adding and
        # then dropping a column on ``self.memo``: the shared table is never
        # left modified if the merge raises.
        tagged_memo = self.memo.assign(**{feat: 1})
        X = X.merge(tagged_memo, how='left', on=['year', 'doy']).fillna(0)
        X = X[[feat]].astype(int)
        return X