Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this is the interior of a larger test whose header is not
# visible here; `ft` is created before this excerpt, and the excerpt ends
# before the final comparison of the refitted models.
# One string feature column, including a missing value.
df_train = dt.Frame(["cucumber", None, "shift", "sky", "day", "orange",
"ocean"])
# String target labels, including a missing value.
df_target = dt.Frame(["green", "red", "red", "blue", "green", None,
"blue"])
# Request an interaction of column C0 with itself before fitting.
ft.interactions = [["C0", "C0"]]
ft.fit(df_train, df_target)
# Round-trip the fitted model through pickle.
ft_pickled = pickle.dumps(ft)
ft_unpickled = pickle.loads(ft_pickled)
# The restored model frame must be valid, consist of six float32 columns,
# and equal the original model frame.
frame_integrity_check(ft_unpickled.model)
assert ft_unpickled.model.stypes == (stype.float32,) * 6
assert_equals(ft.model, ft_unpickled.model)
# Feature importances must keep their column names, stypes and values.
assert (ft_unpickled.feature_importances.names ==
('feature_name', 'feature_importance',))
assert (ft_unpickled.feature_importances.stypes ==
(stype.str32, stype.float32))
assert_equals(ft.feature_importances, ft_unpickled.feature_importances)
# Parameters, labels, column names and interactions must survive as well.
assert ft.params == ft_unpickled.params
assert_equals(ft.labels, ft_unpickled.labels)
assert ft.colnames == ft_unpickled.colnames
assert ft.interactions == ft_unpickled.interactions
# Predict
# Both objects must yield equal predictions, and the model frames must
# still be equal after predicting.
target = ft.predict(df_train)
target_unpickled = ft_unpickled.predict(df_train)
assert_equals(ft.model, ft_unpickled.model)
assert_equals(target, target_unpickled)
# Fit and predict
# Fit both objects again on the same data (the follow-up checks are cut off).
ft.fit(df_train, df_target)
target = ft.predict(df_train)
ft_unpickled.fit(df_train, df_target)
def test_join_and_select_g_col():
    """Selecting a g-column in a join must not be confused with an f-column.

    Regression check for issue #1352.
    """
    left = dt.Frame(a=[0, 2, 3], b=[3, 4, 2])
    right = dt.Frame(b=[2, 4], c=["foo", "bar"])
    # The joined frame has to be keyed on the column used for matching.
    right.key = "b"

    joined = left[:, g.c, join(right)]

    frame_integrity_check(joined)
    expected_shape = (3, 1)
    expected_stypes = (stype.str32,)
    assert joined.shape == expected_shape
    assert joined.stypes == expected_stypes
    # The check `joined.names == ("c",)` is left out: not working yet.
    # Row b=3 has no match in the keyed frame, hence the leading None.
    assert joined.to_list() == [[None, "bar", "foo"]]
def test_create_as_str32():
    """A mixed-type list created with stype=str32 becomes one string column."""
    raw_values = [1, 2.7, "foo", None, (3, 4)]
    # Each non-missing value is expected as its string form; None stays None.
    expected_column = ["1", "2.7", "foo", None, "(3, 4)"]

    frame = dt.Frame(raw_values, stype=stype.str32)

    frame_integrity_check(frame)
    assert frame.stypes == (stype.str32, )
    assert frame.shape == (5, 1)
    assert frame.to_list() == [expected_column]
stype.float64, stype.str32, stype.str64])
# NOTE(review): this excerpt has lost its indentation and is truncated; only
# the random-value helpers are visible, not the code that uses them or `st`.
def test_sort_view_all_stypes(st):
# Random boolean, or None (one of the five equally likely choices).
def random_bool():
return random.choice([True, True, False, False, None])
# None when random.random() < 0.1, otherwise an integer from
# random.randint(-100000, 1999873).
def random_int():
if random.random() < 0.1: return None
return random.randint(-100000, 1999873)
# None when random.random() < 0.1, otherwise a sample from
# random.normalvariate(5, 10).
def random_real():
if random.random() < 0.1: return None
return random.normalvariate(5, 10)
# None when random.random() < 0.1, otherwise the result of random_string()
# (defined elsewhere) called with an integer between 1 and 20.
def random_str():
if random.random() < 0.1: return None
return random_string(random.randint(1, 20))
def transform(self, X: dt.Frame):
    """Return a frame holding the first column of ``X`` cast to str32."""
    first_column_as_text = dt.stype.str32(dt.f[0])
    return X[:, first_column_as_text]
def transform(self, X: dt.Frame):
    """Return the log of the only column of ``X``, cast to int32 then str32.

    Raises:
        AssertionError: if ``X`` does not have exactly one column.
    """
    assert X.ncols == 1
    # Build the expression inside-out: log -> int32 -> str32.
    log_expr = dt.log(dt.f[0])
    int_expr = dt.stype.int32(log_expr)
    text_expr = dt.stype.str32(int_expr)
    return X[:, text_expr]
def transform(self, X: dt.Frame):
    """Flag each row whose (year, day-of-year) appears in ``self.memo``.

    The time column named by ``self.time_column`` is parsed with the format
    stored in ``self.datetime_formats``, reduced to ``year`` and ``doy``
    (day of year), and left-joined against ``self.memo`` on those two keys.

    Args:
        X: datatable frame containing the column ``self.time_column``.

    Returns:
        A pandas DataFrame with the single integer column ``is_ramadan``:
        1 where the row's (year, doy) matched a row of ``self.memo``, else 0.

    Raises:
        AssertionError: if the time column is not a string column and its
            configured format is neither ``"%Y%m%d"`` nor ``"%Y%m%d%H%M"``.
    """
    time_col = self.time_column
    time_format = self.datetime_formats[time_col]

    X = X[:, time_col]
    if X[:, time_col].ltypes[0] != dt.ltype.str:
        # Non-string dates are only supported in the purely numeric layouts,
        # which survive an int64 -> string round trip.
        assert time_format in ["%Y%m%d", "%Y%m%d%H%M"]
        X[:, time_col] = dt.stype.str32(dt.stype.int64(dt.f[0]))
    # Treat empty strings and the literal 'None' as missing values.
    X.replace(['', 'None'], None)
    X = X.to_pandas()

    # Plain column assignment replaces the column together with its dtype.
    # `X.loc[:, col] = ...` is set in place on pandas >= 2.0, which can leave
    # the column as object dtype and break the `.dt` accessor used below.
    X[time_col] = pd.to_datetime(X[time_col], format=time_format)
    X['year'] = X[time_col].dt.year
    X['doy'] = X[time_col].dt.dayofyear
    X.drop(time_col, axis=1, inplace=True)

    feat = 'is_ramadan'
    # Join against a flagged copy so the shared `self.memo` is never left
    # with a stray column if the merge raises.
    flagged = self.memo.assign(**{feat: 1})
    X = X.merge(flagged, how='left', on=['year', 'doy']).fillna(0)
    X = X[[feat]].astype(int)
    return X
def transform(self, X: dt.Frame):
    """Flag each row whose (year, day-of-year) appears in ``self.memo``.

    The time column named by ``self.time_column`` is parsed with the format
    stored in ``self.datetime_formats``, reduced to ``year`` and ``doy``
    (day of year), and left-joined against ``self.memo`` on those two keys.

    Args:
        X: datatable frame containing the column ``self.time_column``.

    Returns:
        A pandas DataFrame with the single integer column ``is_holiday``:
        1 where the row's (year, doy) matched a row of ``self.memo``, else 0.

    Raises:
        AssertionError: if the time column is not a string column and its
            configured format is neither ``"%Y%m%d"`` nor ``"%Y%m%d%H%M"``.
    """
    time_col = self.time_column
    time_format = self.datetime_formats[time_col]

    X = X[:, time_col]
    if X[:, time_col].ltypes[0] != dt.ltype.str:
        # Non-string dates are only supported in the purely numeric layouts,
        # which survive an int64 -> string round trip.
        assert time_format in ["%Y%m%d", "%Y%m%d%H%M"]
        X[:, time_col] = dt.stype.str32(dt.stype.int64(dt.f[0]))
    # Treat empty strings and the literal 'None' as missing values.
    X.replace(['', 'None'], None)
    X = X.to_pandas()

    # Plain column assignment replaces the column together with its dtype.
    # `X.loc[:, col] = ...` is set in place on pandas >= 2.0, which can leave
    # the column as object dtype and break the `.dt` accessor used below.
    X[time_col] = pd.to_datetime(X[time_col], format=time_format)
    X['year'] = X[time_col].dt.year
    X['doy'] = X[time_col].dt.dayofyear
    X.drop(time_col, axis=1, inplace=True)

    feat = 'is_holiday'
    # Join against a flagged copy so the shared `self.memo` is never left
    # with a stray column if the merge raises.
    flagged = self.memo.assign(**{feat: 1})
    X = X.merge(flagged, how='left', on=['year', 'doy']).fillna(0)
    X = X[[feat]].astype(int)
    return X