Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this looks like the body of a test function whose `def` line
# is not part of the excerpt; `epsilon`, `assert_equals`, `Ftrl`, `dt` and
# `pytest` are all defined outside the visible lines.

# Binomial FTRL model that is fitted several times in a row below.
ft = Ftrl(alpha = 0.1, nepochs = 10000, model_type = "binomial")
# First fit: the target contains the single label "odd", so the labels frame
# is expected to hold just that label with id 0.
df_train_odd = dt.Frame([[1, 3, 7, 5, 9]])
df_target_odd = dt.Frame([["odd", "odd", "odd", "odd", "odd"]])
ft.fit(df_train_odd, df_target_odd)
assert_equals(ft.labels, dt.Frame([["odd"], [0]], names = ["label", "id"]))
# Second fit: the target brings two labels that were not in the first fit
# ("even" and "none"); this is expected to be rejected with a ValueError.
df_train_wrong = dt.Frame([[2, 4, None, 6]])
df_target_wrong = dt.Frame([["even", "even", "none", "even"]])
with pytest.raises(ValueError) as e:
ft.fit(df_train_wrong, df_target_wrong)
# The error message is compared for exact equality, not as a substring.
assert ("Got two new labels in the target column, however, positive "
"label is already set"
== str(e.value))
# Third fit: only one new label ("even") is added; the expected labels frame
# lists "even" with id 1 and "odd" with id 0.
df_train_even_odd = dt.Frame([[2, 1, 8, 3]])
df_target_even_odd = dt.Frame([["even", "odd", "even", "odd"]])
ft.fit(df_train_even_odd, df_target_even_odd)
assert_equals(ft.labels, dt.Frame([["even", "odd"], [1, 0]], names = ["label", "id"]))
# Predictions for the all-odd training frame: every "odd" value should be
# within `epsilon` of 1 and every "even" value within `epsilon` of 0.
p = ft.predict(df_train_odd)
p_dict = p.to_dict()
delta_odd = [abs(i - j) for i, j in zip(p_dict["odd"], [1, 1, 1, 1, 1])]
delta_even = [abs(i - j) for i, j in zip(p_dict["even"], [0, 0, 0, 0, 0])]
assert ft.model_type_trained == "binomial"
assert max(delta_odd) < epsilon
assert max(delta_even) < epsilon
# Predictions for the mixed even/odd training frame.
# NOTE(review): the two deltas below are computed but not asserted within the
# visible lines -- the excerpt appears to be cut off before the final checks.
p = ft.predict(df_train_even_odd)
p_dict = p.to_dict()
delta_even = [abs(i - j) for i, j in zip(p_dict["even"], [1, 0, 1, 0])]
delta_odd = [abs(i - j) for i, j in zip(p_dict["odd"], [0, 1, 0, 1])]
assert ft.model_type_trained == "binomial"
def test_to_dict():
    """Check that `Frame.to_dict()` maps each column name to its list of values.

    The frame is built from a string, an integer (with one missing value)
    and a float column, and the resulting dict must reproduce them exactly.
    """
    expected = {
        "A": ["purple", "yellow", "indigo", "crimson"],
        "B": [0, None, 123779, -299],
        "C": [1.23, 4.56, 7.89, 10.11],
    }
    frame = dt.Frame(**expected)
    assert frame.to_dict() == expected
def dt0():
    """Build a 4-row frame with seven columns named "A" through "G".

    The columns hold, in order: integers, booleans, constant ones, mixed
    int/float numbers, all-missing values, constant zeros, and strings.
    """
    column_names = list("ABCDEFG")
    column_values = [
        [2, 7, 0, 0],
        [True, False, False, True],
        [1, 1, 1, 1],
        [0.1, 2, -4, 4.4],
        [None, None, None, None],
        [0, 0, 0, 0],
        ["1", "2", "hello", "world"],
    ]
    return dt.Frame(column_values, names=column_names)
def test_aggregate_0d_continuous_integer_random():
    """Aggregate a 13-row integer column that contains missing values.

    Checks both the members frame returned by `aggregate()` and the input
    frame afterwards, which the test expects to have two columns.
    """
    # `nrows < min_rows`, so we also test that the `n_bins` input is ignored
    bins_requested = 3
    rows_threshold = 500
    source = dt.Frame([None, 9, 8, None, 2, 3, 3, 0, 5, 5, 8, 1, None])
    members = aggregate(source, min_rows=rows_threshold,
                        n_bins=bins_requested,
                        progress_fn=report_progress)

    # The returned frame: one integer column with 13 rows.
    expected_members = [[0, 12, 10, 1, 5, 6, 7, 3, 8, 9, 11, 4, 2]]
    members.internal.check()
    assert members.shape == (13, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    # The input frame after the call: the values in sorted order (missing
    # values first) plus a second integer column that is 1 in every row.
    expected_values = [None, None, None, 0, 1, 2, 3, 3, 5, 5, 8, 8, 9]
    expected_second = [1] * 13
    source.internal.check()
    assert source.shape == (13, 2)
    assert source.ltypes == (ltype.int, ltype.int)
    assert source.to_list() == [expected_values, expected_second]
def test_groupby_on_view():
    """Group-by with a reduction must work on a row-filtered view.

    See issue #1542.
    """
    source = dt.Frame(A=[1, 2, 3, 1, 2, 3],
                      B=[3, 6, 2, 4, 3, 1],
                      C=['b', 'd', 'b', 'b', 'd', 'b'])

    # Dropping the rows where A == 1 must yield a view, not a copy.
    view = source[f.A != 1, :]
    assert isview(view)
    expected_view = dt.Frame(A=[2, 3, 2, 3],
                             B=[6, 2, 3, 1],
                             C=['d', 'b', 'd', 'b'])
    assert_equals(view, expected_view)

    # Maximum of B within each group of C, computed on the view.
    grouped = view[:, max(f.B), by(f.C)]
    expected_grouped = dt.Frame(C=['b', 'd'], B=[2, 6])
    assert_equals(grouped, expected_grouped)
def test_cbind_error_1():
    """cbind-ing a 0-row frame onto a 2-row frame must raise ValueError."""
    expected_message = "Cannot cbind frame with 0 rows to a frame with 2 rows"
    two_row_frame = dt.Frame(A=[1, 5])
    with pytest.raises(ValueError) as excinfo:
        two_row_frame.cbind(dt.Frame(B=[]))
    assert expected_message in str(excinfo.value)
distribution as datatable Frame
:param topic_word_distrib: topic-word distribution; shape KxM, where K is number of topics, M is vocabulary size
:param vocab: vocabulary list/array of length M
:param colname_rowindex: column name for the "row index", i.e. the column that identifies each row
:param row_labels: format string for each row index where ``{i0}`` or ``{i1}`` are replaced by the respective
zero- or one-indexed topic numbers or an array with individual row labels
:return: datatable Frame
"""
if isinstance(row_labels, str):
rownames = [row_labels.format(i0=i, i1=i + 1) for i in range(topic_word_distrib.shape[0])]
else:
rownames = row_labels
return dt.cbind(dt.Frame({colname_rowindex: rownames}),
dt.Frame(topic_word_distrib, names=list(vocab)))
"combine with rows / groupby argument.")
assert not delete_mode
if update_mode:
assert grbynode is None
allrows = isinstance(rowsnode, AllRFNode)
# Without `materialize`, when an update is applied to a view,
# `rowsnode.execute()` will merge the rowindex implied by
# `rowsnode` with its parent's rowindex. This will cause the
# parent's data to be updated, which is wrong.
dt.materialize()
if isinstance(replacement, (int, float, str, type(None))):
replacement = datatable.Frame([replacement])
if allrows:
replacement = datatable.repeat(replacement, dt.nrows)
elif isinstance(replacement, datatable.Frame):
pass
elif isinstance(replacement, BaseExpr):
_col = replacement.evaluate_eager(ee)
_colset = core.columns_from_columns([_col])
replacement = _colset.to_frame(None)
else:
replacement = datatable.Frame(replacement)
rowsnode.execute()
colsnode.execute_update(dt, replacement)
return
rowsnode.execute()
if grbynode:
grbynode.execute(ee)
colsnode.execute()
def transform(self, X: dt.Frame):
    """Expand a user-agent column into derived columns.

    Only the first column of ``X`` is considered. If its name is one of the
    recognised user-agent column names, every value in it is passed to
    ``get_ua_info`` and the results are bound onto ``X`` (in place) as
    columns named ``<col>_browser``, ``<col>_os``, ``<col>_device``,
    ``<col>_is_mobile`` and ``<col>_is_tablet``.

    :param X: input frame; only its first column is inspected
    :return: ``X`` itself with the new columns attached when the first
        column has a recognised name; otherwise the first column of
        ``X.to_pandas()`` selected with ``.iloc[:, 0]``
    """
    first_col = X.names[0]

    # Guard clause: anything that is not a known user-agent column is handed
    # back unchanged as a pandas object.
    if first_col not in ('ua', 'user-agent', 'user_agent', 'useragent'):
        return X.to_pandas().iloc[:, 0]

    suffixes = ("browser", "os", "device", "is_mobile", "is_tablet")
    derived_names = [f"{first_col}_{suffix}" for suffix in suffixes]
    agent_strings = X[first_col].to_list()[0]
    derived = dt.Frame([get_ua_info(agent) for agent in agent_strings],
                       names=derived_names)
    # cbind modifies X in place, so the caller's frame gains the new columns.
    X.cbind(derived)
    return X
b = 12742 * dt.math.arcsin(dt.math.sqrt(a)) # 2*R*asin...
all_dt.cbind(all_dt[:, {"distance_km": b}])
self._output_feature_names = self._output_feature_names + ["{}.{}".format(self.transformer_name, f) for f in
['elevation_diff', 'lat_diff', 'long_diff', 'distance_km']]
self._feature_desc = self._feature_desc + [
'Elevation difference between Origin and Destination',
'Latitude difference between Origin and Destination',
'Longitude difference between Origin and Destination',
'Distance in km between Origin and Destination (Harvestine approx.)']
elif (isOrigin and not isDest):
all_dt = X_origin
elif (isDest and not isOrigin):
all_dt = X_dest
else:
all_dt = dt.Frame(np.zeros((X.shape[0], 1)), names = ["dummy"])
self._output_feature_names = ["dummy"]
self._feature_desc = ["dummy"]
return all_dt