Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_TargetEncoder(generate_data):
    """Check that TargetEncoder returns one encoded column per input column.

    Three ways of using the encoder are exercised against the same data:

    1. ``fit_transform()`` with no cross-validation splitter,
    2. ``fit_transform()`` with a shuffled, seeded ``KFold`` splitter,
    3. ``fit()`` followed by a separate ``transform()`` with that splitter.

    Each scenario prints the head of the encoded output and asserts that the
    number of output columns equals the number of encoded input columns.

    Args:
        generate_data: Callable taking no arguments and returning a table-like
            object that exposes ``.columns`` and per-column ``.nunique()``
            (presumably a pytest fixture yielding a pandas DataFrame --
            confirm against the fixture definition).
    """
    df = generate_data()

    # Everything except the target is a candidate feature; features with fewer
    # than 100 distinct values are the ones handed to the encoder.
    feature_cols = [x for x in df.columns if x != TARGET_COL]
    cat_cols = [x for x in feature_cols if df[x].nunique() < 100]

    # Scenario 1: no CV splitter.
    te = TargetEncoder()
    X_cat = te.fit_transform(df[cat_cols], df[TARGET_COL])
    print('Without CV:\n{}'.format(X_cat.head()))
    assert X_cat.shape[1] == len(cat_cols)

    # A fixed random_state keeps the shuffled folds reproducible between runs.
    cv = KFold(n_splits=N_FOLD, shuffle=True, random_state=RANDOM_SEED)

    # Scenario 2: CV splitter, fitting and transforming in a single call.
    te = TargetEncoder(cv=cv)
    X_cat = te.fit_transform(df[cat_cols], df[TARGET_COL])
    print('With CV (fit_transform()):\n{}'.format(X_cat.head()))
    assert X_cat.shape[1] == len(cat_cols)

    # Scenario 3: CV splitter, fitting first and transforming afterwards.
    te = TargetEncoder(cv=cv)
    te.fit(df[cat_cols], df[TARGET_COL])
    X_cat = te.transform(df[cat_cols])
    print('With CV (fit() and transform() separately):\n{}'.format(X_cat.head()))
    assert X_cat.shape[1] == len(cat_cols)
def test_TargetEncoder(generate_data):
df = generate_data()
feature_cols = [x for x in df.columns if x != TARGET_COL]
cat_cols = [x for x in feature_cols if df[x].nunique() < 100]
te = TargetEncoder()
X_cat = te.fit_transform(df[cat_cols], df[TARGET_COL])
print('Without CV:\n{}'.format(X_cat.head()))
assert X_cat.shape[1] == len(cat_cols)
cv = KFold(n_splits=N_FOLD, shuffle=True, random_state=RANDOM_SEED)
te = TargetEncoder(cv=cv)
X_cat = te.fit_transform(df[cat_cols], df[TARGET_COL])
print('With CV (fit_transform()):\n{}'.format(X_cat.head()))
assert X_cat.shape[1] == len(cat_cols)
te = TargetEncoder(cv=cv)
te.fit(df[cat_cols], df[TARGET_COL])
X_cat = te.transform(df[cat_cols])
print('With CV (fit() and transform() separately):\n{}'.format(X_cat.head()))