Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def fit_then_transform_dense(expected, input,
                             categorical_features='all',
                             minimum_fraction=None):
    """Assert that a ``sparse=False`` OneHotEncoder yields ``expected``.

    The encoding is produced twice, once through ``fit_transform`` and
    once through ``fit`` followed by ``transform``, and each result is
    compared against ``expected`` with ``assert_array_almost_equal``.

    Parameters
    ----------
    expected
        Reference output the encoder result is compared with.
    input
        Data handed to the encoder; every encoder call gets its own
        ``input.copy()``.
    categorical_features
        Forwarded unchanged to ``OneHotEncoder``.
    minimum_fraction
        Forwarded unchanged to ``OneHotEncoder``.
    """
    encoder_kwargs = dict(
        categorical_features=categorical_features,
        sparse=False,
        minimum_fraction=minimum_fraction,
    )

    # Route 1: fit and transform in a single call.
    single_call_encoder = OneHotEncoder(**encoder_kwargs)
    single_call_result = single_call_encoder.fit_transform(input.copy())
    assert_array_almost_equal(expected, single_call_result)

    # Route 2: fit first, then transform with the already fitted encoder.
    two_call_encoder = OneHotEncoder(**encoder_kwargs)
    two_call_encoder.fit(input.copy())
    two_call_result = two_call_encoder.transform(input.copy())
    assert_array_almost_equal(expected, two_call_result)
def fit_then_transform(expected, input, categorical_features='all',
                       minimum_fraction=None):
    """Assert that OneHotEncoder (default ``sparse``) yields ``expected``.

    The encoding is produced twice, once through ``fit_transform`` and
    once through ``fit`` followed by ``transform``. Each result is
    densified with ``.todense()`` before being compared against
    ``expected`` with ``assert_array_almost_equal``.

    Parameters
    ----------
    expected
        Reference output; cast with ``.astype(float)`` for the first
        comparison and used as-is for the second.
    input
        Data handed to the encoder; every encoder call gets its own
        ``input.copy()``.
    categorical_features
        Forwarded unchanged to ``OneHotEncoder``.
    minimum_fraction
        Forwarded unchanged to ``OneHotEncoder``.
    """
    encoder_kwargs = dict(
        categorical_features=categorical_features,
        minimum_fraction=minimum_fraction,
    )

    # Route 1: fit and transform in a single call.
    single_call_encoder = OneHotEncoder(**encoder_kwargs)
    single_call_result = single_call_encoder.fit_transform(input.copy())
    assert_array_almost_equal(expected.astype(float),
                              single_call_result.todense())

    # Route 2: fit first, then transform with the already fitted encoder.
    two_call_encoder = OneHotEncoder(**encoder_kwargs)
    two_call_encoder.fit(input.copy())
    two_call_result = two_call_encoder.transform(input.copy())
    assert_array_almost_equal(expected, two_call_result.todense())
def fit_then_transform(expected, input, categorical_features='all',
                       minimum_fraction=None):
    """Compare OneHotEncoder output (default ``sparse``) with ``expected``.

    Two encoders are built with the same settings. The first is used via
    ``fit_transform``; the second via ``fit`` and then ``transform``.
    Both results are converted with ``.todense()`` and checked with
    ``assert_array_almost_equal``.

    Parameters
    ----------
    expected
        Reference output; cast with ``.astype(float)`` for the first
        comparison and used as-is for the second.
    input
        Data passed to the encoder as a fresh ``input.copy()`` per call.
    categorical_features
        Passed through to ``OneHotEncoder``.
    minimum_fraction
        Passed through to ``OneHotEncoder``.
    """
    settings = {
        'categorical_features': categorical_features,
        'minimum_fraction': minimum_fraction,
    }

    # Combined fit_transform.
    combined = OneHotEncoder(**settings)
    combined_out = combined.fit_transform(input.copy())
    assert_array_almost_equal(expected.astype(float), combined_out.todense())

    # Separate fit, then transform.
    separate = OneHotEncoder(**settings)
    separate.fit(input.copy())
    separate_out = separate.transform(input.copy())
    assert_array_almost_equal(expected, separate_out.todense())
def fit_then_transform_dense(expected, input,
                             categorical_features='all',
                             minimum_fraction=None):
    """Compare ``sparse=False`` OneHotEncoder output with ``expected``.

    Two encoders are built with the same settings. The first is used via
    ``fit_transform``; the second via ``fit`` and then ``transform``.
    Both results are checked with ``assert_array_almost_equal``.

    Parameters
    ----------
    expected
        Reference output the encoder result is compared with.
    input
        Data passed to the encoder as a fresh ``input.copy()`` per call.
    categorical_features
        Passed through to ``OneHotEncoder``.
    minimum_fraction
        Passed through to ``OneHotEncoder``.
    """
    settings = {
        'categorical_features': categorical_features,
        'sparse': False,
        'minimum_fraction': minimum_fraction,
    }

    # Combined fit_transform.
    combined = OneHotEncoder(**settings)
    combined_out = combined.fit_transform(input.copy())
    assert_array_almost_equal(expected, combined_out)

    # Separate fit, then transform.
    separate = OneHotEncoder(**settings)
    separate.fit(input.copy())
    separate_out = separate.transform(input.copy())
    assert_array_almost_equal(expected, separate_out)
def test_transform():
    """Test OneHotEncoder with both dense and sparse matrices."""
    # --- Dense input -----------------------------------------------------
    # Fit on two identical columns holding the values 0..5.
    dense_train = np.array(((0, 1, 2, 3, 4, 5), (0, 1, 2, 3, 4, 5))).T
    dense_encoder = OneHotEncoder()
    dense_encoder.fit(dense_train)

    # The values 6 and 7 do not appear in the training data.
    dense_test = np.array(((0, 1, 2, 6), (0, 1, 6, 7))).T
    dense_encoded = dense_encoder.transform(dense_test).todense()
    assert np.sum(dense_encoded) == 5

    # --- Sparse input ----------------------------------------------------
    # Same data as above, but wrapped in CSR matrices for fit and transform.
    sparse_train = scipy.sparse.csr_matrix(
        np.array(((0, 1, 2, 3, 4, 5), (0, 1, 2, 3, 4, 5))).T
    )
    sparse_encoder = OneHotEncoder()
    sparse_encoder.fit(sparse_train)

    sparse_test = scipy.sparse.csr_matrix(
        np.array(((0, 1, 2, 6), (0, 1, 6, 7))).T
    )
    sparse_encoded = sparse_encoder.transform(sparse_test).todense()
    # NOTE(review): the lower total (3 vs. 5) presumably comes from zeros
    # not being stored entries in the CSR input; confirm against the
    # OneHotEncoder implementation.
    assert np.sum(sparse_encoded) == 3
def test_k_fold_cv():
    """Test OneHotEncoder with categorical_features='auto'.

    Runs a OneHotEncoder -> LinearRegression pipeline through shuffled
    10-fold cross-validation on the Boston dataset. Nothing is asserted
    on the scores; the test only requires the run to finish without
    raising.
    """
    boston = load_boston()

    encoder = OneHotEncoder(
        categorical_features='auto',
        sparse=False,
        minimum_fraction=0.05,
    )
    pipeline = make_pipeline(encoder, LinearRegression())
    folds = KFold(n_splits=10, shuffle=True)

    cross_val_score(pipeline, boston.data, boston.target, cv=folds)
----------
X: numpy ndarray, {n_samples, n_components}
New data, where n_samples is the number of samples and n_components is the number of components.
Returns
-------
array-like, {n_samples, n_components}
"""
selected = auto_select_categorical_features(X, threshold=self.threshold)
X_sel, _, n_selected, _ = _X_selected(X, selected)
if n_selected == 0:
# No features selected.
raise ValueError('No categorical feature was found!')
else:
ohe = OneHotEncoder(categorical_features='all', sparse=False, minimum_fraction=self.minimum_fraction)
return ohe.fit_transform(X_sel)