Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test__fit(self, ohle_mock):
    """Check that ``_fit`` stores a fitted encoder under the series name."""
    encoder = CategoricalEncoder()
    encoder.encoders = {}
    column = pd.Series(['a', 'b', 'a'], name='test')

    encoder._fit(column)

    # ``ohle_mock`` is injected by the caller; the object it returns is
    # expected to be registered under the series name and fitted once.
    created = ohle_mock.return_value
    assert encoder.encoders['test'] == created
    created.fit.assert_called_once_with(column)
def test_fit(self, fit_mock):
    """Verify ``fit`` empties ``encoders`` and calls the patched fit once."""
    encoder = CategoricalEncoder()
    # Seed stale state so that the reset performed by ``fit`` is observable.
    encoder.encoders = {'past': 'encoders'}

    encoder.fit('some_X')

    assert encoder.encoders == {}
    fit_mock.assert_called_once_with('some_X')
def test___init__(self):
    """Check that constructor arguments are exposed as same-named attributes."""
    encoder = CategoricalEncoder(
        max_labels=5,
        max_unique_ratio=0.5,
        features='auto',
    )

    observed = (encoder.max_labels, encoder.max_unique_ratio, encoder.features)
    assert observed == (5, 0.5, 'auto')
def test__detect_features_max_unique(self):
    """Only the string column with few distinct values is detected at ratio 0.5."""
    columns = {
        'completely_unique': ['a', 'b', 'c', 'd', 'e'],
        'too_unique': ['a', 'b', 'c', 'd', 'a'],
        'not_unique': ['a', 'b', 'a', 'a', 'a'],
        'not_feature': [1, 2, 3, 4, 5],
    }
    frame = pd.DataFrame(columns)
    encoder = CategoricalEncoder(max_unique_ratio=0.5)

    detected = encoder._detect_features(frame)

    assert detected == ['not_unique']
def test__transform(self):
    """Check that ``_transform`` returns the frame produced by the stored encoder.

    A ``Mock`` encoder is registered under the series name and configured to
    return a fixed one-hot frame. The value returned by ``_transform`` must
    equal that frame.
    """
    ce = CategoricalEncoder()
    ohle_instance = Mock()
    ohle_instance.transform.return_value = pd.DataFrame({
        'test=a': [1, 0, 1],
        'test=b': [0, 1, 0],
    })
    ce.encoders = {
        'test': ohle_instance
    }

    x = pd.Series(['a', 'b', 'a'], name='test')
    returned = ce._transform(x)

    expected = pd.DataFrame({
        'test=a': [1, 0, 1],
        'test=b': [0, 1, 0],
    })
    # Without this comparison the test could never fail on a wrong result:
    # ``returned`` and ``expected`` were built but never checked.
    assert expected.equals(returned)
def test__detect_features_no_max_unique(self):
    """With ``max_unique_ratio=0`` both string columns are detected."""
    columns = {
        'unique': ['a', 'b', 'c', 'd'],
        'not_unique': ['a', 'b', 'a', 'a'],
        'not_feature': [1, 2, 3, 4],
    }
    frame = pd.DataFrame(columns)
    encoder = CategoricalEncoder(max_unique_ratio=0)

    detected = encoder._detect_features(frame)

    # The numeric column is the only one left out of the expected result.
    assert detected == ['unique', 'not_unique']
def test__detect_features_nones(self):
    """Detection at ratio 0.5 still yields only ``not_unique`` when Nones are present."""
    columns = {
        'completely_unique': ['a', 'b', 'c', 'd', 'e', None],
        'too_unique': ['a', 'b', 'c', 'd', None, 'a'],
        'not_unique': ['a', 'b', 'a', None, 'a', 'a'],
        'not_feature': [1, 2, None, 4, 5, 6],
    }
    frame = pd.DataFrame(columns)
    encoder = CategoricalEncoder(max_unique_ratio=0.5)

    detected = encoder._detect_features(frame)

    assert detected == ['not_unique']
def test__detect_features_category(self):
    """A ``category``-dtype column is detected alongside the string columns."""
    columns = {
        'unique': ['a', 'b', 'c', 'd'],
        'not_unique': ['a', 'b', 'a', 'a'],
        'category': ['a', 'b', 'a', 'b'],
        'not_feature': [1, 2, 3, 4],
    }
    frame = pd.DataFrame(columns)
    # Convert one column to the pandas categorical dtype before detection.
    as_category = frame['category'].astype('category')
    frame['category'] = as_category
    encoder = CategoricalEncoder(max_unique_ratio=0)

    detected = encoder._detect_features(frame)

    assert detected == ['unique', 'not_unique', 'category']
def __init__(self, max_labels=None, max_unique_ratio=0, dropna=True, **kwargs):
    """Store the encoder settings and forward the rest to the parent class.

    Args:
        max_labels: Saved as ``self.max_labels``. Defaults to ``None``.
        max_unique_ratio: Saved as ``self.max_unique_ratio``. Defaults to ``0``.
        dropna: Saved as ``self.dropna``. Defaults to ``True``.
        **kwargs: Passed unchanged to the parent ``__init__``.
    """
    self.dropna = dropna
    self.max_unique_ratio = max_unique_ratio
    self.max_labels = max_labels

    # The parent initializer runs last, after the attributes above are set.
    parent = super(CategoricalEncoder, self)
    parent.__init__(**kwargs)