Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test__get_primary_keys_no_pk(self):
    """_get_primary_keys returns a ``(None, None)`` pair when the metadata is empty.

    The navigator's ``get_meta_data`` is stubbed to return an empty dict, so
    no ``primary_key`` entry is available for the requested table.
    """
    # Setup
    navigator = MagicMock(spec=DataNavigator)
    navigator.get_meta_data.return_value = {}
    sampler = Sampler(data_navigator=navigator, modeler=MagicMock(spec=Modeler))

    # Run
    primary_key, primary_key_values = sampler._get_primary_keys('table', 5)

    # Check
    assert primary_key is None
    assert primary_key_values is None
def test_sample_table(self, rows_mock):
    """sample_table returns the row sampler's output and calls it exactly once.

    ``rows_mock`` is injected from outside this block (presumably by a patch
    decorator that is not visible here). The check expects it to receive the
    sampler itself, the table name, and the first entry of the stubbed
    ``data.shape``.
    """
    # Setup
    navigator = MagicMock(spec=DataNavigator)
    navigator.tables = {
        'table': MagicMock(**{'data.shape': ('rows', 'columns')}),
    }
    sampler = Sampler(data_navigator=navigator, modeler=MagicMock(spec=Modeler))
    rows_mock.return_value = 'samples'

    # Run
    sampled = sampler.sample_table('table', reset_primary_keys=False)

    # Check
    assert sampled == 'samples'
    rows_mock.assert_called_once_with(
        sampler,
        'table',
        'rows',
        sample_children=False,
        reset_primary_keys=False,
    )
def test__reset_primary_keys_generators(self):
    """_reset_primary_keys_generators empties both primary-key mappings."""
    # Setup: one generator and one counter registered for 'table'.
    sampler = Sampler(
        data_navigator=MagicMock(spec=DataNavigator),
        modeler=MagicMock(spec=Modeler),
    )
    sampler.primary_key = {'table': 'generator for table'}
    sampler.remaining_primary_key = {'table': 'counter for table'}

    # Run
    sampler._reset_primary_keys_generators()

    # Check
    assert sampler.primary_key == {}
    assert sampler.remaining_primary_key == {}
def test__get_missing_valid_rows_excess_rows(self):
    """_get_missing_valid_rows trims the combined rows down to num_rows.

    Two previously valid rows plus four synthesized rows (none flagged for
    dropping) exceed the five requested, so the test expects zero missing
    rows and a five-row frame indexed 0..4.
    """
    # Setup
    navigator = MagicMock(spec=DataNavigator)
    sampler = Sampler(navigator, MagicMock(spec=Modeler))

    column_names = list('AB')
    synthesized_index = range(3, 7)
    synthesized = pd.DataFrame(columns=column_names, index=synthesized_index)
    drop_indices = pd.Series(False, index=synthesized_index)
    already_valid = pd.DataFrame(columns=column_names, index=range(2))

    # Run
    missing_rows, valid_rows = sampler._get_missing_valid_rows(
        synthesized, drop_indices, already_valid, 5)

    # Check
    expected_valid = pd.DataFrame(columns=column_names, index=range(5))
    assert missing_rows == 0
    assert valid_rows.equals(expected_valid)

    # The navigator must not have been touched at all.
    navigator.assert_not_called()
    assert navigator.method_calls == []
def test__sample_model_vine(self, qualified_mock):
    """Build the fixtures for sampling rows from a model reported as a VineCopula.

    ``qualified_mock`` is injected from outside this block (presumably by a
    patch decorator that is not visible here).

    NOTE(review): only the Setup section is present. Nothing follows the
    trailing ``# Run`` marker, so no method is exercised and nothing is
    asserted. Presumably the Run/Check steps were lost; restore them from
    the upstream test suite.
    """
    # Setup
    sampler = Sampler(MagicMock(spec=DataNavigator), MagicMock(spec=Modeler))

    # Each call to model.sample yields the next constant row.
    values = [np.array([constant] * 3) for constant in (1, 2, 3)]
    model = MagicMock()
    model.sample.side_effect = values
    qualified_mock.return_value = 'copulas.multivariate.vine.VineCopula'

    num_rows = 3
    columns = list('ABC')
    expected_result = pd.DataFrame(values, columns=columns)

    # Run
def test_sample_all(self, rows_mock):
    """sample_all returns the data collected for tables whose get_parents is falsy.

    ``rows_mock`` is injected from outside this block (presumably by a patch
    decorator that is not visible here). The stubbed ``get_parents`` is falsy
    only for ``TABLE_A``, and the expected result contains only that table.
    The test also verifies that ``get_parents`` was queried once per table,
    in the order the tables are listed.
    """
    # Setup
    data_navigator = MagicMock()
    data_navigator.tables = ['TABLE_A', 'TABLE_B']
    # Falsy for TABLE_A, truthy for every other table name.
    data_navigator.get_parents.side_effect = lambda x: x != 'TABLE_A'
    modeler = MagicMock()
    sampler = Sampler(data_navigator, modeler)

    def fake_dataframe(*args, **kwargs):
        # Stand-in for the row sampler: store a marker in the caller-supplied
        # ``sampled_data`` mapping under the second positional argument
        # (presumably the table name, with the sampler instance first).
        kwargs['sampled_data'][args[1]] = 'sampled_data'

    rows_mock.side_effect = fake_dataframe

    # Mock call records compare equal to (args, kwargs) tuples.
    expected_get_parents_call_list = [(('TABLE_A',), {}), (('TABLE_B',), {})]
    expected_result = {
        'TABLE_A': 'sampled_data',
    }

    # Run
    result = sampler.sample_all(num_rows=5)

    # Check
    assert result == expected_result
    # Previously this expectation was built but never compared to anything.
    assert data_navigator.get_parents.call_args_list == expected_get_parents_call_list
def test__get_primary_keys_raises_error(self):
    """_get_primary_keys raises ValueError when too few key values remain.

    A generator is already registered for the table with a remaining count
    of 0, while 5 primary keys are requested.
    """
    # Setup
    table_metadata = {
        'primary_key': 'table_pk',
        'fields': {
            'table_pk': {
                'regex': 'regex for table_pk',
                'type': 'number',
                'subtype': 'integer',
            },
        },
    }
    navigator = MagicMock(spec=DataNavigator)
    navigator.get_meta_data.return_value = table_metadata
    sampler = Sampler(data_navigator=navigator, modeler=MagicMock(spec=Modeler))
    sampler.primary_key['table'] = 'a generator'
    sampler.remaining_primary_key['table'] = 0

    # Run / Check
    with self.assertRaises(ValueError):
        sampler._get_primary_keys('table', 5)
def test__unflatten_dict_mixed_array(self):
"""unflatten_dict restructures arrays.

NOTE(review): this block is truncated. The ``expected_result`` literal is
never closed and no Run or Check section follows, so the test cannot be
parsed or executed as it stands. Restore the remainder from the upstream
test suite before relying on it.
"""
# Setup
data_navigator = MagicMock()
modeler = MagicMock()
sampler = Sampler(data_navigator, modeler)
# Flattened input: keys join the nesting levels with a double underscore.
# 'first_key' uses two numeric levels; 'second_key' uses a numeric level
# followed by a named one ('std' / 'mean').
flat = {
'first_key__0__0': 1,
'first_key__0__1': 0,
'first_key__1__0': 0,
'first_key__1__1': 1,
'second_key__0__std': 0.5,
'second_key__0__mean': 0.5,
'second_key__1__std': 0.25,
'second_key__1__mean': 0.25
}
# Only the 'first_key' expectation (a 2x2 nested list) is visible here.
expected_result = {
'first_key': [
[1, 0],
[0, 1]
],
# NOTE(review): the literal stops here; the 'second_key' expectation and the
# closing brace are missing from this block.
def test___init__(self):
    """A new Sampler keeps its constructor arguments and starts with empty key state.

    Checks that the metadata, models, model class and model kwargs are stored
    as passed, and that both primary-key mappings start out empty.
    """
    # Run
    model_registry = {'test': Mock()}
    sampler = Sampler('test_metadata', model_registry, SDVModel, {})

    # Asserts
    assert sampler.metadata == 'test_metadata'
    assert sampler.models == model_registry
    assert sampler.primary_key == {}
    assert sampler.remaining_primary_key == {}
    assert sampler.model == SDVModel
    assert sampler.model_kwargs == {}
def test__unflatten_dict_respect_covariance_matrix(self):
"""unflatten_dict restructures the covariance matrix into an square matrix."""
# Setup
data_navigator = MagicMock()
modeler = MagicMock()
sampler = Sampler(data_navigator, modeler)
def fake_values(i, j):
return '{}, {}'.format(i, j)
expected_result = {
'covariance': np.array([
[fake_values(i, j) for j in range(40)]
for i in range(40)
]).tolist()
}
flat = {
'covariance__{}__{}'.format(i, j): fake_values(i, j)
for i in range(40) for j in range(40)
}