Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _fit(self, data):
    """Create a Gaussian multivariate copula and fit it to ``data``.

    A new ``copulas.multivariate.GaussianMultivariate`` is built with
    ``self._distribution``, stored in ``self._model`` and then fitted.
    ``self._update_metadata()`` is called once fitting has finished.

    Args:
        data (pandas.DataFrame):
            Data to be fitted.
    """
    copula_class = copulas.multivariate.GaussianMultivariate
    # Store the model before fitting so ``self._model`` always points at
    # the instance that ``fit`` is being called on.
    self._model = copula_class(distribution=self._distribution)
    self._model.fit(data)
    self._update_metadata()
def fit(self, table_data):
    """Impute the table data and fit the copula model on the result.

    The data is passed through ``impute`` first; a new
    ``multivariate.GaussianMultivariate`` configured with
    ``self.distribution`` is then stored in ``self.model`` and fitted
    on the imputed data.

    Args:
        table_data (pandas.DataFrame):
            Data to be fitted.
    """
    imputed = impute(table_data)
    copula = multivariate.GaussianMultivariate(distribution=self.distribution)
    self.model = copula
    self.model.fit(imputed)
in order to set expected parameters for the copula.
Args:
dict:
Copula flatten parameters.
"""
parameters = unflatten_dict(parameters)
parameters.setdefault('fitted', True)
parameters.setdefault('distribution', self.distribution)
parameters = self._unflatten_gaussian_copula(parameters)
for param in parameters['distribs'].values():
param.setdefault('type', self.distribution)
param.setdefault('fitted', True)
self.model = multivariate.GaussianMultivariate.from_dict(parameters)
Add additional keys after unflattening the parameters
in order to set expected parameters for the copula.
Args:
dict:
Copula flatten parameters.
unflatten (bool):
Whether the parameters need to be unflattened or not.
"""
if unflatten:
parameters = unflatten_dict(parameters)
parameters.setdefault('distribution', self._distribution)
parameters = self._unflatten_gaussian_copula(parameters)
self._model = copulas.multivariate.GaussianMultivariate.from_dict(parameters)
unflatten_dict)
class GaussianCopula(BaseTabularModel):
"""Model wrapping ``copulas.multivariate.GaussianMultivariate`` copula.
Args:
distribution (copulas.univariate.Univariate or str):
Copulas univariate distribution to use.
categorical_transformer (str):
Type of transformer to use for the categorical variables, to choose
from ``one_hot_encoding``, ``label_encoding``, ``categorical`` and
``categorical_fuzzy``.
"""
DEFAULT_DISTRIBUTION = copulas.univariate.Univariate
_distribution = None
_categorical_transformer = None
_model = None
HYPERPARAMETERS = {
'distribution': {
'type': 'str or copulas.univariate.Univariate',
'default': 'copulas.univariate.Univariate',
'description': 'Univariate distribution to use to model each column',
'choices': [
'copulas.univariate.Univariate',
'copulas.univariate.GaussianUnivariate',
'copulas.univariate.GammaUnivariate',
'copulas.univariate.BetaUnivariate',
'copulas.univariate.StudentTUnivariate',
'copulas.univariate.GaussianKDE',
Copula flatten parameters.
"""
parameters = unflatten_dict(parameters)
parameters.setdefault('fitted', True)
parameters.setdefault('distribution', self.distribution)
parameters = self._unflatten_gaussian_copula(parameters)
for param in parameters['distribs'].values():
param.setdefault('type', self.distribution)
param.setdefault('fitted', True)
self.model = multivariate.GaussianMultivariate.from_dict(parameters)
class GaussianCopulaTruncated(GaussianCopula):
    """``GaussianCopula`` subclass whose ``DISTRIBUTION`` is ``univariate.TruncatedGaussian``."""

    # NOTE(review): how the base class consumes ``DISTRIBUTION`` is not
    # visible from here -- confirm against the parent implementation.
    DISTRIBUTION = univariate.TruncatedGaussian
values = list()
triangle = np.tril(self._model.covariance)
for index, row in enumerate(triangle.tolist()):
values.append(row[:index + 1])
self._model.covariance = np.array(values)
params = self._model.to_dict()
univariates = dict()
for name, univariate in zip(params.pop('columns'), params['univariates']):
univariates[name] = univariate
if 'scale' in univariate:
scale = univariate['scale']
if scale == 0:
scale = copulas.EPSILON
univariate['scale'] = np.log(scale)
params['univariates'] = univariates
return flatten_dict(params)
# -*- coding: utf-8 -*-
"""Main module."""
import pickle
from copulas.univariate import GaussianUnivariate
from sdv.metadata import Metadata
from sdv.modeler import Modeler
from sdv.models.copulas import GaussianCopula
from sdv.sampler import Sampler
# Default model class; presumably used when no ``model`` is given to ``SDV``
# (confirm against the constructor, which is outside this view).
DEFAULT_MODEL = GaussianCopula

# Keyword arguments that accompany ``DEFAULT_MODEL``: a Gaussian univariate
# distribution is selected through the ``distribution`` key.
DEFAULT_MODEL_KWARGS = {'distribution': GaussianUnivariate}
class NotFittedError(Exception):
    """Custom exception; by its name, meant for operations that need a fitted model.

    NOTE(review): the raise sites are outside this view -- confirm the
    exact conditions against the callers.
    """
class SDV:
"""Automated generative modeling and sampling tool.
Allows the users to generate synthetic data after creating generative models for their data.
Args:
model (type):
Class of the ``copula`` to use. Defaults to
``sdv.models.copulas.GaussianCopula``.