Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def assert_full_rank(m):
    """Assert that the matrix ``m`` has full column rank.

    ``m`` is first passed through ``atleast_2d_column_default``. Its rank is
    then estimated as the number of singular values greater than ``1e-10``
    and asserted equal to the number of columns.

    Returns ``True`` immediately for a matrix with zero columns (no SVD is
    attempted); otherwise returns ``None`` when the assertion holds.

    Raises ``AssertionError`` when the estimated rank differs from the
    number of columns.
    """
    m = atleast_2d_column_default(m)
    if m.shape[1] == 0:
        return True
    # Only the singular values are needed, so do not compute U and V.
    singular_values = np.linalg.svd(m, compute_uv=False)
    rank = np.sum(singular_values > 1e-10)
    assert rank == m.shape[1]
# Test fragment: the enclosing function header and the definitions of f1, f2,
# f3 and `contrast` are outside this excerpt.
#
# Three factors: f1 and f3 are declared numerical with one column each, f2 is
# declared categorical with the two categories "a" and "b".
factor_infos1 = {f1: FactorInfo(f1, "numerical", {},
num_columns=1, categories=None),
f2: FactorInfo(f2, "categorical", {},
num_columns=None, categories=["a", "b"]),
f3: FactorInfo(f3, "numerical", {},
num_columns=1, categories=None),
}
# Only the categorical factor f2 is given a contrast matrix.
contrast_matrices = {f2: contrast}
# NOTE(review): the trailing 2 is presumably the subterm's column count
# (it matches the two names and the two matrix columns below) -- confirm
# against SubtermInfo.
subterm1 = SubtermInfo([f1, f2, f3], contrast_matrices, 2)
# The subterm is expected to yield exactly these two column names.
assert (list(_subterm_column_names_iter(factor_infos1, subterm1))
== ["f1:f2[c1]:f3", "f1:f2[c2]:f3"])
# Output buffer with 3 rows and 2 columns; it is handed to _build_subterm and
# read back by the assertion that follows.
mat = np.empty((3, 2))
# f1 and f3 are supplied as column arrays; f2 is supplied as integers
# (presumably indices into its categories -- TODO confirm).
_build_subterm(subterm1, factor_infos1,
{f1: atleast_2d_column_default([1, 2, 3]),
f2: np.asarray([0, 0, 1]),
f3: atleast_2d_column_default([7.5, 2, -12])},
mat)
# The expected numbers depend on `contrast`, which is not visible here.
assert np.allclose(mat, [[0, 0.5 * 1 * 7.5],
[0, 0.5 * 2 * 2],
[3 * 3 * -12, 0]])
# Check that missing categorical values blow up
# (same inputs as above except for the -1 in f2's data; PatsyError expected).
assert_raises(PatsyError, _build_subterm, subterm1, factor_infos1,
{f1: atleast_2d_column_default([1, 2, 3]),
f2: np.asarray([0, -1, 1]),
f3: atleast_2d_column_default([7.5, 2, -12])},
mat)
# Second scenario: shallow-copy the factor table and redeclare f1 as a
# two-column numerical factor.
factor_infos2 = dict(factor_infos1)
factor_infos2[f1] = FactorInfo(f1, "numerical", {},
num_columns=2, categories=None)
def _regularize_matrix(m, default_column_prefix):
    """Wrap ``m`` in the requested output type and report its original index.

    A ``DesignInfo`` is built from ``m`` via ``DesignInfo.from_array`` using
    ``default_column_prefix``. The result type is chosen by ``return_type``,
    which is read from the enclosing scope:

    * ``"dataframe"``: a ``pandas.DataFrame`` whose columns are the design
      info's column names and which carries the info as ``design_info``.
    * anything else: a ``DesignMatrix`` built from ``m`` and the info.

    Returns a ``(matrix, orig_index)`` tuple. ``orig_index`` is ``m.index``
    when pandas is available and ``m`` is a Series or DataFrame, else
    ``None``.
    """
    info = DesignInfo.from_array(m, default_column_prefix)
    is_pandas_input = have_pandas and isinstance(
        m, (pandas.Series, pandas.DataFrame))
    orig_index = m.index if is_pandas_input else None
    if return_type != "dataframe":
        return (DesignMatrix(m, info), orig_index)
    frame = pandas.DataFrame(
        atleast_2d_column_default(m, preserve_pandas=True))
    frame.columns = info.column_names
    frame.design_info = info
    return (frame, orig_index)
rhs, rhs_orig_index = _regularize_matrix(rhs, "x")
# Fragment: the enclosing function header is outside this excerpt. The code
# evaluates each factor on successive data chunks to settle whether it is
# categorical or numeric.
# factor -> column count, for factors found to be numeric
num_column_counts = {}
# factor -> CategoricalSniffer, for factors found to be categorical
cat_sniffers = {}
# factors that have not been settled yet
examine_needed = set(factors)
for data in data_iter_maker():
# Iterate over a copy: factors are removed from the set inside the loop.
for factor in list(examine_needed):
value = factor.eval(factor_states[factor], data)
# A factor that already has a sniffer stays categorical on later chunks
# without being re-guessed.
if factor in cat_sniffers or guess_categorical(value):
if factor not in cat_sniffers:
cat_sniffers[factor] = CategoricalSniffer(NA_action,
factor.origin)
# The factor is finished only once sniff() returns a true value;
# until then it is examined again on the next chunk.
done = cat_sniffers[factor].sniff(value)
if done:
examine_needed.remove(factor)
else:
# Numeric
# A numeric factor is settled by the first chunk it is evaluated on.
value = atleast_2d_column_default(value)
_max_allowed_dim(2, value, factor)
column_count = value.shape[1]
num_column_counts[factor] = column_count
examine_needed.remove(factor)
# Stop pulling data as soon as every factor has been settled.
if not examine_needed:
break
# Pull out the levels
cat_levels_contrasts = {}
for factor, sniffer in six.iteritems(cat_sniffers):
cat_levels_contrasts[factor] = sniffer.levels_contrast()
# Two dicts keyed by factor: numeric column counts and the sniffers'
# levels_contrast() results.
return (num_column_counts, cat_levels_contrasts)
def _eval_factor(factor_info, data, NA_action):
    """Evaluate one factor against ``data`` and validate the result.

    The factor is evaluated with ``factor_info.state``. For a factor whose
    ``factor_info.type`` is ``"numerical"`` the value is promoted to at
    least 2-d (pandas objects are preserved) and then checked for
    dimensionality, column count and numeric dtype.

    Returns, for a numerical factor, the tuple
    ``(result, NA_action.is_numerical_NA(result))``.

    Raises ``PatsyError`` when the number of columns differs from
    ``factor_info.num_columns`` or when the data is not numeric.
    """
    factor = factor_info.factor
    result = factor.eval(factor_info.state, data)
    # Returns either a 2d ndarray, or a DataFrame, plus is_NA mask
    if factor_info.type == "numerical":
        result = atleast_2d_column_default(result, preserve_pandas=True)
        _max_allowed_dim(2, result, factor)
        if result.shape[1] != factor_info.num_columns:
            # "got" is the actual column count, "expecting" the declared one.
            raise PatsyError("when evaluating factor %s, I got %s columns "
                             "instead of the %s I was expecting"
                             % (factor.name(),
                                result.shape[1],
                                factor_info.num_columns),
                             factor)
        # A DataFrame has no single ``dtype`` attribute, so take the dtype
        # from the array view both for the check and for the message.
        result_dtype = np.asarray(result).dtype
        if not safe_issubdtype(result_dtype, np.number):
            raise PatsyError("when evaluating numeric factor %s, "
                             "I got non-numeric data of type '%s'"
                             % (factor.name(), result_dtype),
                             factor)
        return result, NA_action.is_numerical_NA(result)
# returns either a 1d ndarray or a pandas.Series, plus is_NA mask
else:
def memorize_chunk(self, x, center=True, rescale=True, ddof=0):
    """Fold one chunk of rows into the running per-column statistics.

    ``x`` is promoted to at least 2-d, then each row in turn updates
    ``self.current_n`` (row count), ``self.current_mean`` (running mean) and
    ``self.current_M2`` (running sum of squared deviations). The two
    accumulators are created on the first call, one entry per column of
    ``x``.

    ``center``, ``rescale`` and ``ddof`` are accepted but not used by this
    method.
    """
    x = atleast_2d_column_default(x)
    if self.current_mean is None:
        # First chunk: allocate zeroed accumulators sized to its columns.
        n_columns = x.shape[1]
        accumulator_dtype = wide_dtype_for(x)
        self.current_mean = np.zeros(n_columns, dtype=accumulator_dtype)
        self.current_M2 = np.zeros(n_columns, dtype=accumulator_dtype)
    # XX this can surely be vectorized but I am feeling lazy:
    for row in x:
        self.current_n += 1
        # Deviation from the mean *before* this row is folded in ...
        delta = row - self.current_mean
        self.current_mean += delta / self.current_n
        # ... multiplied by the deviation from the mean *after* the update.
        self.current_M2 += delta * (row - self.current_mean)
`design_info` argument is not given, then one is created via
:meth:`DesignInfo.from_array` using the given
`default_column_prefix`.
Depending on the input array, it is possible this will pass through
its input unchanged, or create a view.
"""
# Pass through existing DesignMatrixes. The design_info check is
# necessary because numpy is sort of annoying and cannot be stopped
# from turning non-design-matrix arrays into DesignMatrix
# instances. (E.g., my_dm.diagonal() will return a DesignMatrix
# object, but one without a design_info attribute.)
if (isinstance(input_array, DesignMatrix)
and hasattr(input_array, "design_info")):
return input_array
# Promote the input to at least 2-d and view it as an instance of cls.
self = atleast_2d_column_default(input_array).view(cls)
# Upcast integer to floating point
if safe_issubdtype(self.dtype, np.integer):
self = np.asarray(self, dtype=float).view(cls)
# More than two dimensions is a caller error; after this check exactly 2-d
# is the only shape expected, which the assert below verifies.
if self.ndim > 2:
raise ValueError("DesignMatrix must be 2d")
assert self.ndim == 2
# No design_info supplied: derive one from the array itself.
if design_info is None:
design_info = DesignInfo.from_array(self, default_column_prefix)
# There must be exactly one column name per matrix column.
if len(design_info.column_names) != self.shape[1]:
raise ValueError("wrong number of column names for design matrix "
"(got %s, wanted %s)"
% (len(design_info.column_names), self.shape[1]))
self.design_info = design_info
# Integer input was already upcast above, so this rejects whatever
# non-floating dtypes remain.
if not safe_issubdtype(self.dtype, np.floating):
raise ValueError("design matrix must be real-valued floating point")
return self
def __init__(self, variable_names, coefs, constants=None):
    """Store and validate the pieces of a linear constraint.

    Parameters:
        variable_names: iterable of variable names; stored as a list.
        coefs: coefficient values; converted to a float array with at
            least two dimensions, expected to have one column per variable.
        constants: optional constants, one per row of ``coefs``; stored as
            a float column matrix. Defaults to zeros.

    Raises:
        ValueError: if ``constants`` is not a column matrix, if ``coefs``
            does not have one column per variable name, if ``coefs`` has no
            rows, or if ``coefs`` and ``constants`` differ in row count.
    """
    self.variable_names = list(variable_names)
    self.coefs = np.atleast_2d(np.asarray(coefs, dtype=float))
    if constants is None:
        constants = np.zeros(self.coefs.shape[0], dtype=float)
    constants = np.asarray(constants, dtype=float)
    self.constants = atleast_2d_column_default(constants)
    if self.constants.ndim != 2 or self.constants.shape[1] != 1:
        raise ValueError("constants is not (convertible to) a column matrix")
    # Count the stored list, not the argument: the argument may be a
    # one-shot iterable (no len(), and already consumed by list() above).
    num_variables = len(self.variable_names)
    if self.coefs.ndim != 2 or self.coefs.shape[1] != num_variables:
        raise ValueError("wrong shape for coefs")
    if self.coefs.shape[0] == 0:
        raise ValueError("must have at least one row in constraint matrix")
    if self.coefs.shape[0] != self.constants.shape[0]:
        raise ValueError("shape mismatch between coefs and constants")