Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
mismatch_factor_infos = dict(factor_infos)
mismatch_factor_infos[f_x] = FactorInfo(f_a, "numerical", {}, num_columns=3)
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], mismatch_factor_infos, term_codings)
# bad term_codings
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos, dict(term_codings))
not_term_codings = OrderedDict(term_codings)
not_term_codings["this is a string"] = term_codings[t_x]
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos, not_term_codings)
non_list_term_codings = OrderedDict(term_codings)
non_list_term_codings[t_y] = tuple(term_codings[t_y])
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos, non_list_term_codings)
non_subterm_term_codings = OrderedDict(term_codings)
non_subterm_term_codings[t_y][0] = "not a SubtermInfo"
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos, non_subterm_term_codings)
bad_subterm = OrderedDict(term_codings)
# f_x is a factor in this model, but it is not a factor in t_y
term_codings[t_y][0] = SubtermInfo([f_x], {}, 1)
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos, bad_subterm)
# contrast matrix has wrong number of rows
def _make_subterm_infos(terms,
num_column_counts,
cat_levels_contrasts):
# Sort each term into a bucket based on the set of numeric factors it
# contains:
term_buckets = OrderedDict()
bucket_ordering = []
for term in terms:
num_factors = []
for factor in term.factors:
if factor in num_column_counts:
num_factors.append(factor)
bucket = frozenset(num_factors)
if bucket not in term_buckets:
bucket_ordering.append(bucket)
term_buckets.setdefault(bucket, []).append(term)
# Special rule: if there is a no-numerics bucket, then it always comes
# first:
if frozenset() in term_buckets:
bucket_ordering.remove(frozenset())
bucket_ordering.insert(0, frozenset())
term_to_subterm_infos = OrderedDict()
# bad term_codings
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos, dict(term_codings))
not_term_codings = OrderedDict(term_codings)
not_term_codings["this is a string"] = term_codings[t_x]
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos, not_term_codings)
non_list_term_codings = OrderedDict(term_codings)
non_list_term_codings[t_y] = tuple(term_codings[t_y])
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos, non_list_term_codings)
non_subterm_term_codings = OrderedDict(term_codings)
non_subterm_term_codings[t_y][0] = "not a SubtermInfo"
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos, non_subterm_term_codings)
bad_subterm = OrderedDict(term_codings)
# f_x is a factor in this model, but it is not a factor in t_y
term_codings[t_y][0] = SubtermInfo([f_x], {}, 1)
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos, bad_subterm)
# contrast matrix has wrong number of rows
factor_codings_a = {f_a:
FactorInfo(f_a, "categorical", {},
categories=["a1", "a2"])}
term_codings_a_bad_rows = OrderedDict([
(t_a,
def test_linear_constraint():
    """Check linear_constraint() against several ways of spelling a constraint.

    Accepted inputs are routed through _check_lincon as
    (constraint, variable names, expected value, expected value); the last
    two are presumably the expected coefficient rows and constant column --
    confirm against _check_lincon. Rejected inputs must raise ValueError
    when handed to linear_constraint directly.
    """
    from nose.tools import assert_raises
    from patsy.compat import OrderedDict

    check = _check_lincon

    # A ready-made LinearConstraint is accepted when its variable names
    # match the requested ones, and refused when they are in another order.
    matching = LinearConstraint(["a", "b"], [2, 3])
    check(matching, ["a", "b"], [[2, 3]], [[0]])
    swapped = LinearConstraint(["b", "a"], [2, 3])
    assert_raises(ValueError, linear_constraint, swapped, ["a", "b"])

    # Mappings keyed by variable name, by integer position, or a mix of both.
    mapping_cases = [
        ({"a": 2}, ["a", "b"], [[1, 0]], [[2]]),
        (OrderedDict([("a", 2), ("b", 3)]),
         ["a", "b"], [[1, 0], [0, 1]], [[2], [3]]),
        (OrderedDict([("a", 2), ("b", 3)]),
         ["b", "a"], [[0, 1], [1, 0]], [[2], [3]]),
        ({0: 2}, ["a", "b"], [[1, 0]], [[2]]),
        (OrderedDict([(0, 2), (1, 3)]),
         ["a", "b"], [[1, 0], [0, 1]], [[2], [3]]),
        (OrderedDict([("a", 2), (1, 3)]),
         ["a", "b"], [[1, 0], [0, 1]], [[2], [3]]),
    ]
    for case in mapping_cases:
        check(*case)

    # Mappings that must be refused: a key that is not among the variable
    # names, and one that uses both the name "a" and the position 0.
    for rejected in ({"q": 1}, {"a": 1, 0: 2}):
        assert_raises(ValueError, linear_constraint, rejected, ["a", "b"])

    # Bare arrays: one-dimensional and two-dimensional.
    array_cases = [
        (np.array([2, 3]), ["a", "b"], [[2, 3]], [[0]]),
        (np.array([[2, 3], [4, 5]]),
         ["a", "b"], [[2, 3], [4, 5]], [[0], [0]]),
    ]
    for case in array_cases:
        check(*case)
f_x:
FactorInfo(f_x, "numerical", {},
num_columns=2)}
term_codings_ax_extra_cm = OrderedDict([
(t_ax,
[SubtermInfo([f_a, f_x],
{f_a: ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"]),
f_x: ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"])},
4)])])
assert_raises(ValueError, DesignInfo,
["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
factor_codings_ax,
term_codings_ax_extra_cm)
# no contrast matrix for a categorical factor
term_codings_ax_missing_cm = OrderedDict([
(t_ax,
[SubtermInfo([f_a, f_x],
{},
4)])])
# This actually fails before it hits the relevant check with a KeyError,
# but that's okay... the previous test still exercises the check.
assert_raises((ValueError, KeyError), DesignInfo,
["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
factor_codings_ax,
term_codings_ax_missing_cm)
# subterm num_columns doesn't match the value computed from the individual
# factors
term_codings_ax_wrong_subterm_columns = OrderedDict([
(t_ax,
[SubtermInfo([f_a, f_x],
if cat_factors != set(subterm.contrast_matrices):
raise ValueError("Mismatch between contrast_matrices "
"and categorical factors")
if exp_cols != subterm.num_columns:
raise ValueError("Unexpected num_columns")
if term_codings is None:
# Need to invent term information
self.term_slices = None
# We invent one term per column, with the same name as the column
term_names = column_names
slices = [slice(i, i + 1) for i in range(len(column_names))]
self.term_name_slices = OrderedDict(zip(term_names, slices))
else:
# Need to derive term information from term_codings
self.term_slices = OrderedDict()
idx = 0
for term, subterm_infos in six.iteritems(self.term_codings):
term_columns = 0
for subterm_info in subterm_infos:
term_columns += subterm_info.num_columns
self.term_slices[term] = slice(idx, idx + term_columns)
idx += term_columns
if idx != len(self.column_names):
raise ValueError("mismatch between column_names and columns "
"coded by given terms")
self.term_name_slices = OrderedDict(
[(term.name(), slice_)
for (term, slice_) in six.iteritems(self.term_slices)])
# Guarantees:
# term_name_slices is never None
assert_raises(ValueError, DesignInfo,
["x1", "x2", "y", "y2"], factor_infos, term_codings)
# duplicate name
assert_raises(ValueError, DesignInfo,
["x1", "x1", "x1", "y"], factor_infos, term_codings)
# f_y is in factor_infos, but not mentioned in any term
term_codings_x_only = OrderedDict(term_codings)
del term_codings_x_only[t_y]
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3"], factor_infos, term_codings_x_only)
# f_a is in a term, but not in factor_infos
f_a = _MockFactor("a")
t_a = Term([f_a])
term_codings_with_a = OrderedDict(term_codings)
term_codings_with_a[t_a] = [SubtermInfo([f_a], {}, 1)]
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y", "a"],
factor_infos, term_codings_with_a)
# bad factor_infos
not_factor_infos = dict(factor_infos)
not_factor_infos[f_x] = "what is this I don't even"
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], not_factor_infos, term_codings)
mismatch_factor_infos = dict(factor_infos)
mismatch_factor_infos[f_x] = FactorInfo(f_a, "numerical", {}, num_columns=3)
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], mismatch_factor_infos, term_codings)
# bad factor_infos
not_factor_infos = dict(factor_infos)
not_factor_infos[f_x] = "what is this I don't even"
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], not_factor_infos, term_codings)
mismatch_factor_infos = dict(factor_infos)
mismatch_factor_infos[f_x] = FactorInfo(f_a, "numerical", {}, num_columns=3)
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], mismatch_factor_infos, term_codings)
# bad term_codings
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos, dict(term_codings))
not_term_codings = OrderedDict(term_codings)
not_term_codings["this is a string"] = term_codings[t_x]
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos, not_term_codings)
non_list_term_codings = OrderedDict(term_codings)
non_list_term_codings[t_y] = tuple(term_codings[t_y])
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos, non_list_term_codings)
non_subterm_term_codings = OrderedDict(term_codings)
non_subterm_term_codings[t_y][0] = "not a SubtermInfo"
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos, non_subterm_term_codings)
bad_subterm = OrderedDict(term_codings)
# f_x is a factor in this model, but it is not a factor in t_y
def __init__(self, column_names,
factor_infos=None, term_codings=None):
self.column_name_indexes = OrderedDict(zip(column_names,
range(len(column_names))))
if (factor_infos is None) != (term_codings is None):
raise ValueError("Must specify either both or neither of "
"factor_infos= and term_codings=")
self.factor_infos = factor_infos
self.term_codings = term_codings
# factor_infos is a dict containing one entry for every factor
# mentioned in our terms, mapping each factor to a FactorInfo
# object
if self.factor_infos is not None:
if not isinstance(self.factor_infos, dict):
raise ValueError("factor_infos should be a dict")