Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_FactorInfo():
    """Check that FactorInfo exposes its constructor arguments as attributes.

    Covers one numerical factor (only ``num_columns`` given) and one
    categorical factor (only ``categories`` given).
    """
    # Numerical factor: categories is left unset and must read back as None.
    numeric_info = FactorInfo("asdf", "numerical", {"a": 1}, num_columns=10)
    assert numeric_info.factor == "asdf"
    assert numeric_info.state == {"a": 1}
    assert numeric_info.type == "numerical"
    assert numeric_info.num_columns == 10
    assert numeric_info.categories is None
    # Smoke test: building the repr must not raise.
    repr(numeric_info)

    # Categorical factor: num_columns is left unset, and the categories
    # passed as a list are expected to compare equal to a tuple.
    categorical_info = FactorInfo("asdf", "categorical", {"a": 2},
                                  categories=["z", "j"])
    assert categorical_info.factor == "asdf"
    assert categorical_info.state == {"a": 2}
    assert categorical_info.type == "categorical"
    assert categorical_info.num_columns is None
    assert categorical_info.categories == ("z", "j")
for term in termlist:
all_factors.update(term.factors)
factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env)
# Now all the factors have working eval methods, so we can evaluate them
# on some data to find out what type of data they return.
(num_column_counts,
cat_levels_contrasts) = _examine_factor_types(all_factors,
factor_states,
data_iter_maker,
NA_action)
# Now we need the factor infos, which encapsulate the knowledge of
# how to turn any given factor into a chunk of data:
factor_infos = {}
for factor in all_factors:
if factor in num_column_counts:
fi = FactorInfo(factor,
"numerical",
factor_states[factor],
num_columns=num_column_counts[factor],
categories=None)
else:
assert factor in cat_levels_contrasts
categories = cat_levels_contrasts[factor][0]
fi = FactorInfo(factor,
"categorical",
factor_states[factor],
num_columns=None,
categories=categories)
factor_infos[factor] = fi
# And now we can construct the DesignInfo for each termlist:
design_infos = []
for termlist in termlists:
def test_DesignInfo():
    """Check DesignInfo name, index and slice lookups on a two-term design.

    The design has a numerical term ``x`` spanning three columns followed
    by a numerical term ``y`` spanning one column.
    """
    from nose.tools import assert_raises

    class _MockFactor(object):
        """Minimal stand-in factor that only knows its own name."""

        def __init__(self, name):
            self._name = name

        def name(self):
            return self._name

    factor_x = _MockFactor("x")
    factor_y = _MockFactor("y")
    term_x = Term([factor_x])
    term_y = Term([factor_y])

    factor_infos = {
        factor_x: FactorInfo(factor_x, "numerical", {}, num_columns=3),
        factor_y: FactorInfo(factor_y, "numerical", {}, num_columns=1),
    }

    # Insertion order matters here: x's columns come before y's.
    term_codings = OrderedDict()
    term_codings[term_x] = [SubtermInfo([factor_x], {}, 3)]
    term_codings[term_y] = [SubtermInfo([factor_y], {}, 1)]

    design = DesignInfo(["x1", "x2", "x3", "y"], factor_infos, term_codings)

    # Names and terms come back in column order.
    assert design.column_names == ["x1", "x2", "x3", "y"]
    assert design.term_names == ["x", "y"]
    assert design.terms == [term_x, term_y]

    # Lookups from names/terms to column positions.
    assert design.column_name_indexes == {"x1": 0, "x2": 1, "x3": 2, "y": 3}
    assert design.term_name_slices == {"x": slice(0, 3), "y": slice(3, 4)}
    assert design.term_slices == {term_x: slice(0, 3), term_y: slice(3, 4)}

    assert design.describe() == "x + y"

    # slice() accepts both an integer column index and a column name.
    assert design.slice(1) == slice(1, 2)
    assert design.slice("x1") == slice(0, 1)
def test__subterm_column_names_iter_and__build_subterm():
    """Check column naming and value filling for a three-factor subterm.

    The subterm combines two one-column numerical factors (``f1``, ``f3``)
    with a two-category factor (``f2``) coded by an explicit 2x2 contrast
    matrix, giving two output columns.
    """
    from nose.tools import assert_raises
    from patsy.contrasts import ContrastMatrix
    from patsy.categorical import C

    num_first = _MockFactor("f1")
    cat_middle = _MockFactor("f2")
    num_last = _MockFactor("f3")

    coding = np.array([[0, 0.5],
                       [3, 0]])
    contrast = ContrastMatrix(coding, ["[c1]", "[c2]"])

    factor_infos1 = {
        num_first: FactorInfo(num_first, "numerical", {},
                              num_columns=1, categories=None),
        cat_middle: FactorInfo(cat_middle, "categorical", {},
                               num_columns=None, categories=["a", "b"]),
        num_last: FactorInfo(num_last, "numerical", {},
                             num_columns=1, categories=None),
    }
    contrast_matrices = {cat_middle: contrast}
    subterm1 = SubtermInfo([num_first, cat_middle, num_last],
                           contrast_matrices, 2)

    # One name per contrast column, with factor names joined by ":".
    column_names = list(_subterm_column_names_iter(factor_infos1, subterm1))
    assert column_names == ["f1:f2[c1]:f3", "f1:f2[c2]:f3"]

    factor_values = {
        num_first: atleast_2d_column_default([1, 2, 3]),
        cat_middle: np.asarray([0, 0, 1]),
        num_last: atleast_2d_column_default([7.5, 2, -12]),
    }
    # Uninitialised on purpose: _build_subterm is expected to fill it.
    mat = np.empty((3, 2))
    _build_subterm(subterm1, factor_infos1, factor_values, mat)

    # Each expected entry is written as
    # (contrast entry for the row's f2 code) * (f1 value) * (f3 value).
    expected = [[0, 0.5 * 1 * 7.5],
                [0, 0.5 * 2 * 2],
                [3 * 3 * -12, 0]]
    assert np.allclose(mat, expected)
def test__eval_factor_numerical():
    """Check _eval_factor on numerical factors.

    Verifies the shape and contents of the evaluated values and the NA mask
    for a one-column factor, that several malformed inputs raise PatsyError,
    and that a two-column factor can be evaluated.
    """
    from nose.tools import assert_raises

    na_action = NAAction()
    mock_factor = _MockFactor()
    one_col_info = FactorInfo(mock_factor, "numerical", {},
                              num_columns=1, categories=None)
    assert one_col_info.factor is mock_factor

    values, is_NA = _eval_factor(one_col_info, {"mock": [1, 2, 3]}, na_action)
    # A flat list of three numbers comes back as a (3, 1) column.
    assert values.shape == (3, 1)
    assert np.all(values == [[1], [2], [3]])
    # One NA flag per row, none of them set.
    assert is_NA.shape == (3,)
    assert np.all(~is_NA)

    # Inputs that must be rejected for a one-column numerical factor:
    # a triply nested list, a row holding two values, strings, and booleans.
    rejected_inputs = (
        [[[1]]],
        [[1, 2]],
        ["a", "b"],
        [True, False],
    )
    for bad_values in rejected_inputs:
        assert_raises(PatsyError, _eval_factor,
                      one_col_info, {"mock": bad_values}, na_action)

    # A factor declared with two columns accepts rows of two values.
    two_col_info = FactorInfo(_MockFactor(), "numerical",
                              {}, num_columns=2, categories=None)
    two_col_values, is_NA = _eval_factor(
        two_col_info,
        {"mock": [[1, 3], [2, 2], [3, 1]]},
        na_action)
def test_FactorInfo():
fi1 = FactorInfo("asdf", "numerical", {"a": 1}, num_columns=10)
assert fi1.factor == "asdf"
assert fi1.state == {"a": 1}
assert fi1.type == "numerical"
assert fi1.num_columns == 10
assert fi1.categories is None
# smoke test
repr(fi1)
fi2 = FactorInfo("asdf", "categorical", {"a": 2}, categories=["z", "j"])
assert fi2.factor == "asdf"
assert fi2.state == {"a": 2}
assert fi2.type == "categorical"
assert fi2.num_columns is None
assert fi2.categories == ("z", "j")
# smoke test
repr(fi2)
from nose.tools import assert_raises
assert_raises(ValueError, FactorInfo, "asdf", "non-numerical", {})
assert_raises(ValueError, FactorInfo, "asdf", "numerical", {})
assert_raises(ValueError, FactorInfo, "asdf", "numerical", {},
num_columns="asdf")
assert_raises(ValueError, FactorInfo, "asdf", "numerical", {},
term_factors = set(term.factors)
for subterm in subterms:
if not isinstance(subterm, SubtermInfo):
raise ValueError("expected SubtermInfo, "
"not %r" % (subterm,))
if not term_factors.issuperset(subterm.factors):
raise ValueError("unexpected factors in subterm")
all_factors = set()
for term in self.term_codings:
all_factors.update(term.factors)
if all_factors != set(self.factor_infos):
raise ValueError("Provided Term objects and factor_infos "
"do not match")
for factor, factor_info in six.iteritems(self.factor_infos):
if not isinstance(factor_info, FactorInfo):
raise ValueError("expected FactorInfo object, not %r"
% (factor_info,))
if factor != factor_info.factor:
raise ValueError("mismatched factor_info.factor")
for term, subterms in six.iteritems(self.term_codings):
for subterm in subterms:
exp_cols = 1
cat_factors = set()
for factor in subterm.factors:
fi = self.factor_infos[factor]
if fi.type == "numerical":
exp_cols *= fi.num_columns
else:
assert fi.type == "categorical"
cm = subterm.contrast_matrices[factor].matrix
assert_raises(ValueError, FactorInfo, "asdf", "numerical", {},
num_columns="asdf")
assert_raises(ValueError, FactorInfo, "asdf", "numerical", {},
num_columns=1, categories=1)
assert_raises(TypeError, FactorInfo, "asdf", "categorical", {})
assert_raises(ValueError, FactorInfo, "asdf", "categorical", {},
num_columns=1)
assert_raises(TypeError, FactorInfo, "asdf", "categorical", {},
categories=1)
# Make sure longs are legal for num_columns
# (Important on python2+win64, where array shapes are tuples-of-longs)
if not six.PY3:
fi_long = FactorInfo("asdf", "numerical", {"a": 1},
num_columns=long(10))
assert fi_long.num_columns == 10
def test_DesignInfo():
from nose.tools import assert_raises
class _MockFactor(object):
def __init__(self, name):
self._name = name
def name(self):
return self._name
f_x = _MockFactor("x")
f_y = _MockFactor("y")
t_x = Term([f_x])
t_y = Term([f_y])
factor_infos = {f_x:
FactorInfo(f_x, "numerical", {}, num_columns=3),
f_y:
FactorInfo(f_y, "numerical", {}, num_columns=1),
}
term_codings = OrderedDict([(t_x, [SubtermInfo([f_x], {}, 3)]),
(t_y, [SubtermInfo([f_y], {}, 1)])])
di = DesignInfo(["x1", "x2", "x3", "y"], factor_infos, term_codings)
assert di.column_names == ["x1", "x2", "x3", "y"]
assert di.term_names == ["x", "y"]
assert di.terms == [t_x, t_y]
assert di.column_name_indexes == {"x1": 0, "x2": 1, "x3": 2, "y": 3}
assert di.term_name_slices == {"x": slice(0, 3), "y": slice(3, 4)}
assert di.term_slices == {t_x: slice(0, 3), t_y: slice(3, 4)}
assert di.describe() == "x + y"
assert di.slice(1) == slice(1, 2)
assert di.slice("x1") == slice(0, 1)
assert di.slice("x2") == slice(1, 2)
assert di.slice("x3") == slice(2, 3)