code = "y ~ a + (foo(b,c + 2)) + -1 + 0 + 10"
tokens = list(_tokenize_formula(code, ["+", "-", "~"]))
expecteds = [("PYTHON_EXPR", Origin(code, 0, 1), "y"),
("~", Origin(code, 2, 3), None),
("PYTHON_EXPR", Origin(code, 4, 5), "a"),
("+", Origin(code, 6, 7), None),
(Token.LPAREN, Origin(code, 8, 9), None),
("PYTHON_EXPR", Origin(code, 9, 23), "foo(b, c + 2)"),
(Token.RPAREN, Origin(code, 23, 24), None),
("+", Origin(code, 25, 26), None),
("-", Origin(code, 27, 28), None),
("ONE", Origin(code, 28, 29), "1"),
("+", Origin(code, 30, 31), None),
("ZERO", Origin(code, 32, 33), "0"),
("+", Origin(code, 34, 35), None),
("NUMBER", Origin(code, 36, 38), "10"),
]
for got, expected in zip(tokens, expecteds):
assert isinstance(got, Token)
assert got.type == expected[0]
assert got.origin == expected[1]
assert got.extra == expected[2]
def test_parse_origin():
    tree = parse_formula("a ~ b + c")
    assert tree.origin == Origin("a ~ b + c", 0, 9)
    assert tree.token.origin == Origin("a ~ b + c", 2, 3)
    assert tree.args[0].origin == Origin("a ~ b + c", 0, 1)
    assert tree.args[1].origin == Origin("a ~ b + c", 4, 9)
    assert tree.args[1].token.origin == Origin("a ~ b + c", 6, 7)
    assert tree.args[1].args[0].origin == Origin("a ~ b + c", 4, 5)
    assert tree.args[1].args[1].origin == Origin("a ~ b + c", 8, 9)

def test_python_tokenize():
    code = "a + (foo * -1)"
    tokens = list(python_tokenize(code))
    expected = [(tokenize.NAME, "a", Origin(code, 0, 1)),
                (tokenize.OP, "+", Origin(code, 2, 3)),
                (tokenize.OP, "(", Origin(code, 4, 5)),
                (tokenize.NAME, "foo", Origin(code, 5, 8)),
                (tokenize.OP, "*", Origin(code, 9, 10)),
                (tokenize.OP, "-", Origin(code, 11, 12)),
                (tokenize.NUMBER, "1", Origin(code, 12, 13)),
                (tokenize.OP, ")", Origin(code, 13, 14))]
    assert tokens == expected

    code2 = "a + (b"
    tokens2 = list(python_tokenize(code2))
    expected2 = [(tokenize.NAME, "a", Origin(code2, 0, 1)),
                 (tokenize.OP, "+", Origin(code2, 2, 3)),
                 (tokenize.OP, "(", Origin(code2, 4, 5)),
                 (tokenize.NAME, "b", Origin(code2, 5, 6))]
    assert tokens2 == expected2

    from nose.tools import assert_raises
    assert_raises(PatsyError, list, python_tokenize("a b # c"))
    assert_raises(PatsyError, list, python_tokenize("a b \"c"))

def test__tokenize_formula():
    code = "y ~ a + (foo(b,c  +  2)) + -1 + 0 + 10"
    tokens = list(_tokenize_formula(code, ["+", "-", "~"]))
    expecteds = [("PYTHON_EXPR", Origin(code, 0, 1), "y"),
                 ("~", Origin(code, 2, 3), None),
                 ("PYTHON_EXPR", Origin(code, 4, 5), "a"),
                 ("+", Origin(code, 6, 7), None),
                 (Token.LPAREN, Origin(code, 8, 9), None),
                 ("PYTHON_EXPR", Origin(code, 9, 23), "foo(b, c + 2)"),
                 (Token.RPAREN, Origin(code, 23, 24), None),
                 ("+", Origin(code, 25, 26), None),
                 ("-", Origin(code, 27, 28), None),
                 ("ONE", Origin(code, 28, 29), "1"),
                 ("+", Origin(code, 30, 31), None),
                 ("ZERO", Origin(code, 32, 33), "0"),
                 ("+", Origin(code, 34, 35), None),
                 ("NUMBER", Origin(code, 36, 38), "10")]
    for got, expected in zip(tokens, expecteds):
        assert isinstance(got, Token)
        assert got.type == expected[0]
        assert got.origin == expected[1]
        assert got.extra == expected[2]

# Excerpt: the scanning half of a constraint tokenizer (a function-body
# fragment, hence the bare ``return``).  The regexes -- lparen_re, rparen_re,
# op_re, variable_re, num_re, whitespace_re -- are defined earlier in the
# enclosing function and are not part of this excerpt.  Each lexicon entry
# pairs a regex with an action; a None action tells re.Scanner to match the
# text and discard it.
lexicon = [
    (lparen_re, _token_maker(Token.LPAREN, string)),
    (rparen_re, _token_maker(Token.RPAREN, string)),
    (op_re, _token_maker("__OP__", string)),
    (variable_re, _token_maker("VARIABLE", string)),
    (num_re, _token_maker("NUMBER", string)),
    (whitespace_re, None),
]
scanner = re.Scanner(lexicon)
tokens, leftover = scanner.scan(string)
if leftover:
    # Anything the scanner could not match is a syntax error; point the
    # error at the first unmatched character.
    offset = len(string) - len(leftover)
    raise PatsyError("unrecognized token in constraint",
                     Origin(string, offset, offset + 1))
return tokens
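
# Since ``re.Scanner`` is easy to miss in the standard library, here is a
# minimal, self-contained sketch of the same scanning pattern.  The regexes
# and the ``tokenize_arithmetic`` name are hypothetical stand-ins, not part
# of the code above.
import re

def tokenize_arithmetic(string):
    def make_token(kind):
        # re.Scanner calls each action as action(scanner, matched_text) and
        # collects the return values.
        return lambda scanner, text: (kind, text)

    lexicon = [
        (r"\(", make_token("LPAREN")),
        (r"\)", make_token("RPAREN")),
        (r"[-+*/]", make_token("OP")),
        (r"[0-9]+", make_token("NUMBER")),
        (r"\s+", None),  # None: match whitespace but emit nothing
    ]
    scanner = re.Scanner(lexicon)
    tokens, leftover = scanner.scan(string)
    if leftover:
        offset = len(string) - len(leftover)
        raise ValueError("unrecognized token at offset %d" % offset)
    return tokens

# tokenize_arithmetic("1 + (2 * 3)") returns:
#   [("NUMBER", "1"), ("OP", "+"), ("LPAREN", "("), ("NUMBER", "2"),
#    ("OP", "*"), ("NUMBER", "3"), ("RPAREN", ")")]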

@classmethod
def combine(cls, origin_objs):
    """Combine a set of Origins into a single Origin spanning all of them.

    Example usage: if we wanted to represent the origin of the "x1:x2"
    term, we could do ``Origin.combine([x1_obj, x2_obj])``.

    Single argument is an iterable, and each element in the iterable
    should be either:

    * An Origin object
    * ``None``
    * An object that has a ``.origin`` attribute which fulfills the above
      criteria.

    Returns either an Origin object, or None.
    """
    origins = []
    for obj in origin_objs:
        if obj is not None and not isinstance(obj, Origin):
            obj = obj.origin
        if obj is None:
            continue
        origins.append(obj)
    if not origins:
        return None
    # All the Origins being combined must come from the same source string.
    codes = set([o.code for o in origins])
    assert len(codes) == 1
    start = min([o.start for o in origins])
    end = max([o.end for o in origins])
    return cls(codes.pop(), start, end)
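
# A minimal usage sketch of ``combine``, assuming the three-argument
# ``Origin(code, start, end)`` constructor exercised by the tests above;
# the variable names here are hypothetical.
code = "a ~ b + c"
left = Origin(code, 4, 5)    # the "b" token
right = Origin(code, 8, 9)   # the "c" token
combined = Origin.combine([left, None, right])  # None entries are skipped
assert (combined.start, combined.end) == (4, 9)  # spans "b + c"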