Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if count > 0 and count % 2 == 0:
return Descape
else:
return Dnone
elif A.delimiter == B.delimiter:
Aq, Ae = A.quotechar, A.escapechar
Bq, Be = B.quotechar, B.escapechar
if (Aq, Ae) == ("", "") or (Bq, Be) == ("", ""):
# This case is activated if the escapechar+quotechar combination
# occurs in the cells (e.g. "Jill\'s data") but no actual quoting
# is done with the quote character.
d_no = A if (Aq, Ae) == ("", "") else B
d_yes = B if d_no == A else A
X = list(parse_string(data, dialect=d_no))
Y = list(parse_string(data, dialect=d_yes))
if len(X) != len(Y):
return None
for x, y in zip(X, Y):
if len(x) != len(y):
return None
# if we're here, then there is no effect on structure.
# we test if the only cells that differ are those that have an
# escapechar+quotechar combination.
eq = d_yes.escapechar + d_yes.quotechar
for rX, rY in zip(X, Y):
for x, y in zip(rX, rY):
if x != y:
if not eq in x:
return None
# Artifact due to type detection (comma as radix point)
if A.delimiter == ",":
return A
else:
return B
elif A.delimiter == "-" or B.delimiter == "-":
# Artifact due to type detection (dash as minus sign)
if A.delimiter == "-":
return B
else:
return A
elif diff_only_in_key("escapechar"):
Dnone, Descape = (A, B) if A.escapechar == "" else (B, A)
X = list(parse_string(data, Dnone))
Y = list(parse_string(data, Descape))
# double check shape. Usually if the shape differs the pattern score
# should have caught it, but if by a freakish occurrence it hasn't then
# we can't break this tie (for now)
if len(X) != len(Y):
return None
for x, y in zip(X, Y):
if len(x) != len(y):
return None
cells_escaped = []
cells_unescaped = []
for x, y in zip(X, Y):
for u, v in zip(x, y):
if u != v:
cells_unescaped.append(u)
Parameters
----------
data: str
the data as a single string
dialect: SimpleDialect
the dialect to use
eps: float
the minimum value of the type score
"""
total = 0
known = 0
td = TypeDetector()
for row in parse_string(data, dialect, return_quoted=True):
for cell, is_quoted in row:
total += 1
known += td.is_known_type(cell, is_quoted=is_quoted)
if total == 0:
return eps
return max(eps, known / total)
Returns
-------
dialects: list
List of SimpleDialect objects.
"""
equal_delim = len(set([d.delimiter for d in dialects])) == 1
if not equal_delim:
return None
# First, identify dialects that result in the same parsing result.
equal_dialects = []
for a, b in pairwise(dialects):
X = list(parse_string(data, a))
Y = list(parse_string(data, b))
if X == Y:
equal_dialects.append((a, b))
# Try to break the ties in these pairs
new_dialects = set()
visited = set()
for A, B in equal_dialects:
ans = break_ties_two(data, A, B)
if not ans is None:
new_dialects.add(ans)
visited.add(A)
visited.add(B)
# and add the dialects that we didn't visit
for d in dialects:
if not d in visited:
if sorted([A.delimiter, B.delimiter]) == sorted([",", " "]):
# Artifact due to type detection (comma as radix point)
if A.delimiter == ",":
return A
else:
return B
elif A.delimiter == "-" or B.delimiter == "-":
# Artifact due to type detection (dash as minus sign)
if A.delimiter == "-":
return B
else:
return A
elif diff_only_in_key("escapechar"):
Dnone, Descape = (A, B) if A.escapechar == "" else (B, A)
X = list(parse_string(data, Dnone))
Y = list(parse_string(data, Descape))
# double check shape. Usually if the shape differs the pattern score
# should have caught it, but if by a freakish occurrence it hasn't then
# we can't break this tie (for now)
if len(X) != len(Y):
return None
for x, y in zip(X, Y):
if len(x) != len(y):
return None
cells_escaped = []
cells_unescaped = []
for x, y in zip(X, Y):
for u, v in zip(x, y):
if u != v:
Notes
-----
We have only observed one tie for each case during development, so
this may need to be improved in the future.
"""
equal_delim = A.delimiter == B.delimiter == C.delimiter
equal_escape = A.escapechar == B.escapechar == C.escapechar
if equal_delim and equal_escape:
# difference is *only* in quotechar
dialects = [A, B, C]
pA = list(parse_string(data, A))
pB = list(parse_string(data, B))
pC = list(parse_string(data, C))
if len(pA) != len(pB) or len(pA) != len(pC) or len(pB) != len(pC):
return None
p_none, d_none = next(
(
(p, d)
for p, d in zip([pA, pB, pC], dialects)
if d.quotechar == ""
),
(None, None),
)
if p_none is None:
return None
List of SimpleDialect objects
Returns
-------
dialects: list
List of SimpleDialect objects.
"""
equal_delim = len(set([d.delimiter for d in dialects])) == 1
if not equal_delim:
return None
# First, identify dialects that result in the same parsing result.
equal_dialects = []
for a, b in pairwise(dialects):
X = list(parse_string(data, a))
Y = list(parse_string(data, b))
if X == Y:
equal_dialects.append((a, b))
# Try to break the ties in these pairs
new_dialects = set()
visited = set()
for A, B in equal_dialects:
ans = break_ties_two(data, A, B)
if not ans is None:
new_dialects.add(ans)
visited.add(A)
visited.add(B)
# and add the dialects that we didn't visit
for d in dialects:
Notes
-----
We have only observed one tie for each case during development, so
this may need to be improved in the future.
"""
equal_delim = A.delimiter == B.delimiter == C.delimiter
equal_escape = A.escapechar == B.escapechar == C.escapechar
if equal_delim and equal_escape:
# difference is *only* in quotechar
dialects = [A, B, C]
pA = list(parse_string(data, A))
pB = list(parse_string(data, B))
pC = list(parse_string(data, C))
if len(pA) != len(pB) or len(pA) != len(pC) or len(pB) != len(pC):
return None
p_none, d_none = next(
(
(p, d)
for p, d in zip([pA, pB, pC], dialects)
if d.quotechar == ""
),
(None, None),
)
if p_none is None:
return None
-----
We have only observed one tie for each case during development, so
this may need to be improved in the future.
"""
equal_delim = A.delimiter == B.delimiter == C.delimiter
equal_escape = A.escapechar == B.escapechar == C.escapechar
if equal_delim and equal_escape:
# difference is *only* in quotechar
dialects = [A, B, C]
pA = list(parse_string(data, A))
pB = list(parse_string(data, B))
pC = list(parse_string(data, C))
if len(pA) != len(pB) or len(pA) != len(pC) or len(pB) != len(pC):
return None
p_none, d_none = next(
(
(p, d)
for p, d in zip([pA, pB, pC], dialects)
if d.quotechar == ""
),
(None, None),
)
if p_none is None:
return None
rem = [