Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_accents(self):
    r"""Accent macros must come out as the matching Unicode characters.

    Checks a cedilla written with a space-separated argument (``\c c``),
    acute accents with and without braces, and ``\not=`` inside inline math
    when the converter is asked to keep the math delimiters.
    """
    # (LaTeX input, expected text) pairs converted with default settings
    plain_cases = [
        (r"Fran\c cais",
         '''Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais'''),
        (r"Fr\'en{\'{e}}tique",
         '''Fr\N{LATIN SMALL LETTER E WITH ACUTE}n\N{LATIN SMALL LETTER E WITH ACUTE}tique'''),
    ]
    for latex_src, expected_text in plain_cases:
        converter = LatexNodes2Text()
        nodelist = LatexWalker(latex_src).get_latex_nodes()[0]
        self.assertEqual(converter.nodelist_to_text(nodelist), expected_text)

    # Math mode: the negated equals sign is compared in NFC-normalised form.
    math_converter = LatexNodes2Text(math_mode='with-delimiters')
    math_nodes = LatexWalker(r"$1 \not= 2$").get_latex_nodes()[0]
    actual_math = math_converter.nodelist_to_text(math_nodes)
    negated_equals = unicodedata.normalize(
        'NFC', "=\N{COMBINING LONG SOLIDUS OVERLAY}"
    )
    self.assertEqual(actual_math, '''$1 {} 2$'''.format(negated_equals))
# NOTE(review): this span has no `def` line in view, and `latex` is read below
# before any visible assignment; presumably the method header and a first
# `latex` definition were lost upstream -- confirm against the original file.
# Expected plain-text output; both checks below compare against it up to whitespace.
correct_text = r'''ABCDEF fdksanfkld safnkd anfklsa
hi there! This is an equation:
x + y i = 0
where i is the imaginary unit.
MORENKFDNSN'''
# The converter's TeX input directory is set to the directory containing this file.
testdir = os.path.realpath(os.path.abspath(os.path.dirname(__file__)))
l2t = LatexNodes2Text()
l2t.set_tex_input_directory(testdir)
output = l2t.nodelist_to_text(LatexWalker(latex).get_latex_nodes()[0])
self.assertEqualUpToWhitespace(
output,
correct_text
)
# Second input: an \input{test_input_1} line between the two marker words; the
# same converter is reused and the same expected text is required.
# (presumably test_input_1 is resolved relative to `testdir` -- TODO confirm)
latex = r'''ABCDEF fdksanfkld safnkd anfklsa
\input{test_input_1}
MORENKFDNSN'''
self.assertEqualUpToWhitespace(
l2t.nodelist_to_text(LatexWalker(latex).get_latex_nodes()[0]),
correct_text
)
def test_get_latex_nodes_read_max_nodes(self):
# Exercises LatexWalker.get_latex_nodes() with read_max_nodes, then
# get_latex_expression() at several positions of the sample document below.
# NOTE(review): the body looks spliced together from more than one test and is
# partly truncated (see the notes further down) -- confirm against the original.
latextext = r'''Text and \`accent and \textbf{bold text} and $\vec b$ more stuff for Fran\c cois
\begin{enumerate}[(i)]
\item Hi there! % here goes a comment
\item[a] Hello! @@@
\end{enumerate}
Indeed thanks to \cite[Lemma 3]{Author}, we know that...
Also: {\itshape some italic text}.
'''
lw = LatexWalker(latextext, tolerant_parsing=False)
parsing_state = lw.make_parsing_state()
# Reading at most one node from position 0 must yield only the leading
# 'Text and ' characters node (9 characters, written as 33-24).
p = 0
self.assertEqual(
lw.get_latex_nodes(pos=p, read_max_nodes=1, parsing_state=parsing_state),
([
LatexCharsNode(parsing_state=parsing_state,
chars='Text and ',
pos=p, len=33-24),
], p, 33-24))
p = latextext.find(r'ent and ') + 4 # points on second "and" on first line
nodes, pos, len_ = lw.get_latex_nodes(pos=p, read_max_nodes=5, parsing_state=parsing_state)
# The fourth node read is expected to carry a node list whose first element
# was parsed in math mode.
parsing_state_inner = nodes[3].nodelist[0].parsing_state # inner state -- math mode -- get this
self.assertTrue(parsing_state_inner.in_math_mode)
# NOTE(review): the next call is syntactically incomplete -- one opening
# parenthesis, two closing ones; the expected value appears to have been cut
# out upstream. Restore it from the original test before running.
self.assertEqual(
p,11,))
p = latextext.find(r'%')-2 # check: correctly skips comments also after space
# Starting two characters before the '%', the expression returned must be the
# \item macro located right after the comment line.
self.assertEqual(lw.get_latex_expression(pos=p, parsing_state=parsing_state),
(LatexMacroNode(parsing_state=parsing_state,
macroname='item',
nodeargd=None,
pos=p+2+len('% here goes a comment\n'),
len=5),
p+2+len('% here goes a comment\n'),5,))
# check correct behavior if directly on brace close
p = latextext.find(r'}')
self.assertEqual(lw.get_latex_expression(pos=p, parsing_state=parsing_state,
strict_braces=True),
(LatexCharsNode(parsing_state=parsing_state,
chars='', pos=p, len=0),p,0,))
# NOTE(review): lw2 is built with the same arguments as lw, yet the call with
# strict_braces=True is expected to return an empty chars node on lw (above)
# and to raise on lw2 (below). Presumably `lw` was created differently in the
# test this part came from -- confirm.
lw2 = LatexWalker(latextext, tolerant_parsing=False)
parsing_state2 = lw2.make_parsing_state()
self.assertEqual(lw2.get_latex_expression(pos=p, parsing_state=parsing_state2,
strict_braces=False),
(LatexCharsNode(parsing_state=parsing_state2,
chars='', pos=p, len=0),p,0,))
with self.assertRaises(LatexWalkerParseError):
dummy = lw2.get_latex_expression(pos=p, parsing_state=parsing_state2,
strict_braces=True)
# NOTE(review): the latextext visible above contains no '?`' sequence, so this
# find() would return -1; presumably the original test used a different input
# string here -- confirm.
p = latextext.find(r'?`')
self.assertEqual(lw.get_latex_expression(pos=p, parsing_state=parsing_state),
(LatexSpecialsNode(parsing_state=parsing_state,
specials_chars='?`',
nodeargd=None,
pos=p, len=2),
p, 2))
def test_star_2(self):
    r"""A ``*`` separated from ``\cmd`` by a space is parsed as the star argument.

    The parser is built with argspec ``'*'`` and started right after
    ``\cmd``; the expected result is a single ``LatexCharsNode`` holding
    ``'*'`` at position 5 with length 1.
    """
    lw = latexwalker.LatexWalker(r'\cmd * xyz')
    s = MacroStandardArgsParser('*')
    parsing_state = lw.make_parsing_state()
    # parse_args() hands back a 3-tuple; only its first element (the parsed
    # arguments) is checked here, so the other two are deliberately unused.
    argd, _pos, _len = s.parse_args(lw, len(r'\cmd'), parsing_state=parsing_state)
    self.assertPMAEqual(
        argd,
        ParsedMacroArgs(argspec='*', argnlist=[
            LatexCharsNode(parsing_state=parsing_state,
                           chars='*',
                           pos=5, len=1)
        ])
    )
def test_marg_1(self):
    r"""A ``{`` argument given without braces is expected to be one character.

    For the input ``\cmd ab`` parsed from just after ``\cmd``, the expected
    argument is a ``LatexCharsNode`` holding only ``'a'``, located one
    character past the macro name (i.e. after the separating space).
    """
    lw = latexwalker.LatexWalker(r'\cmd ab')
    s = MacroStandardArgsParser('{')
    parsing_state = lw.make_parsing_state()
    # parse_args() hands back a 3-tuple; only its first element (the parsed
    # arguments) is checked here, so the other two are deliberately unused.
    argd, _pos, _len = s.parse_args(lw, len(r'\cmd'), parsing_state=parsing_state)
    self.assertPMAEqual(
        argd,
        ParsedMacroArgs(argspec='{', argnlist=[
            LatexCharsNode(parsing_state=parsing_state,
                           chars='a',
                           pos=len(r'\cmd')+1, len=1)
        ])
    )
def test_star_0(self):
    r"""With no ``*`` following ``\cmd``, the star argument is expected to be ``None``."""
    walker = latexwalker.LatexWalker(r'\cmd xyz')
    star_parser = MacroStandardArgsParser('*')
    start_pos = len(r'\cmd')
    # Only the parsed-arguments element of the returned 3-tuple is compared.
    argd, _pos, _len = star_parser.parse_args(walker, start_pos)
    expected = ParsedMacroArgs(argspec='*', argnlist=[None])
    self.assertPMAEqual(argd, expected)
def repl_ltx_str(n, r, x):
# NOTE(review): only the beginning of this function is in view -- the loop
# below never advances `pos`, `n` is never used and nothing is returned in the
# visible part, so the remainder was presumably cut off upstream; confirm
# against the original file before relying on it.
# scan string until next '{', read latex expression and skip it, etc.
lw = latexwalker.LatexWalker(x, tolerant_parsing=True)
pos = 0
newx = u''
# Combined pattern: either an opening brace or the caller's pattern `r`,
# matched case-insensitively.
# NOTE(review): `(?P\{)` is not a valid named-group construct for `re`, while
# the loop reads m.group('openbrace'); the pattern presumably read
# `(?P<openbrace>\{)` before the angle-bracket part was lost -- confirm and restore.
therx = re.compile(r'((?P\{)|'+r.pattern+r')', re.IGNORECASE)
while True:
m = therx.search(x, pos)
if m is None:
# no further match: copy the rest of the input and stop
newx += x[pos:]
break
# copy the text between the previous position and this match unchanged
newpos = m.start()
newx += x[pos:newpos]
if m.group('openbrace'):
# we encountered an opening brace, so we need to copy in everything verbatim
(junknode, np, nl) = lw.get_latex_expression(newpos)
# just copy the contents as is and move on
newx += x[newpos:np+nl]
newpos = np + nl
def remove_full_braces(val):
    """Strip *val* and drop one pair of braces that encloses the whole value.

    The surrounding whitespace is always stripped.  The outer ``{`` and ``}``
    are removed only when the braced group that starts at position 0 spans the
    entire stripped string; otherwise the stripped value is returned as is.

    If parsing the braced group raises ``latexwalker.LatexWalkerError``, the
    problem is logged at "longdebug" level and the stripped value is returned
    with its braces left in place.
    """
    val = val.strip()

    # Nothing to remove unless the value both starts and ends with a brace.
    if not val or val[0] != '{' or val[-1] != '}':
        return val

    # remove the extra braces. But first, check that the braces
    # enclose the full field, and we don't have e.g. "{Maxwell}'s
    # demon versus {Szilard}", in which case a dumb algorithm would
    # leave the invalid LaTeX string "Maxwell}'s demon versus
    # {Szilard"
    try:
        # Keep the try body down to the single call that can raise.
        (_nodes, pos, length) = \
            latexwalker.LatexWalker(val, tolerant_parsing=True).get_latex_braced_group(0)
    except latexwalker.LatexWalkerError:
        logger.longdebug(
            "LatexWalkerError while attempting to remove curly braces around value in %s",
            val
        )
        return val

    if pos + length == len(val):
        # yes, all fine: the braces are one block for the field
        return val[1:-1]
    return val