Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_basic_2d(self):
    # With non_ascii_only=False the LaTeX-special ASCII characters in the
    # sample are expected to come back escaped.
    encoder = UnicodeToLatexEncoder(non_ascii_only=False)
    special_ascii = " \" # $ % & \\ _ { } ~ "
    expected = " '' \\# \\$ \\% \\& {\\textbackslash} \\_ \\{ \\} {\\textasciitilde} "
    self.assertEqual(encoder.unicode_to_latex(special_ascii), expected)
def test_rules_00(self):
    # Rule order under test: a custom dict rule, a custom regex rule, the
    # built-in 'defaults' rules, and finally a callable rule.
    def acallable(s, pos):
        # Match an e-acute or a literal "..." starting at `pos`; return None
        # when neither is found there.
        if s[pos] == "\N{LATIN SMALL LETTER E WITH ACUTE}":
            return 1, r"{\'{e}}"
        return (3, r"\ldots") if s.startswith('...', pos) else None

    dict_rule = latexencode.UnicodeToLatexConversionRule(
        latexencode.RULE_DICT,
        {
            ord("\N{LATIN CAPITAL LETTER A WITH GRAVE}"): r"{{\`{A}}}",
            ord("%"): r"\textpercent",
        },
    )
    regex_rule = latexencode.UnicodeToLatexConversionRule(
        latexencode.RULE_REGEX,
        [
            (re.compile('v(otre)'), r'n\1'),
            (re.compile("s'exclama", flags=re.I), r"s'exprima"),
            (re.compile('\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}'), r"{\^i}"),
        ],
    )
    builtin_rules = latexencode.get_builtin_conversion_rules('defaults')
    callable_rule = latexencode.UnicodeToLatexConversionRule(
        latexencode.RULE_CALLABLE, acallable
    )
    encoder = UnicodeToLatexEncoder(
        conversion_rules=[dict_rule, regex_rule] + builtin_rules + [callable_rule]
    )

    text = "\"\N{LATIN CAPITAL LETTER A WITH GRAVE} votre sant\N{LATIN SMALL LETTER E WITH ACUTE}!\" s'exclama le ma\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}tre de maison ... \N{LATIN SMALL LETTER A WITH GRAVE} 100%."
    expected = "''{{\\`{A}}} notre sant\\'e!'' s'exprima le ma{\\^i}tre de maison {\\ldots} \\`a 100{\\textpercent}."
    self.assertEqual(encoder.unicode_to_latex(text), expected)
def test_basic_2b(self):
    # With replacement_latex_protection='none' the expected output carries
    # the bare macro replacements (e.g. "\^\i" directly followed by "tre").
    encoder = UnicodeToLatexEncoder(replacement_latex_protection='none')
    text = "\"\N{LATIN CAPITAL LETTER A WITH GRAVE} votre sant\N{LATIN SMALL LETTER E WITH ACUTE}!\" s'exclama le ma\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}tre de maison \N{LATIN SMALL LETTER A WITH GRAVE} 100%."
    expected = "''\\`A votre sant\\'e!'' s'exclama le ma\\^\\itre de maison \\`a 100\\%."
    self.assertEqual(encoder.unicode_to_latex(text), expected)
def test_basic_3b(self):
    # Under unknown_char_policy='replace' the Thai character is expected to
    # be rendered as "{\bfseries ?}".
    test_unknown_chars = "A unicode character: \N{THAI CHARACTER THO THONG}"
    expected = "A unicode character: {\\bfseries ?}"

    # The conversion generates warnings -- that's good.  assertLogs fails
    # the test unless at least one WARNING-or-higher record is logged on
    # 'pylatexenc.latexencode' inside the block; the captured records are
    # not inspected further, so no name is bound to them.
    with self.assertLogs(logger='pylatexenc.latexencode', level='WARNING'):
        u = UnicodeToLatexEncoder(unknown_char_policy='replace')
        self.assertEqual(u.unicode_to_latex(test_unknown_chars), expected)
def test_basic_2c(self):
    # With non_ascii_only=True the all-ASCII sample is expected back
    # unchanged.
    encoder = UnicodeToLatexEncoder(non_ascii_only=True)
    special_ascii = " \" # $ % & \\ _ { } ~ "
    converted = encoder.unicode_to_latex(special_ascii)
    self.assertEqual(converted, special_ascii)
def test_issue_no21(self):
# test for https://github.com/phfaist/pylatexenc/issues/21
def capitalize_acronyms(s, pos):
if s[pos] in ('{', '}'):
# preserve existing braces
return (1, s[pos])
m = re.compile(r'\b[A-Z]{2,}\w*\b').match(s, pos)
if m is None:
return None
return (m.end()-m.start(), "{" + m.group() + "}")
u = UnicodeToLatexEncoder(
conversion_rules=[
latexencode.UnicodeToLatexConversionRule(latexencode.RULE_CALLABLE, capitalize_acronyms),
] + latexencode.get_builtin_conversion_rules('defaults')
)
input = "Title with {Some} ABC acronyms LIKe this."
self.assertEqual(
u.unicode_to_latex(input),
"Title with {Some} {ABC} acronyms {LIKe} this."
)
u = UnicodeToLatexEncoder(
conversion_rules=[
latexencode.UnicodeToLatexConversionRule(
latexencode.RULE_REGEX,
[ (re.compile(r'([{}])'), r'\1'), # keep existing braces
(re.compile(r'\b([A-Z]{2,}\w*)\b'), r'{\1}'), ]
def test_rules_01(self):
    # Same custom rules as a plain rule list, but the built-in rule set is
    # given by its name ('unicode-xml') directly inside conversion_rules.
    def acallable(s, pos):
        # Match an e-acute or a literal "..." starting at `pos`; return None
        # when neither is found there.
        if s[pos] == "\N{LATIN SMALL LETTER E WITH ACUTE}":
            return 1, r"{\'{e}}"
        return (3, r"\ldots") if s.startswith('...', pos) else None

    dict_rule = latexencode.UnicodeToLatexConversionRule(
        latexencode.RULE_DICT,
        {
            ord("\N{LATIN CAPITAL LETTER A WITH GRAVE}"): r"{{\`{A}}}",
            ord("%"): r"\textpercent",
        },
    )
    regex_rule = latexencode.UnicodeToLatexConversionRule(
        latexencode.RULE_REGEX,
        [
            (re.compile('v(otre)'), r'n\1'),
            (re.compile("s'exclama", flags=re.I), r"s'exprima"),
            (re.compile('\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}'), r"{\^i}"),
        ],
    )
    callable_rule = latexencode.UnicodeToLatexConversionRule(
        latexencode.RULE_CALLABLE, acallable
    )
    encoder = UnicodeToLatexEncoder(
        conversion_rules=[
            dict_rule,
            regex_rule,
            'unicode-xml',  # expand built-in rule names
            callable_rule,
        ]
    )

    text = "\"\N{LATIN CAPITAL LETTER A WITH GRAVE} votre sant\N{LATIN SMALL LETTER E WITH ACUTE}!\" s'exclama le ma\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}tre de maison ... \N{LATIN SMALL LETTER A WITH GRAVE} 100%."
    expected = "\"{{\\`{A}}} notre sant\\'{e}!\" s'exprima le ma{\\^i}tre de maison {\\ldots} \\`{a} 100{\\textpercent}."
    self.assertEqual(encoder.unicode_to_latex(text), expected)
def test_rules_03(self):
    # Only the 'unicode-xml' rule set is active; the expected string pins
    # its output for the sample sentence.
    encoder = UnicodeToLatexEncoder(conversion_rules=['unicode-xml'])
    text = "* \"\N{LATIN CAPITAL LETTER A WITH GRAVE} votre sant\N{LATIN SMALL LETTER E WITH ACUTE}!\" s'exclama\N{SUPERSCRIPT TWO} le ma\N{LATIN SMALL LETTER I WITH CIRCUMFLEX}tre de maison \N{LATIN SMALL LETTER A WITH GRAVE} 100%."
    expected = "{\\ast} \"\\`{A} votre sant\\'{e}!\" s{\\textquotesingle}exclama{^2} le ma\\^{\\i}tre de maison \\`{a} 100\\%."
    self.assertEqual(encoder.unicode_to_latex(text), expected)
# that is not fully protected by braces like "\v{C}adz Zykzyz"
if '\\' not in repl and '{' not in repl:
# no macros/groups, keep like this
return repl
return '{' + repl + '}'
# k = repl.rfind('\\')
# if k >= 0 and repl[k+1:].isalpha():
# # has dangling named macro, apply protection.
# return '{' + repl + '}'
# return repl
# Code point -> LaTeX replacement map derived from the dict returned by
# latexencode.get_builtin_uni2latex_dict().  Each replacement is passed
# through _apply_protection, and entries whose character occurs in the
# literal below (space, $, ", \, _, {, }, ~, <, >) are left out.
_our_uni2latex_map = dict(
    (codepoint, _apply_protection(latex))
    for codepoint, latex in latexencode.get_builtin_uni2latex_dict().items()
    if chr(codepoint) not in r""" $ " \ _ { } ~ < > """
)
# Module-level encoder: the _keep_latex_macros callable rule is listed
# first, followed by the dict rule built from _our_uni2latex_map.
_our_unicode_to_latex = latexencode.UnicodeToLatexEncoder(
    conversion_rules=[
        latexencode.UnicodeToLatexConversionRule(rule_type, rule)
        for rule_type, rule in (
            (latexencode.RULE_CALLABLE, _keep_latex_macros),
            (latexencode.RULE_DICT, _our_uni2latex_map),
        )
    ],
    # protection is done manually:
    replacement_latex_protection='none',
)
def custom_uni_to_latex(s):
    """Encode *s* with the module-level ``_our_unicode_to_latex`` encoder.

    Returns whatever that encoder's ``unicode_to_latex`` returns for *s*.
    """
    encode = _our_unicode_to_latex.unicode_to_latex
    return encode(s)