Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
)
u_word_main = b_word_main.decode(
self.sourceEncoding,
"ignore",
)
else:
u_word_main = b_word_main.decode(self.sourceEncoding, "ignore")
if self.processHtmlInKey:
# u_word_main_orig = u_word_main
u_word_main = stripHtmlTags(u_word_main)
u_word_main = replaceHtmlEntriesInKeys(u_word_main)
# if(re.match(".*[&<>].*", u_word_main_orig)):
# log.debug("original text: " + u_word_main_orig + "\n" \
# + "new text: " + u_word_main + "\n")
u_word_main = removeControlChars(u_word_main)
u_word_main = removeNewlines(u_word_main)
u_word_main = u_word_main.lstrip()
if self.keyRStripChars:
u_word_main = u_word_main.rstrip(self.keyRStripChars)
return u_word_main
if fields.b_title:
fields.u_title, singleEncoding = self.decodeCharsetTags(
fields.b_title,
self.sourceEncoding,
)
fields.u_title = replaceHtmlEntries(fields.u_title)
fields.u_title = removeControlChars(fields.u_title)
if fields.b_title_trans:
# TODO: unclear whether this should use sourceEncoding or targetEncoding
fields.u_title_trans, singleEncoding = self.decodeCharsetTags(
fields.b_title_trans,
self.sourceEncoding,
)
fields.u_title_trans = replaceHtmlEntries(fields.u_title_trans)
fields.u_title_trans = removeControlChars(fields.u_title_trans)
if fields.b_transcription_50:
if fields.code_transcription_50 == 0x10:
# This field contains values like the following (char codes, hex):
# 00 18 00 19 00 1A 00 1B 00 1C 00 1D 00 1E 00 40 00 07
# It is not UTF-16; the actual encoding is still unknown.
pass
elif fields.code_transcription_50 == 0x1b:
fields.u_transcription_50, singleEncoding = \
self.decodeCharsetTags(
fields.b_transcription_50,
self.sourceEncoding,
)
fields.u_transcription_50 = \
replaceHtmlEntries(fields.u_transcription_50)
if fields.code_transcription_50 == 0x10:
# This field contains values like the following (char codes, hex):
# 00 18 00 19 00 1A 00 1B 00 1C 00 1D 00 1E 00 40 00 07
# It is not UTF-16; the actual encoding is still unknown.
pass
elif fields.code_transcription_50 == 0x1b:
fields.u_transcription_50, singleEncoding = \
self.decodeCharsetTags(
fields.b_transcription_50,
self.sourceEncoding,
)
fields.u_transcription_50 = \
replaceHtmlEntries(fields.u_transcription_50)
fields.u_transcription_50 = \
removeControlChars(fields.u_transcription_50)
elif fields.code_transcription_50 == 0x18:
# The text in this field is incomplete, for example:
# t c=T>02D0;g0259;-
# In this case the definition normally also contains
# fields.b_transcription_60.
pass
else:
log.debug(
"processDefi(%s)\n" % b_defi +
"b_key = %s:\n" % b_key +
"defi field 50, " +
"unknown code: %#.2x" % fields.code_transcription_50
)
if fields.b_transcription_60:
if fields.code_transcription_60 == 0x1b:
)
if fields.singleEncoding:
fields.encoding = self.targetEncoding
fields.u_defi = fixImgLinks(fields.u_defi)
fields.u_defi = replaceHtmlEntries(fields.u_defi)
fields.u_defi = removeControlChars(fields.u_defi)
fields.u_defi = normalizeNewlines(fields.u_defi)
fields.u_defi = fields.u_defi.strip()
if fields.b_title:
fields.u_title, singleEncoding = self.decodeCharsetTags(
fields.b_title,
self.sourceEncoding,
)
fields.u_title = replaceHtmlEntries(fields.u_title)
fields.u_title = removeControlChars(fields.u_title)
if fields.b_title_trans:
# TODO: unclear whether this should use sourceEncoding or targetEncoding
fields.u_title_trans, singleEncoding = self.decodeCharsetTags(
fields.b_title_trans,
self.sourceEncoding,
)
fields.u_title_trans = replaceHtmlEntries(fields.u_title_trans)
fields.u_title_trans = removeControlChars(fields.u_title_trans)
if fields.b_transcription_50:
if fields.code_transcription_50 == 0x10:
# This field contains values like the following (char codes, hex):
# 00 18 00 19 00 1A 00 1B 00 1C 00 1D 00 1E 00 40 00 07
# It is not UTF-16; the actual encoding is still unknown.
"b_key = %s:\n" % b_key +
"defi field 50, " +
"unknown code: %#.2x" % fields.code_transcription_50
)
if fields.b_transcription_60:
if fields.code_transcription_60 == 0x1b:
fields.u_transcription_60, singleEncoding = \
self.decodeCharsetTags(
fields.b_transcription_60,
self.sourceEncoding,
)
fields.u_transcription_60 = \
replaceHtmlEntries(fields.u_transcription_60)
fields.u_transcription_60 = \
removeControlChars(fields.u_transcription_60)
else:
log.debug(
"processDefi(%s)\n" % b_defi +
"b_key = %s:\n" % b_key +
"defi field 60" +
"unknown code: %#.2x" % fields.code_transcription_60,
)
if fields.b_field_1a:
fields.u_field_1a, singleEncoding = self.decodeCharsetTags(
fields.b_field_1a,
self.sourceEncoding,
)
self.processDefiStat(fields, b_defi, b_key)
# strip "/" before words
u_word_main = re.sub(
self.stripSlashAltKeyPattern,
r"\1\2",
u_word_main,
)
if self.processHtmlInKey:
# u_word_main_orig = u_word_main
u_word_main = stripHtmlTags(u_word_main)
u_word_main = replaceHtmlEntriesInKeys(u_word_main)
# if(re.match(".*[&<>].*", u_word_main_orig)):
# log.debug("original text: " + u_word_main_orig + "\n" \
# + "new text: " + u_word_main + "\n")
u_word_main = removeControlChars(u_word_main)
u_word_main = removeNewlines(u_word_main)
u_word_main = u_word_main.lstrip()
u_word_main = u_word_main.rstrip(self.keyRStripChars)
return u_word_main