How to use the pyglossary.plugins.babylon_bgl.bgl_text.replaceHtmlEntries function in pyglossary

To help you get started, we’ve selected a few pyglossary examples that show popular ways replaceHtmlEntries is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ilius / pyglossary / pyglossary / plugins / babylon_bgl / bgl_reader.py View on Github external
if fields.b_title:
			fields.u_title, singleEncoding = self.decodeCharsetTags(
				fields.b_title,
				self.sourceEncoding,
			)
			fields.u_title = replaceHtmlEntries(fields.u_title)
			fields.u_title = removeControlChars(fields.u_title)

		if fields.b_title_trans:
			# sourceEncoding or targetEncoding ?
			fields.u_title_trans, singleEncoding = self.decodeCharsetTags(
				fields.b_title_trans,
				self.sourceEncoding,
			)
			fields.u_title_trans = replaceHtmlEntries(fields.u_title_trans)
			fields.u_title_trans = removeControlChars(fields.u_title_trans)

		if fields.b_transcription_50:
			if fields.code_transcription_50 == 0x10:
				# contains values like this (char codes):
				# 00 18 00 19 00 1A 00 1B 00 1C 00 1D 00 1E 00 40 00 07
				# this is not utf-16
				# what is this?
				pass
			elif fields.code_transcription_50 == 0x1b:
				fields.u_transcription_50, singleEncoding = \
					self.decodeCharsetTags(
						fields.b_transcription_50,
						self.sourceEncoding,
					)
				fields.u_transcription_50 = \
github ilius / pyglossary / pyglossary / plugins / babylon_bgl / bgl_reader.py View on Github external
if fields.b_transcription_50:
			if fields.code_transcription_50 == 0x10:
				# contains values like this (char codes):
				# 00 18 00 19 00 1A 00 1B 00 1C 00 1D 00 1E 00 40 00 07
				# this is not utf-16
				# what is this?
				pass
			elif fields.code_transcription_50 == 0x1b:
				fields.u_transcription_50, singleEncoding = \
					self.decodeCharsetTags(
						fields.b_transcription_50,
						self.sourceEncoding,
					)
				fields.u_transcription_50 = \
					replaceHtmlEntries(fields.u_transcription_50)
				fields.u_transcription_50 = \
					removeControlChars(fields.u_transcription_50)
			elif fields.code_transcription_50 == 0x18:
				# incomplete text like:
				# t c=T>02D0;g0259;-
				# This defi normally contains fields.b_transcription_60
				# in this case.
				pass
			else:
				log.debug(
					"processDefi(%s)\n" % b_defi +
					"b_key = %s:\n" % b_key +
					"defi field 50, " +
					"unknown code: %#.2x" % fields.code_transcription_50
				)
github ilius / pyglossary / pyglossary / plugins / babylon_bgl / bgl_reader.py View on Github external
log.debug(
					"processDefi(%s)\n" % b_defi +
					"b_key = %s:\n" % b_key +
					"defi field 50, " +
					"unknown code: %#.2x" % fields.code_transcription_50
				)

		if fields.b_transcription_60:
			if fields.code_transcription_60 == 0x1b:
				fields.u_transcription_60, singleEncoding = \
					self.decodeCharsetTags(
						fields.b_transcription_60,
						self.sourceEncoding,
					)
				fields.u_transcription_60 = \
					replaceHtmlEntries(fields.u_transcription_60)
				fields.u_transcription_60 = \
					removeControlChars(fields.u_transcription_60)
			else:
				log.debug(
					"processDefi(%s)\n" % b_defi +
					"b_key = %s:\n" % b_key +
					"defi field 60" +
					"unknown code: %#.2x" % fields.code_transcription_60,
				)

		if fields.b_field_1a:
			fields.u_field_1a, singleEncoding = self.decodeCharsetTags(
				fields.b_field_1a,
				self.sourceEncoding,
			)
github ilius / pyglossary / pyglossary / plugins / babylon_bgl / bgl_reader.py View on Github external
self.targetEncoding,
		)
		if fields.singleEncoding:
			fields.encoding = self.targetEncoding
		fields.u_defi = fixImgLinks(fields.u_defi)
		fields.u_defi = replaceHtmlEntries(fields.u_defi)
		fields.u_defi = removeControlChars(fields.u_defi)
		fields.u_defi = normalizeNewlines(fields.u_defi)
		fields.u_defi = fields.u_defi.strip()

		if fields.b_title:
			fields.u_title, singleEncoding = self.decodeCharsetTags(
				fields.b_title,
				self.sourceEncoding,
			)
			fields.u_title = replaceHtmlEntries(fields.u_title)
			fields.u_title = removeControlChars(fields.u_title)

		if fields.b_title_trans:
			# sourceEncoding or targetEncoding ?
			fields.u_title_trans, singleEncoding = self.decodeCharsetTags(
				fields.b_title_trans,
				self.sourceEncoding,
			)
			fields.u_title_trans = replaceHtmlEntries(fields.u_title_trans)
			fields.u_title_trans = removeControlChars(fields.u_title_trans)

		if fields.b_transcription_50:
			if fields.code_transcription_50 == 0x10:
				# contains values like this (char codes):
				# 00 18 00 19 00 1A 00 1B 00 1C 00 1D 00 1E 00 40 00 07
				# this is not utf-16
github ilius / pyglossary / pyglossary / plugins / babylon_bgl / bgl_reader.py View on Github external
b_key: bytes

		return: u_defi_format
		"""

		fields = DefinitionFields()
		self.collectDefiFields(b_defi, b_key, fields)

		fields.u_defi, fields.singleEncoding = self.decodeCharsetTags(
			fields.b_defi,
			self.targetEncoding,
		)
		if fields.singleEncoding:
			fields.encoding = self.targetEncoding
		fields.u_defi = fixImgLinks(fields.u_defi)
		fields.u_defi = replaceHtmlEntries(fields.u_defi)
		fields.u_defi = removeControlChars(fields.u_defi)
		fields.u_defi = normalizeNewlines(fields.u_defi)
		fields.u_defi = fields.u_defi.strip()

		if fields.b_title:
			fields.u_title, singleEncoding = self.decodeCharsetTags(
				fields.b_title,
				self.sourceEncoding,
			)
			fields.u_title = replaceHtmlEntries(fields.u_title)
			fields.u_title = removeControlChars(fields.u_title)

		if fields.b_title_trans:
			# sourceEncoding or targetEncoding ?
			fields.u_title_trans, singleEncoding = self.decodeCharsetTags(
				fields.b_title_trans,