How to use the pyglossary.entry_filters.EntryFilter class in pyglossary

To help you get started, we’ve selected a few pyglossary examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ilius / pyglossary / pyglossary / entry_filters.py View on Github external
# defi = "\n".join([RLM+line for line in defi.split("\n")])
		# for GoldenDict ^^ FIXME
		return entry

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""
		Apply language-dependent filters to `entry`.

		Reads the glossary's "sourceLang" and "targetLang" info values;
		when either one contains "persian" or "farsi" (case-insensitive),
		the entry is passed through `run_fa`. Otherwise the entry is
		returned as it was received.
		"""
		# Keep a separator between the two names: gluing them together
		# lets a keyword match across the boundary, e.g. "Afar" followed
		# by "Sicilian" would contain "farsi". Neither keyword contains
		# a space, so a space can never be part of a match.
		langs = " ".join((
			self.glos.getInfo("sourceLang"),
			self.glos.getInfo("targetLang"),
		)).lower()
		if "persian" in langs or "farsi" in langs:
			entry = self.run_fa(entry)

		return entry


class CleanEntryFilter(EntryFilter):  # FIXME
	name = "clean"
	desc = "Clean"

	def cleanDefi(self, st: str) -> str:
		st = st.replace("♦  ", "♦ ")
		st = re.sub("[\r\n]+", "\n", st)
		st = re.sub(" *\n *", "\n", st)

		"""
		This code may correct snippets like:
		- First sentence .Second sentence. -> First sentence. Second sentence.
		- First clause ,second clause. -> First clause, second clause.
		But there are cases when this code have undesirable effects
		( "<" represented as "<" in HTML markup):
		-  -> < Adj. >
		-  -> < fig. >
github ilius / pyglossary / pyglossary / entry_filters.py View on Github external
entry.editFuncDefi(fixStr)
		return entry


class SkipDataEntryFilter(EntryFilter):
	"""Entry filter that drops every entry whose `isData()` is true."""

	name = "skip_resources"
	desc = "Skip Resources"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Return None for a data entry, otherwise `entry` itself."""
		return None if entry.isData() else entry


class LangEntryFilter(EntryFilter):
	name = "lang"
	desc = "Language-dependent Filters"

	def run_fa(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""
		Pass `faEditStr` (from `pyglossary.persian_utils`) to the entry's
		`editFuncWord` and then to its `editFuncDefi`; return the entry.
		"""
		from pyglossary.persian_utils import faEditStr
		for applyEdit in (entry.editFuncWord, entry.editFuncDefi):
			applyEdit(faEditStr)
		# FIXME (for GoldenDict): the disabled idea below would prefix
		# every definition line with RLM (presumably the UTF-8 bytes of
		# the right-to-left mark):
		# RLM = "\xe2\x80\x8f"
		# defi = "\n".join([RLM+line for line in defi.split("\n")])
		return entry

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		langs = (
			self.glos.getInfo("sourceLang") +
			self.glos.getInfo("targetLang")
github ilius / pyglossary / pyglossary / entry_filters.py View on Github external
#		if not wordsStr:
#			return
		return entry


class NonEmptyDefiFilter(EntryFilter):
	"""Entry filter that drops entries whose `getDefi()` is falsy."""

	name = "non_empty_defi"
	desc = "Non-empty Definition"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Return `entry` when `getDefi()` is truthy, otherwise None."""
		return entry if entry.getDefi() else None


class RemoveEmptyAndDuplicateAltWords(EntryFilter):
	"""
	Entry filter that calls `removeEmptyAndDuplicateAltWords()` on each
	entry and drops the entry when `getWords()` is falsy afterwards.
	"""

	name = "remove_empty_dup_alt_words"
	desc = "Remove Empty and Duplicate Alternate Words"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Clean the entry's words in place; return None if none remain."""
		entry.removeEmptyAndDuplicateAltWords()
		remaining = entry.getWords()
		return entry if remaining else None


class FixUnicodeFilter(EntryFilter):
	name = "fix_unicode"
	desc = "Fix Unicode"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		entry.editFuncWord(fixUtf8)
github ilius / pyglossary / pyglossary / entry_filters.py View on Github external
def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Pass `fixUtf8` to `editFuncWord`, then `editFuncDefi`; return entry."""
		for applyEdit in (entry.editFuncWord, entry.editFuncDefi):
			applyEdit(fixUtf8)
		return entry


class LowerWordFilter(EntryFilter):
	"""Entry filter that applies `str.lower` through `editFuncWord`."""

	name = "lower_word"
	desc = "Lowercase Words"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Hand `str.lower` to `entry.editFuncWord` and return the entry."""
		toLower = str.lower
		entry.editFuncWord(toLower)
		return entry


class RemoveHtmlTagsAll(EntryFilter):
	"""
	Entry filter that replaces the definition with the `.text` that
	BeautifulSoup (using the "lxml" parser) extracts from it.
	"""

	name = "remove_html_all"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Edit the definition through `editFuncDefi`; return the entry."""
		# Imported here so bs4 is only needed when this filter runs.
		from bs4 import BeautifulSoup

		entry.editFuncDefi(
			lambda markup: BeautifulSoup(markup, "lxml").text,
		)
		return entry


class RemoveHtmlTags(EntryFilter):
	name = "remove_html"

	def __init__(self, glos: Glossary, tags: List[str]):
github ilius / pyglossary / pyglossary / entry_filters.py View on Github external
or return a new Entry object
		"""
		return entry


class StripEntryFilter(EntryFilter):
	"""
	Entry filter that calls `strip()` on the entry and then removes
	carriage-return characters through `entry.replace`.
	"""

	name = "strip"
	desc = "Strip Whitespaces"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Strip the entry, drop every "\\r" from it, and return it."""
		entry.strip()
		carriageReturn = "\r"
		entry.replace(carriageReturn, "")
		return entry


class NonEmptyWordFilter(EntryFilter):
	"""Entry filter that drops entries whose `getWord()` is falsy."""

	name = "non_empty_word"
	desc = "Non-empty Words"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Return `entry` when `getWord()` is truthy, otherwise None."""
		# NOTE: only `getWord()` is consulted. A stricter variant that
		# joined the stripped items of `getWords()` and rejected an
		# empty result used to sit here, disabled.
		return entry if entry.getWord() else None


class NonEmptyDefiFilter(EntryFilter):
github ilius / pyglossary / pyglossary / entry_filters.py View on Github external
desc = ""

	def __init__(self, glos: Glossary):
		"""Keep a reference to `glos` on the instance as `self.glos`."""
		self.glos = glos

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""
			Process one entry; this implementation returns it unchanged.

			The result is an Entry object, or None to skip the entry.
			It may be the same `entry` (untouched, or modified),
			or a new Entry object.
		"""
		return entry


class StripEntryFilter(EntryFilter):
	"""
	Entry filter that calls `strip()` on the entry and then removes
	carriage-return characters through `entry.replace`.
	"""

	name = "strip"
	desc = "Strip Whitespaces"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Strip the entry, drop every "\\r" from it, and return it."""
		entry.strip()
		carriageReturn = "\r"
		entry.replace(carriageReturn, "")
		return entry


class NonEmptyWordFilter(EntryFilter):
	name = "non_empty_word"
	desc = "Non-empty Words"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		if not entry.getWord():
			return
github ilius / pyglossary / pyglossary / entry_filters.py View on Github external
return
		return entry


class RemoveEmptyAndDuplicateAltWords(EntryFilter):
	"""
	Entry filter that calls `removeEmptyAndDuplicateAltWords()` on each
	entry and drops the entry when `getWords()` is falsy afterwards.
	"""

	name = "remove_empty_dup_alt_words"
	desc = "Remove Empty and Duplicate Alternate Words"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Clean the entry's words in place; return None if none remain."""
		entry.removeEmptyAndDuplicateAltWords()
		remaining = entry.getWords()
		return entry if remaining else None


class FixUnicodeFilter(EntryFilter):
	"""
	Entry filter that passes `fixUtf8` to both `editFuncWord` and
	`editFuncDefi` of every entry.
	"""

	name = "fix_unicode"
	desc = "Fix Unicode"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Apply `fixUtf8` via the word editor, then the definition editor."""
		for applyEdit in (entry.editFuncWord, entry.editFuncDefi):
			applyEdit(fixUtf8)
		return entry


class LowerWordFilter(EntryFilter):
	"""Entry filter that applies `str.lower` through `editFuncWord`."""

	name = "lower_word"
	desc = "Lowercase Words"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Hand `str.lower` to `entry.editFuncWord` and return the entry."""
		toLower = str.lower
		entry.editFuncWord(toLower)
		return entry
github ilius / pyglossary / pyglossary / entry_filters.py View on Github external
class RemoveHtmlTagsAll(EntryFilter):
	"""
	Entry filter that replaces the definition with the `.text` that
	BeautifulSoup (using the "lxml" parser) extracts from it.
	"""

	name = "remove_html_all"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Edit the definition through `editFuncDefi`; return the entry."""
		# Imported here so bs4 is only needed when this filter runs.
		from bs4 import BeautifulSoup

		entry.editFuncDefi(
			lambda markup: BeautifulSoup(markup, "lxml").text,
		)
		return entry


class RemoveHtmlTags(EntryFilter):
	name = "remove_html"

	def __init__(self, glos: Glossary, tags: List[str]):
		"""
		Store `glos` and `tags` on the instance.

		`run` joins the items of `tags` with "|" inside a regular
		expression, so each item is interpreted as a regex fragment.
		"""
		self.glos = glos
		self.tags = tags

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		import re

		def fixStr(st: str) -> str:
			tagsRE = "|".join(self.tags)
			pattern = f"<.?({tagsRE})[^>]*>"
			st = re.sub(pattern, "", st)
			return st

		entry.editFuncDefi(fixStr)
github ilius / pyglossary / pyglossary / entry_filters.py View on Github external
name = "non_empty_word"
	desc = "Non-empty Words"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Return `entry` when `getWord()` is truthy, otherwise None."""
		# NOTE: only `getWord()` is consulted. A stricter variant that
		# joined the stripped items of `getWords()` and rejected an
		# empty result used to sit here, disabled.
		return entry if entry.getWord() else None


class NonEmptyDefiFilter(EntryFilter):
	"""Entry filter that drops entries whose `getDefi()` is falsy."""

	name = "non_empty_defi"
	desc = "Non-empty Definition"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Return `entry` when `getDefi()` is truthy, otherwise None."""
		return entry if entry.getDefi() else None


class RemoveEmptyAndDuplicateAltWords(EntryFilter):
	name = "remove_empty_dup_alt_words"
	desc = "Remove Empty and Duplicate Alternate Words"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		entry.removeEmptyAndDuplicateAltWords()
		if not entry.getWords():
github ilius / pyglossary / pyglossary / entry_filters.py View on Github external
if not entry.getWords():
			return
		return entry


class FixUnicodeFilter(EntryFilter):
	"""
	Entry filter that passes `fixUtf8` to both `editFuncWord` and
	`editFuncDefi` of every entry.
	"""

	name = "fix_unicode"
	desc = "Fix Unicode"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Apply `fixUtf8` via the word editor, then the definition editor."""
		for applyEdit in (entry.editFuncWord, entry.editFuncDefi):
			applyEdit(fixUtf8)
		return entry


class LowerWordFilter(EntryFilter):
	"""Entry filter that applies `str.lower` through `editFuncWord`."""

	name = "lower_word"
	desc = "Lowercase Words"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		"""Hand `str.lower` to `entry.editFuncWord` and return the entry."""
		toLower = str.lower
		entry.editFuncWord(toLower)
		return entry


class RemoveHtmlTagsAll(EntryFilter):
	name = "remove_html_all"

	def run(self, entry: BaseEntry) -> Optional[BaseEntry]:
		from bs4 import BeautifulSoup

		def fixStr(st: str) -> str:
			return BeautifulSoup(st, "lxml").text