Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def truecase_file(modelfile, processes, is_asr, encoding, quiet):
    """
    Truecase text read from stdin, line by line, and print it to stdout.

    :param modelfile: Passed to ``MosesTruecaser`` as ``load_from``.
    :param processes: Accepted for interface compatibility but not used
        by this function; see the FIXME in the body.
    :param is_asr: Forwarded to ``MosesTruecaser`` as ``is_asr``.
    :param encoding: Encoding given to ``MosesTruecaser`` and used for both
        the stdin and the stdout text streams.
    :param quiet: When false, the input stream is wrapped in ``tqdm``;
        when true, it is iterated directly.
    """
    moses = MosesTruecaser(load_from=modelfile, is_asr=is_asr, encoding=encoding)
    # Bind the fixed keyword once so the loop body is a plain one-argument call.
    moses_truecase = partial(moses.truecase, return_str=True)
    with click.get_text_stream("stdin", encoding=encoding) as fin:
        with click.get_text_stream("stdout", encoding=encoding) as fout:
            lines = fin if quiet else tqdm(fin)
            # FIXME: parallelizing the job doesn't work properly for
            # MosesTruecaser.truecase, so `processes` is ignored and the
            # lines are handled one at a time.
            for line in lines:
                print(moses_truecase(line), end="\n", file=fout)
def truecase_file(modelfile, processes, is_asr, encoding, quiet):
    """
    Truecase text read from stdin, line by line, and print it to stdout.

    :param modelfile: Passed to ``MosesTruecaser`` as ``load_from``.
    :param processes: Accepted for interface compatibility but not used
        by this function; see the FIXME in the body.
    :param is_asr: Forwarded to ``MosesTruecaser`` as ``is_asr``.
    :param encoding: Encoding given to ``MosesTruecaser`` and used for both
        the stdin and the stdout text streams.
    :param quiet: When false, the input stream is wrapped in ``tqdm``;
        when true, it is iterated directly.
    """
    moses = MosesTruecaser(load_from=modelfile, is_asr=is_asr, encoding=encoding)
    # Bind the fixed keyword once so the loop body is a plain one-argument call.
    moses_truecase = partial(moses.truecase, return_str=True)
    with click.get_text_stream("stdin", encoding=encoding) as fin:
        with click.get_text_stream("stdout", encoding=encoding) as fout:
            lines = fin if quiet else tqdm(fin)
            # FIXME: parallelizing the job doesn't work properly for
            # MosesTruecaser.truecase, so `processes` is ignored and the
            # lines are handled one at a time.
            for line in lines:
                print(moses_truecase(line), end="\n", file=fout)
def train_truecaser(modelfile, processes, is_asr, possibly_use_first_token, encoding, quiet):
    """
    Train a truecaser on text read from stdin and save it via ``save_model``.

    :param modelfile: Passed to ``MosesTruecaser.save_model``.
    :param processes: Forwarded to ``train_from_file_object`` as ``processes``.
    :param is_asr: Forwarded to ``MosesTruecaser`` as ``is_asr``.
    :param possibly_use_first_token: Forwarded unchanged to
        ``train_from_file_object``.
    :param encoding: Encoding given to ``MosesTruecaser`` and used for the
        stdin text stream.
    :param quiet: When true, training is run with ``progress_bar=False``.
    """
    moses = MosesTruecaser(is_asr=is_asr, encoding=encoding)
    with click.get_text_stream("stdin", encoding=encoding) as fin:
        # The return value is not needed here; the model is saved through
        # ``moses.save_model`` below.
        moses.train_from_file_object(
            fin,
            possibly_use_first_token=possibly_use_first_token,
            processes=processes,
            progress_bar=(not quiet),
        )
        moses.save_model(modelfile)
def train_truecaser(modelfile, processes, is_asr, possibly_use_first_token, encoding, quiet):
    """
    Train a truecaser on text read from stdin and save it via ``save_model``.

    :param modelfile: Passed to ``MosesTruecaser.save_model``.
    :param processes: Forwarded to ``train_from_file_object`` as ``processes``.
    :param is_asr: Forwarded to ``MosesTruecaser`` as ``is_asr``.
    :param possibly_use_first_token: Forwarded unchanged to
        ``train_from_file_object``.
    :param encoding: Encoding given to ``MosesTruecaser`` and used for the
        stdin text stream.
    :param quiet: When true, training is run with ``progress_bar=False``.
    """
    moses = MosesTruecaser(is_asr=is_asr, encoding=encoding)
    with click.get_text_stream("stdin", encoding=encoding) as fin:
        # The return value is not needed here; the model is saved through
        # ``moses.save_model`` below.
        moses.train_from_file_object(
            fin,
            possibly_use_first_token=possibly_use_first_token,
            processes=processes,
            progress_bar=(not quiet),
        )
        moses.save_model(modelfile)
def __init__(self, load_from=None, is_asr=None, encoding="utf8"):
"""
:param load_from:
:type load_from:
:param is_asr: A flag to indicate that model is for ASR. ASR input has
no case, make sure it is lowercase, and make sure known are cased
eg. 'i' to be uppercased even if i is known.
:type is_asr: bool
"""
# Initialize the object.
super(MosesTruecaser, self).__init__()
# Initialize the language specific nonbreaking prefixes.
self.SKIP_LETTERS_REGEX = re.compile(
u"[{}{}{}]".format(
self.Lowercase_Letter, self.Uppercase_Letter, self.Titlecase_Letter
)
)
self.XML_SPLIT_REGX = re.compile("(<.*(?<=>))(.*)((?=]*>)")
self.SENT_END = {".", ":", "?", "!"}
self.DELAYED_SENT_START = {
"(",
"[",
'"',
"'",
"'",