Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""Tag text with chosen tagger and clean tags.
Tag format: [('word', 'tag')]
:param text: string
:return: list of tuples, with each tuple containing the word and its pos tag
:rtype : list
"""
if self.tagger == 'tag_ngram_123_backoff': # Data format: Perseus Style (see https://github.com/cltk/latin_treebank_perseus)
tags = POSTag('latin').tag_ngram_123_backoff(text.lower())
return [(tag[0], tag[1]) for tag in tags]
elif self.tagger == 'tag_tnt':
tags = POSTag('latin').tag_tnt(text.lower())
return [(tag[0], tag[1]) for tag in tags]
elif self.tagger == 'tag_crf':
tags = POSTag('latin').tag_crf(text.lower())
return [(tag[0], tag[1]) for tag in tags]
def _retrieve_tag(self, text):
    """Run the tagger selected by ``self.tagger`` over ``text``.

    The text is lower-cased before it is handed to the Latin ``POSTag``
    instance. Every tagger result is reduced to its first two items.
    Tag format: [('word', 'tag')]

    :param text: string
    :return: list of ``(word, tag)`` tuples; ``None`` if ``self.tagger`` is
        not one of the supported tagger names
    :rtype : list
    """
    # tag_ngram_123_backoff data format: Perseus Style
    # (see https://github.com/cltk/latin_treebank_perseus)
    supported = ('tag_ngram_123_backoff', 'tag_tnt', 'tag_crf')
    for method_name in supported:
        if self.tagger == method_name:
            # The tagger name doubles as the POSTag method name.
            tag_text = getattr(POSTag('latin'), method_name)
            return [(item[0], item[1]) for item in tag_text(text.lower())]
def _retrieve_tag(self, text):
    """Tag lower-cased ``text`` with the tagger named by ``self.tagger``.

    Tag format: [('word', 'tag')]

    :param text: string
    :return: list of tuples holding the first two items of each tagger
        result (word and POS tag); ``None`` when ``self.tagger`` matches
        none of the supported names
    :rtype : list
    """
    if self.tagger == 'tag_ngram_123_backoff':
        # Data format: Perseus Style
        # (see https://github.com/cltk/latin_treebank_perseus)
        tagged = POSTag('latin').tag_ngram_123_backoff(text.lower())
    elif self.tagger == 'tag_tnt':
        tagged = POSTag('latin').tag_tnt(text.lower())
    elif self.tagger == 'tag_crf':
        tagged = POSTag('latin').tag_crf(text.lower())
    else:
        # Unknown tagger name: nothing is tagged.
        return None

    return [(entry[0], entry[1]) for entry in tagged]
def _get_pos_tags(self, tokens):
    """Build a list of short POS tags for ``tokens``.

    The tokens are joined with single spaces and passed as one string to
    the Latin ``POSTag.tag_ngram_123_backoff`` tagger. Each tagger result is
    reduced to the lower-cased first character of its tag; a falsy tag
    (such as ``None`` or an empty string) is passed through unchanged.

    :param tokens: List of tokens to be POS-tagged
    :return: List with one shortened POS-tag per tagger result
    """
    # Import (and define tagger) with other imports?
    from cltk.tag.pos import POSTag

    latin_tagger = POSTag('latin')
    sentence = " ".join(tokens)

    short_tags = []
    for entry in latin_tagger.tag_ngram_123_backoff(sentence):
        pos = entry[1]
        if pos:
            short_tags.append(pos[0].lower())
        else:
            # Keep the falsy tag as-is so the output stays aligned with
            # the tagger results.
            short_tags.append(pos)
    return short_tags
def __init__(self):
    """Create the helper objects stored on this instance.

    Sets ``self.syllabifier`` (a ``Syllabifier`` for ``"old_norse_ipa"``),
    ``self.tr`` (a ``Transcriber``) and ``self.tagger`` (a ``POSTag`` for
    ``'old_norse'``).
    """
    syllabifier = Syllabifier(language="old_norse_ipa")
    self.syllabifier = syllabifier

    transcriber = Transcriber(
        DIPHTHONGS_IPA,
        DIPHTHONGS_IPA_class,
        IPA_class,
        old_norse_rules,
    )
    self.tr = transcriber

    pos_tagger = POSTag('old_norse')
    self.tagger = pos_tagger