Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _extract_ngrams(self):
    '''Collect the n-grams of ``self.text`` that the model knows about.

    Each character of ``self.text`` is fed into a fresh ``NGram`` buffer.
    After every character (unless the buffer reports ``capitalword``),
    the trailing 1..``NGram.N_GRAM`` characters of the buffer are looked
    up in ``self.word_lang_prob_map``; every hit is appended to the
    returned list, in the order encountered.
    '''
    longest = NGram.N_GRAM
    found = []
    window = NGram()
    for char in self.text:
        window.add_char(char)
        if window.capitalword:
            continue
        grams = window.grams
        # Never ask for a suffix longer than what has been buffered so far.
        upper = min(longest, len(grams))
        for size in xrange(1, upper + 1):
            piece = grams[-size:]
            if piece and piece != ' ' and piece in self.word_lang_prob_map:
                found.append(piece)
    return found
def add(self, gram):
    '''Record one occurrence of ``gram`` in this profile.

    The call is silently ignored when the profile has no name, when
    ``gram`` is None, or when the length of ``gram`` is not within
    1..``NGram.N_GRAM``. Otherwise the per-length counter in
    ``self.n_words`` and the per-gram counter in ``self.freq`` are
    both incremented.
    '''
    if self.name is None or gram is None:
        return  # Illegal: unnamed profile or missing gram.
    size = len(gram)
    if 1 <= size <= NGram.N_GRAM:
        self.n_words[size - 1] += 1
        self.freq[gram] += 1
def _extract_ngrams(self):
    '''Return the list of known n-grams found while scanning ``self.text``.

    Characters are pushed one at a time into an ``NGram`` buffer. While
    the buffer does not flag ``capitalword``, its trailing substrings of
    length 1 up to ``NGram.N_GRAM`` are tested against
    ``self.word_lang_prob_map`` and the matching ones are collected.
    '''
    sizes = tuple(xrange(1, NGram.N_GRAM + 1))
    matches = []
    buf = NGram()
    for symbol in self.text:
        buf.add_char(symbol)
        if not buf.capitalword:
            for size in sizes:
                # Stop once the buffer holds fewer characters than requested.
                if size > len(buf.grams):
                    break
                candidate = buf.grams[-size:]
                if not candidate or candidate == ' ':
                    continue
                if candidate in self.word_lang_prob_map:
                    matches.append(candidate)
    return matches
def update(self, text):
    '''Update the language profile with (fragmented) text.

    The text is first passed through ``NGram.normalize_vi``. Then, after
    each character is pushed into an ``NGram`` buffer, the grams of
    length 1..``NGram.N_GRAM`` obtained from the buffer are handed to
    ``self.add``. A ``None`` text is ignored.
    '''
    if text is not None:
        normalized = NGram.normalize_vi(text)
        window = NGram()
        stop = NGram.N_GRAM + 1
        for char in normalized:
            window.add_char(char)
            for size in xrange(1, stop):
                self.add(window.get(size))
messages.get_string('NGram.KANJI_7_29'),
messages.get_string('NGram.KANJI_7_32'),
messages.get_string('NGram.KANJI_7_33'),
messages.get_string('NGram.KANJI_7_35'),
messages.get_string('NGram.KANJI_7_37')]
# Lookup table filled by _init_cjk_map: character -> representative character.
CJK_MAP = {}

@classmethod
def _init_cjk_map(cls):
    '''Populate ``CJK_MAP`` from the groups listed in ``CJK_CLASS``.

    Every character of a group is mapped to that group's first
    character, which acts as the representative of the group.
    '''
    for group in cls.CJK_CLASS:
        head = group[0]
        cls.CJK_MAP.update(dict.fromkeys(group, head))
# Fill NGram.CJK_MAP by running the class-level initializer.
NGram._init_cjk_map()