Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def kkma_instance():
    """Create and return a new konlpy ``Kkma`` tagger.

    konlpy is imported inside the function, so the dependency is only
    loaded when this function is actually called.

    :return: a freshly constructed ``Kkma`` instance
    """
    from konlpy.tag import Kkma

    return Kkma()
def _kkma_parse(self, str_arr, tag_combine=True):
    """Run Kkma POS tagging over every item and return one flat list.

    :param str_arr: iterable of items to tag; each item is converted with
        ``str()`` before being passed to ``Kkma.pos``
    :param tag_combine: forwarded unchanged to ``self._flat``
    :return: list containing the ``self._flat`` results for all items,
        concatenated in input order
    """
    kkma = Kkma()
    return_arr = []
    for data in str_arr:
        # Extend in place: ``return_arr = return_arr + ...`` copied the whole
        # accumulated list on every iteration.
        return_arr.extend(self._flat(kkma.pos(str(data)), tag_combine=tag_combine))
    return return_arr
# Pick a tokenizer backend for the language code `lang`. Each backend is
# imported inside its own branch, so only the library for the requested
# language has to be importable.
# NOTE(review): indentation is missing in this copy and the block looks
# truncated -- the "th" branch only performs an import, and no `return` or
# fallback branch is visible here. Confirm against the full source.
def load_tokenizer(lang):
if lang=="en":
# English: NLTK's word_tokenize (a function, not an object instance).
from nltk.tokenize import word_tokenize as wt
tokenizer = wt
elif lang=="ko":
# Korean: a konlpy Kkma instance.
from konlpy.tag import Kkma
tokenizer = Kkma()
elif lang=="ja":
# Japanese: Mykytea constructed with the option string below.
# presumably the .mod model file must be resolvable from the working directory -- TODO confirm
import Mykytea
opt="-model jp-0.4.7-1.mod"
tokenizer = Mykytea.Mykytea(opt)
elif lang=="zh_cn":
# "zh_cn": Mykytea again, with a different model file name.
import Mykytea
opt = "-model ctb-0.4.0-1.mod"
tokenizer = Mykytea.Mykytea(opt)
elif lang=="zh_tw":
# "zh_tw": the jieba module itself is used as the tokenizer object.
import jieba
tokenizer = jieba
elif lang=="vi":
# Vietnamese: the ViTokenizer object imported from pyvi.
from pyvi import ViTokenizer
tokenizer = ViTokenizer
elif lang=="th":
# Thai: only the import is visible; `tokenizer` is not assigned in this branch as shown.
from pythainlp.tokenize import word_tokenize
def tokenize(self, text):
    """Split ``text`` into sentences using konlpy's ``Kkma``.

    :param text: text handed to ``Kkma.sentences``
    :return: whatever ``Kkma().sentences(text)`` returns
    :raises ValueError: if konlpy cannot be imported; the underlying
        ``ImportError`` is attached as ``__cause__``
    """
    try:
        from konlpy.tag import Kkma
    except ImportError as e:
        # ValueError is kept for existing callers; chaining preserves the
        # real import failure for debugging.
        raise ValueError("Korean tokenizer requires konlpy. Please, install it by command 'pip install konlpy'.") from e
    kkma = Kkma()
    return kkma.sentences(text)
def get_tokenizer(tokenizer_name):
    """Instantiate the analyzer selected by ``tokenizer_name``.

    Recognized names are ``"komoran"``, ``"okt"``, ``"mecab"``,
    ``"hannanum"``, ``"kkma"`` and ``"khaiii"``. Any other value falls back
    to ``Mecab()``.

    :param tokenizer_name: name of the analyzer to construct
    :return: a new instance of the matching analyzer class
    """
    if tokenizer_name == "komoran":
        return Komoran()
    if tokenizer_name == "okt":
        return Okt()
    if tokenizer_name == "hannanum":
        return Hannanum()
    if tokenizer_name == "kkma":
        return Kkma()
    if tokenizer_name == "khaiii":
        return KhaiiiApi()
    # "mecab" and every unrecognized name share the same default, so the
    # explicit "mecab" branch is folded into the fallback.
    return Mecab()
def analyzer(message):
kkma = Kkma()
sentences = kkma.sentences(message)
max_score = 0
total_score = 0
total_line = 0
for sentence in sentences:
morphemes = kkma.pos(sentence)
print('Sentence : {}'.format(sentence))
print('Morphemes : {}'.format(morphemes))
labels = []
score = 0
for idx, morpheme in enumerate(morphemes):
if morpheme[1] in rule_database or '{}_'.format(morpheme[1][:-1]) in rule_database:
if morpheme[1] in rule_database:
current_rule = rule_database[morpheme[1]]