Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# -*- coding:utf-8 -*-
# Author:hankcs
# Date: 2018-05-23 17:26
import os
from pyhanlp import SafeJClass
from tests.test_utility import ensure_data
# Handles obtained through pyhanlp's SafeJClass, looked up by their
# fully-qualified com.hankcs.hanlp.* class names.
NaiveBayesClassifier = SafeJClass('com.hankcs.hanlp.classification.classifiers.NaiveBayesClassifier')
IOUtil = SafeJClass('com.hankcs.hanlp.corpus.io.IOUtil')
# Value returned by ensure_data for the mini Sogou text-classification corpus;
# the code below uses it as a filesystem path.
# NOTE(review): presumably ensure_data fetches the zip from the URL when the
# corpus is not available locally -- confirm in tests.test_utility.
sogou_corpus_path = ensure_data('搜狗文本分类语料库迷你版',
'http://file.hankcs.com/corpus/sogou-text-classification-corpus-mini.zip')
def train_or_load_classifier():
    """Return a NaiveBayesClassifier, reusing a saved model when one exists.

    The model file is looked up at ``sogou_corpus_path + '.ser'``. When that
    file is missing, a classifier is trained on ``sogou_corpus_path``, its
    model is saved to that location, and a classifier built from the model
    is returned. Otherwise the saved object is read back and wrapped.
    """
    cache_file = sogou_corpus_path + '.ser'
    if not os.path.isfile(cache_file):
        # No saved model yet: train once and persist the result for next time.
        trainer = NaiveBayesClassifier()
        trainer.train(sogou_corpus_path)
        trained_model = trainer.getModel()
        IOUtil.saveObjectTo(trained_model, cache_file)
        return NaiveBayesClassifier(trained_model)
    return NaiveBayesClassifier(IOUtil.readObjectFrom(cache_file))
def predict(classifier, text):
# -*- coding:utf-8 -*-
# Author:hankcs
# Date: 2018-05-23 17:26
import os
from pyhanlp import SafeJClass
from tests.test_utility import ensure_data
# Handles obtained through pyhanlp's SafeJClass, looked up by their
# fully-qualified com.hankcs.hanlp.* class names.
NaiveBayesClassifier = SafeJClass('com.hankcs.hanlp.classification.classifiers.NaiveBayesClassifier')
IOUtil = SafeJClass('com.hankcs.hanlp.corpus.io.IOUtil')
# Value returned by ensure_data for the mini Sogou text-classification corpus;
# the code below uses it as a filesystem path.
# NOTE(review): presumably ensure_data fetches the zip from the URL when the
# corpus is not available locally -- confirm in tests.test_utility.
sogou_corpus_path = ensure_data('搜狗文本分类语料库迷你版',
'http://file.hankcs.com/corpus/sogou-text-classification-corpus-mini.zip')
def train_or_load_classifier():
    """Return a NaiveBayesClassifier, reusing a saved model when one exists.

    The model file is looked up at ``sogou_corpus_path + '.ser'``. When that
    file is missing, a classifier is trained on ``sogou_corpus_path``, its
    model is saved to that location, and a classifier built from the model
    is returned. Otherwise the saved object is read back and wrapped.
    """
    cache_file = sogou_corpus_path + '.ser'
    if not os.path.isfile(cache_file):
        # No saved model yet: train once and persist the result for next time.
        trainer = NaiveBayesClassifier()
        trainer.train(sogou_corpus_path)
        trained_model = trainer.getModel()
        IOUtil.saveObjectTo(trained_model, cache_file)
        return NaiveBayesClassifier(trained_model)
    return NaiveBayesClassifier(IOUtil.readObjectFrom(cache_file))
# On interpreters older than Python 3, reload sys and set the default string
# encoding to UTF-8. Nothing happens on Python 3 and later.
if sys.version_info < (3,):
    reload(sys)
    sys.setdefaultencoding("utf-8")
# raise "Must be using Python 3"
from absl import flags # absl-py
from absl import logging # absl-py
# Module-level alias for absl's flags.FLAGS object.
FLAGS = flags.FLAGS
import unittest
import threading
import time
from pyhanlp import HanLP, SafeJClass
# Use SafeJClass to bring in the class name in a thread-safe way, outside the thread body
CRFLexicalAnalyzer = SafeJClass("com.hankcs.hanlp.model.crf.CRFLexicalAnalyzer")
class MyThread(threading.Thread):
    """Thread that repeatedly runs an analyzer on a fixed sentence and prints the result.

    Args:
        name: Label used in this thread's printed output; stored as
            ``thread_name`` (it is not passed to ``threading.Thread``).
        counter: Number of analyze-and-print rounds to perform.
        analyzer: Object exposing ``analyze(text)``; called once per round.
    """

    def __init__(self, name, counter, analyzer):
        super(MyThread, self).__init__()
        self.thread_name = name
        self.counter = counter
        self.analyzer = analyzer

    def run(self):
        """Print a start banner, then do one round per remaining count.

        Each round sleeps for one second, analyzes the sample sentence, prints
        the thread label, the current time and the analysis result, and then
        decrements ``counter``.
        """
        print("Starting " + self.thread_name)
        # Only loop for a strictly positive count. A bare truthiness check
        # would never terminate for a negative counter, since decrementing
        # moves it further from zero. The leading truthiness test keeps
        # falsy values such as None or 0 skipping the loop without comparison.
        while self.counter and self.counter > 0:
            time.sleep(1)
            sentence = self.analyzer.analyze("商品和服务")
            print("%s: %s, seg: %s" % (self.thread_name, time.ctime(time.time()), sentence))
            self.counter -= 1