Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_provide_oov012(self):
    """Check OOV candidates for a grouped, non-invoking KANJI category.

    KANJI is configured with ``is_invoke=False``, ``is_group=True`` and
    ``length=2``, and the mocked input span ``(0, 3)`` is tagged as KANJI.
    Three candidate nodes are expected; the first one must have the
    surface ``'あいう'``, a length of 3 and POS id 1.
    """
    kanji_info = MeCabOovPlugin.CategoryInfo()
    kanji_info.type_ = CategoryType.KANJI
    kanji_info.is_invoke = False
    kanji_info.is_group = True
    kanji_info.length = 2
    self.plugin.categories[CategoryType.KANJI] = kanji_info
    # The category tagging lives on the shared mock, so it must be applied
    # before the plugin is asked for candidates.
    mock_inputtext.set_category_type(0, 3, CategoryType.KANJI)

    candidates = self.plugin.provide_oov(self.mocked_input_text, 0, False)

    self.assertEqual(3, len(candidates))
    first_info = candidates[0].get_word_info()
    self.assertEqual('あいう', first_info.surface)
    self.assertEqual(3, first_info.length())
    self.assertEqual(1, first_info.pos_id)
def test_read_character_property_duplicate_definitions(self):
    """Defining the same category twice must raise ``ValueError``.

    The second ``DEFAULT`` line is on line 2, which is the line number the
    error message is expected to report.
    """
    char_def_path = os.path.join(self.test_dir, 'test.txt')
    with open(char_def_path, 'w') as out:
        out.write("DEFAULT 0 1 2\nDEFAULT 1 1 2")

    loader = MeCabOovPlugin()
    with self.assertRaises(ValueError) as raised:
        loader.read_character_property(char_def_path)

    message = raised.exception.args[0]
    self.assertEqual('`DEFAULT` is already defined at line 2', message)
def test_read_character_property_with_too_few_columns(self):
    """A category line with only three fields must raise ``ValueError``."""
    char_def_path = os.path.join(self.test_dir, 'test.txt')
    with open(char_def_path, 'w') as out:
        out.write("DEFAULT 0 1\n")

    loader = MeCabOovPlugin()
    with self.assertRaises(ValueError) as raised:
        loader.read_character_property(char_def_path)

    message = raised.exception.args[0]
    self.assertEqual('invalid format at line 1', message)
def setUp(self):
    """Build the fixture shared by the tests in this class.

    Creates a plugin with two OOV entries (POS ids 1 and 2): the first is
    registered under KANJI, both under KANJINUMERIC. The mocked input text
    is set to ``'あいうえお'`` and a scratch directory is created for the
    files individual tests write.
    """
    import shutil  # local import: only needed for the cleanup hook below

    self.plugin = MeCabOovPlugin()
    oov1 = MeCabOovPlugin.OOV()
    oov1.pos_id = 1
    oov2 = MeCabOovPlugin.OOV()
    oov2.pos_id = 2
    self.plugin.oov_list[CategoryType.KANJI] = [oov1]
    self.plugin.oov_list[CategoryType.KANJINUMERIC] = [oov1, oov2]

    self.mocked_input_text = mock_inputtext.mocked_input_text
    mock_inputtext.set_text('あいうえお')

    self.test_dir = tempfile.mkdtemp()
    # mkdtemp() never removes what it creates, so register the removal here.
    # Cleanups run after tearDown; ignore_errors keeps this harmless if a
    # tearDown (not visible in this excerpt) has already deleted the
    # directory.
    self.addCleanup(shutil.rmtree, self.test_dir, ignore_errors=True)
def test_read_character_property(self):
    """A well-formed character-property file populates ``categories``.

    The fixture contains a ``#`` line, a whitespace-only line, two category
    definitions and one code-point range line; only the values parsed for
    ``DEFAULT`` are checked.
    """
    char_def_path = os.path.join(self.test_dir, 'test.txt')
    with open(char_def_path, 'w') as out:
        out.write("#\n \nDEFAULT 0 1 2\nALPHA 1 0 0\n0x0000...0x0002 ALPHA")

    loader = MeCabOovPlugin()
    loader.read_character_property(char_def_path)

    # "DEFAULT 0 1 2" -> is_invoke False, is_group True, length 2
    self.assertFalse(loader.categories[CategoryType.DEFAULT].is_invoke)
    self.assertTrue(loader.categories[CategoryType.DEFAULT].is_group)
    self.assertEqual(2, loader.categories[CategoryType.DEFAULT].length)
def test_read_oov(self):
    """Two OOV lines for ``DEFAULT`` are loaded into a single list.

    Checks that only one category ends up in ``oov_list``, that it holds
    both entries, and that the first entry carries the left id, right id
    and cost from the first line together with POS id 0.
    """
    oov = os.path.join(self.test_dir, 'test.txt')
    # The fixture contains Japanese text, so the encoding is given
    # explicitly instead of relying on the platform's locale default.
    # NOTE(review): assumes read_oov() reads the file as UTF-8 -- confirm.
    with open(oov, 'w', encoding='utf-8') as wf:
        wf.write("DEFAULT,1,2,3,補助記号,一般,*,*,*,*\n")
        wf.write("DEFAULT,3,4,5,補助記号,一般,*,*,*,*\n")

    plugin = MeCabOovPlugin()
    plugin.categories[CategoryType.DEFAULT] = MeCabOovPlugin.CategoryInfo()
    plugin.read_oov(oov, mock_grammar.mocked_grammar)

    self.assertEqual(1, len(plugin.oov_list))
    self.assertEqual(2, len(plugin.oov_list[CategoryType.DEFAULT]))
    self.assertEqual(1, plugin.oov_list[CategoryType.DEFAULT][0].left_id)
    self.assertEqual(2, plugin.oov_list[CategoryType.DEFAULT][0].right_id)
    self.assertEqual(3, plugin.oov_list[CategoryType.DEFAULT][0].cost)
    self.assertEqual(0, plugin.oov_list[CategoryType.DEFAULT][0].pos_id)
def test_read_oov_with_undefined_type(self):
    """An OOV line naming the category ``FOO`` must raise ``ValueError``."""
    input_ = os.path.join(self.test_dir, 'test.txt')
    # The fixture contains Japanese text, so the encoding is given
    # explicitly instead of relying on the platform's locale default.
    # NOTE(review): assumes read_oov() reads the file as UTF-8 -- confirm.
    with open(input_, 'w', encoding='utf-8') as wf:
        wf.write("FOO,1,2,3,補助記号,一般,*,*,*,*\n")

    plugin = MeCabOovPlugin()
    plugin.categories[CategoryType.DEFAULT] = MeCabOovPlugin.CategoryInfo()
    with self.assertRaises(ValueError) as cm:
        plugin.read_oov(input_, mock_grammar.mocked_grammar)

    self.assertEqual('`FOO` is invalid type at line 1', cm.exception.args[0])
def test_read_oov_with_too_few_columns(self):
    """An OOV line with only four fields must raise ``ValueError``."""
    oov_path = os.path.join(self.test_dir, 'test.txt')
    with open(oov_path, 'w') as out:
        out.write("DEFAULT,1,2,3\n")

    loader = MeCabOovPlugin()
    loader.categories[CategoryType.DEFAULT] = MeCabOovPlugin.CategoryInfo()
    with self.assertRaises(ValueError) as raised:
        loader.read_oov(oov_path, mock_grammar.mocked_grammar)

    message = raised.exception.args[0]
    self.assertEqual('invalid format at line 1', message)
def test_read_character_property_with_undefined_type(self):
    """A definition line for the category ``FOO`` must raise ``ValueError``."""
    char_def_path = os.path.join(self.test_dir, 'test.txt')
    with open(char_def_path, 'w') as out:
        out.write("FOO 0 1 2\n")

    loader = MeCabOovPlugin()
    with self.assertRaises(ValueError) as raised:
        loader.read_character_property(char_def_path)

    message = raised.exception.args[0]
    self.assertEqual('`FOO` is invalid type at line 1', message)
def get_oov_plugin(json_obj) -> OovProviderPlugin:
    """Instantiate the OOV provider plugin named by a config entry.

    Args:
        json_obj: Config mapping. Its ``'class'`` entry selects the plugin,
            and the whole mapping is passed to the plugin constructor.

    Returns:
        A ``MeCabOovPlugin`` or ``SimpleOovPlugin`` built from ``json_obj``.

    Raises:
        ValueError: If ``'class'`` names an unknown plugin, or if a
            ``KeyError`` occurs anywhere in the lookup or construction
            (for example ``'class'`` is missing). In the latter case the
            original ``KeyError`` is attached as ``__cause__``.
    """
    # In the future, users can define plugin by themselves
    try:
        class_name = json_obj['class']
        if class_name == 'sudachipy.plugin.oov.MeCabOovProviderPlugin':
            return MeCabOovPlugin(json_obj)
        if class_name == 'sudachipy.plugin.oov.SimpleOovProviderPlugin':
            return SimpleOovPlugin(json_obj)
        raise ValueError('{} is invalid OovProviderPlugin class'.format(class_name))
    except KeyError as err:
        # Chain explicitly so the missing key stays visible in the traceback.
        raise ValueError('config file is invalid format') from err