Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_read_character_definition_with_invalid_type(self):
    """Reading a definition with an unknown category type name must fail.

    Writes a one-line definition file whose type name is ``FOO`` and checks
    that ``read_character_definition`` raises ``AttributeError`` carrying
    the expected message.
    """
    definition_path = os.path.join(self.test_dir, 'test_file.txt')
    with open(definition_path, 'w') as out:
        out.write("0x0030..0x0039 FOO\n")

    category = charactercategory.CharacterCategory()
    with self.assertRaises(AttributeError) as raised:
        category.read_character_definition(definition_path)

    # The expected message reports the first (and only) line as line 0.
    message = raised.exception.args[0]
    self.assertEqual('FOO is invalid type at line 0', message)
# NOTE(review): fragment — the enclosing definition's header and the lines that
# bind `f` and open `wf` are not visible in this view.
# Write an entry that assigns KANJI to the single code point 0x0030.
wf.write("0x0030 KANJI\n")
# Load the definition file into a fresh CharacterCategory.
cat = charactercategory.CharacterCategory()
cat.read_character_definition(f)
# 0x0030 is expected to carry both NUMERIC and KANJI, 0x0039 only NUMERIC and
# 0x3007 only KANJI. Presumably the NUMERIC range and the 0x3007 entry were
# written by lines before this fragment — TODO confirm.
self.assertEqual({CategoryType.NUMERIC, CategoryType.KANJI}, cat.get_category_types(0x0030))
self.assertEqual({CategoryType.NUMERIC}, cat.get_category_types(0x0039))
self.assertEqual({CategoryType.KANJI}, cat.get_category_types(0x3007))
# NOTE(review): the `def` header of this test body is not visible in this view.
# Build a temporary definition file under self.test_dir.
f = os.path.join(self.test_dir, 'test_file.txt')
with open(f, 'w') as wf:
# A line holding only "#" and a whitespace-only line; presumably the reader is
# expected to skip both — TODO confirm.
wf.write("#\n \n")
# Overlapping entries: 0x0030..0x0039 is listed as KATAKANA and NUMERIC,
# 0x0039 is also inside the ALPHA range, and 0x0030 additionally gets KANJI.
wf.write("0x0030..0x0039 KATAKANA\n")
wf.write("0x3007 KANJI\n")
wf.write("0x0039..0x0040 ALPHA\n")
wf.write("0x0030..0x0039 NUMERIC\n")
wf.write("0x0030 KANJI\n")
# Load the definition file into a fresh CharacterCategory.
cat = charactercategory.CharacterCategory()
cat.read_character_definition(f)
# Each code point is expected to report every type whose entry covers it.
self.assertEqual({CategoryType.NUMERIC, CategoryType.KANJI, CategoryType.KATAKANA}, cat.get_category_types(0x0030))
self.assertEqual({CategoryType.NUMERIC, CategoryType.KATAKANA, CategoryType.ALPHA}, cat.get_category_types(0x0039))
self.assertEqual({CategoryType.KANJI}, cat.get_category_types(0x3007))
# 0x4007 appears in no entry above; the expected result is DEFAULT.
self.assertEqual({CategoryType.DEFAULT}, cat.get_category_types(0x4007))
def test_get_category_types(self):
    """Check the category lookup for '熙' using ``char.def``.

    The definition file is read from ``self.test_resources_dir``; the
    character must be reported as KANJI and must not be reported as DEFAULT.
    """
    category = charactercategory.CharacterCategory()
    definition_path = os.path.join(self.test_resources_dir, 'char.def')
    category.read_character_definition(definition_path)

    code_point = ord('熙')
    self.assertEqual(
        {CategoryType.KANJI},
        category.get_category_types(code_point),
    )
    self.assertNotEqual(
        {CategoryType.DEFAULT},
        category.get_category_types(code_point),
    )
def mocked_get_character_category():
    """Return a ``CharacterCategory`` loaded from the package's ``char.def``.

    The file is looked up at ``../sudachipy/resources/char.def`` relative to
    the directory containing this file. If reading it raises ``IOError``, the
    error is printed and the category is returned as constructed.
    """
    category = CharacterCategory()
    here = os.path.dirname(os.path.abspath(__file__))
    definition_path = os.path.join(
        here, os.pardir, 'sudachipy', 'resources', 'char.def')
    try:
        category.read_character_definition(definition_path)
    except IOError as err:
        # Best effort: report the problem and fall through to the return.
        print(err)
    return category
def setUp(self):
    """Prepare the sample text, its byte table and the input-text builder.

    Sets ``self.TEXT``, ``self.bytes`` and ``self.input`` (initially ``None``),
    then creates a ``MockGrammar`` whose character category is loaded from
    ``resources/char.def`` next to this file and stores a
    ``UTF8InputTextBuilder`` for ``self.TEXT`` in ``self.builder``.
    """
    self.TEXT = "âbC1あ234漢字𡈽アゴ"
    # NOTE(review): every entry is the ASCII text of a hex literal (for
    # example b'0xC3' is four bytes long), not the single byte it names —
    # confirm this is what the users of self.bytes expect.
    self.bytes = [
        b'0xC3', b'0xA2', b'0xEF', b'0xBD',
        b'0x82', b'0x43', b'0x31', b'0xE3',
        b'0x81', b'0x82', b'0x32', b'0x33',
        b'0x34', b'0xE6', b'0xBC', b'0xA2',
        b'0xE5', b'0xAD', b'0x97', b'0xF0',
        b'0xA1', b'0x88', b'0xBD', b'0xE3',
        b'0x82', b'0xA2', b'0xEF', b'0xBD',
        b'0xBA', b'0xEF', b'0xBE', b'0x9E',
    ]
    self.input = None

    mock_grammar = self.MockGrammar()
    category = dictionarylib.charactercategory.CharacterCategory()
    base_dir = os.path.dirname(os.path.abspath(__file__))
    definition_path = os.path.join(base_dir, 'resources/char.def')
    category.read_character_definition(definition_path)
    mock_grammar.set_character_category(category)

    self.builder = sudachipy.utf8inputtextbuilder.UTF8InputTextBuilder(
        self.TEXT, mock_grammar)