How to use the sudachipy.dictionarylib.charactercategory.CharacterCategory class in SudachiPy

To help you get started, we’ve selected a few SudachiPy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github WorksApplications / SudachiPy / tests / dictionarylib / test_charactercategory.py View on Github external
def test_read_character_definition_with_invalid_type(self):
        # A definition line naming an unknown category ("FOO") must make
        # read_character_definition raise AttributeError with a message that
        # identifies the offending type and line.
        definition_path = os.path.join(self.test_dir, 'test_file.txt')
        with open(definition_path, 'w') as out:
            out.write("0x0030..0x0039 FOO\n")
        category = charactercategory.CharacterCategory()
        with self.assertRaises(AttributeError) as raised:
            category.read_character_definition(definition_path)
        message = raised.exception.args[0]
        self.assertEqual('FOO is invalid type at line 0', message)
github WorksApplications / SudachiPy / tests / dictionarylib / test_charactercategory.py View on Github external
wf.write("0x0030         KANJI\n")
        # NOTE(review): the start of this test (its def line and the earlier
        # writes to the definition file) is outside this excerpt.
        cat = charactercategory.CharacterCategory()
        cat.read_character_definition(f)
        # 0x0030 is expected to carry both NUMERIC and KANJI, 0x0039 only
        # NUMERIC, and 0x3007 only KANJI.
        self.assertEqual({CategoryType.NUMERIC, CategoryType.KANJI}, cat.get_category_types(0x0030))
        self.assertEqual({CategoryType.NUMERIC}, cat.get_category_types(0x0039))
        self.assertEqual({CategoryType.KANJI}, cat.get_category_types(0x3007))

        # Second scenario: rewrite the definition file with a "#" line, a
        # whitespace-only line, and several ranges that overlap each other.
        f = os.path.join(self.test_dir, 'test_file.txt')
        with open(f, 'w') as wf:
            wf.write("#\n \n")
            wf.write("0x0030..0x0039 KATAKANA\n")
            wf.write("0x3007         KANJI\n")
            wf.write("0x0039..0x0040 ALPHA\n")
            wf.write("0x0030..0x0039 NUMERIC\n")
            wf.write("0x0030         KANJI\n")
        # A fresh CharacterCategory is used so nothing from the first scenario
        # carries over.
        cat = charactercategory.CharacterCategory()
        cat.read_character_definition(f)
        # Each code point is expected to report every category whose range
        # covers it: 0x0030 is in the KATAKANA and NUMERIC ranges plus the
        # single-point KANJI entry; 0x0039 is in KATAKANA, NUMERIC and ALPHA.
        self.assertEqual({CategoryType.NUMERIC, CategoryType.KANJI, CategoryType.KATAKANA}, cat.get_category_types(0x0030))
        self.assertEqual({CategoryType.NUMERIC, CategoryType.KATAKANA, CategoryType.ALPHA}, cat.get_category_types(0x0039))
        self.assertEqual({CategoryType.KANJI}, cat.get_category_types(0x3007))
        # 0x4007 appears in no definition line and is expected to report DEFAULT.
        self.assertEqual({CategoryType.DEFAULT}, cat.get_category_types(0x4007))
github WorksApplications / SudachiPy / tests / dictionarylib / test_charactercategory.py View on Github external
def test_get_category_types(self):
        # Load char.def from the test resources directory and check that the
        # character '熙' is reported as KANJI rather than DEFAULT.
        category = charactercategory.CharacterCategory()
        char_def_path = os.path.join(self.test_resources_dir, 'char.def')
        category.read_character_definition(char_def_path)
        self.assertEqual(
            {CategoryType.KANJI},
            category.get_category_types(ord('熙')))
        self.assertNotEqual(
            {CategoryType.DEFAULT},
            category.get_category_types(ord('熙')))
github WorksApplications / SudachiPy / tests / mock_grammar.py View on Github external
def mocked_get_character_category():
    """Return a CharacterCategory loaded from ``sudachipy/resources/char.def``.

    The resources directory is resolved relative to this file: one level up,
    then ``sudachipy/resources``. If reading the definition raises ``IOError``
    the error is printed and the category object is returned anyway.
    """
    category = CharacterCategory()
    this_dir = os.path.dirname(os.path.abspath(__file__))
    resources_dir = os.path.join(this_dir, os.pardir, 'sudachipy', 'resources')
    char_def_path = os.path.join(resources_dir, 'char.def')
    try:
        category.read_character_definition(char_def_path)
    except IOError as err:
        # Best effort: report the failure but still hand back the object.
        print(err)
    return category
github WorksApplications / SudachiPy / tests / test_utf8inputtext.py View on Github external
def setUp(self):
        # Fixture: sample text, a table of byte literals, and an input text
        # builder wired to a mock grammar whose character categories are
        # loaded from resources/char.def next to this file.
        self.TEXT = "âbC1あ234漢字𡈽アゴ"
        # NOTE(review): each entry is a 4-character bytes literal such as
        # b'0xC3', not a single byte like b'\xC3' — confirm this is what the
        # tests that read self.bytes expect.
        self.bytes = [
            b'0xC3', b'0xA2', b'0xEF', b'0xBD', b'0x82', b'0x43', b'0x31', b'0xE3',
            b'0x81', b'0x82', b'0x32', b'0x33', b'0x34', b'0xE6', b'0xBC', b'0xA2',
            b'0xE5', b'0xAD', b'0x97', b'0xF0', b'0xA1', b'0x88', b'0xBD', b'0xE3',
            b'0x82', b'0xA2', b'0xEF', b'0xBD', b'0xBA', b'0xEF', b'0xBE', b'0x9E',
        ]

        self.input = None

        mock_grammar = self.MockGrammar()
        categories = dictionarylib.charactercategory.CharacterCategory()
        here = os.path.dirname(os.path.abspath(__file__))
        char_def_path = os.path.join(here, 'resources/char.def')
        categories.read_character_definition(char_def_path)
        mock_grammar.set_character_category(categories)

        self.builder = sudachipy.utf8inputtextbuilder.UTF8InputTextBuilder(
            self.TEXT, mock_grammar)