How to use the sudachipy.dictionarylib.dictionaryheader.DictionaryHeader class in SudachiPy

To help you get started, we’ve selected a few SudachiPy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github WorksApplications / SudachiPy / tests / dictionarylib / test_dictionarybuilder.py View on Github external
"""
        Copy of sudachipy.dictionary.Dictionary.read_system_dictionary
        :param filename:
        :return:
        """
        import mmap
        from sudachipy import dictionarylib
        buffers = []
        if filename is None:
            raise AttributeError("system dictionary is not specified")
        with open(filename, 'r+b') as system_dic:
            bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
        buffers.append(bytes_)

        offset = 0
        header = dictionarylib.dictionaryheader.DictionaryHeader.from_bytes(bytes_, offset)
        if header.version != SYSTEM_DICT_VERSION:
            raise Exception("invalid system dictionary")
        offset += header.storage_size()

        grammar = dictionarylib.grammar.Grammar(bytes_, offset)
        offset += grammar.get_storage_size()

        lexicon = dictionarylib.lexiconset.LexiconSet(dictionarylib.doublearraylexicon.DoubleArrayLexicon(bytes_, offset))
        return buffers, header, grammar, lexicon
github WorksApplications / SudachiPy / tests / dictionarylib / test_dictionaryheader.py View on Github external
def setUp(self):
        """Load the header of the test system dictionary.

        Memory-maps ``resources/system.dic`` (resolved one directory above
        this test module) and stores the parsed header on ``self.header``.
        """
        # Copied from sudachipy.dictionary.Dictionary.read_system_dictionary
        test_resources_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            os.pardir,
            'resources')
        filename = os.path.join(test_resources_dir, 'system.dic')
        # The mapping is read-only, so a read-only file handle is enough;
        # 'r+b' would needlessly require write permission on the fixture.
        with open(filename, 'rb') as system_dic:
            bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
        # The header is parsed from the very start of the mapped buffer.
        self.header = DictionaryHeader.from_bytes(bytes_, 0)
github WorksApplications / SudachiPy / tests / dictionarylib / test_doublearraylexicon.py View on Github external
def setUp(self):
        """Load the lexicon of the test system dictionary.

        Memory-maps ``resources/system.dic`` (resolved one directory above
        this test module), validates the header version and stores a
        ``DoubleArrayLexicon`` on ``self.lexicon``.

        Raises:
            Exception: if the header version is not ``SYSTEM_DICT_VERSION``.
        """
        # Copied from sudachipy.dictionary.Dictionary.read_system_dictionary
        test_resources_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            os.pardir,
            'resources')
        filename = os.path.join(test_resources_dir, 'system.dic')
        # The mapping is read-only, so a read-only file handle is enough;
        # 'r+b' would needlessly require write permission on the fixture.
        with open(filename, 'rb') as system_dic:
            bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
        header = DictionaryHeader.from_bytes(bytes_, 0)
        if header.version != SYSTEM_DICT_VERSION:
            raise Exception('invalid system dictionary')
        # NOTE(review): 470 is presumably the size of the grammar section of
        # this particular fixture, which sits between header and lexicon --
        # confirm against the test dictionary.
        self.lexicon = DoubleArrayLexicon(bytes_, header.storage_size() + 470)
github megagonlabs / ginza / sudachipy / command_line.py View on Github external
from . import dictionarylib
    """
    Copy of sudachipy.dictionary.Dictionary.read_system_dictionary
    :param filename:
    :return:
    """
    import mmap
    buffers = []
    if filename is None:
        raise AttributeError("system dictionary is not specified")
    with open(filename, 'r+b') as system_dic:
        bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
    buffers.append(bytes_)

    offset = 0
    header = DictionaryHeader.from_bytes(bytes_, offset)
    if header.version != DictionaryVersion.SYSTEM_DICT_VERSION:
        raise Exception("invalid system dictionary")
    offset += header.storage_size()

    grammar = dictionarylib.grammar.Grammar(bytes_, offset)
    offset += grammar.get_storage_size()

    lexicon = dictionarylib.lexiconset.LexiconSet(dictionarylib.doublearraylexicon.DoubleArrayLexicon(bytes_, offset))
    return buffers, header, grammar, lexicon
github WorksApplications / SudachiPy / sudachipy / dictionarylib / binarydictionary.py View on Github external
def _read_dictionary(filename, access=mmap.ACCESS_READ):
        """Memory-map a binary dictionary file and parse its sections.

        The file is read in order: header, then (unless the header version is
        ``USER_DICT_VERSION_1``) a grammar section, then the lexicon.

        Args:
            filename: path of the dictionary file to map.
            access: ``mmap`` access mode used for the mapping.

        Returns:
            A tuple ``(bytes_, grammar, header, lexicon)`` where ``bytes_`` is
            the mmap object and ``grammar`` is ``None`` for
            ``USER_DICT_VERSION_1`` dictionaries.

        Raises:
            Exception: if the header version is not one of the known system
                or user dictionary versions.
        """
        # A read-only mapping only needs a read-only file handle; asking for
        # 'r+b' there would fail on dictionaries the user cannot write to.
        # Any other access mode keeps the original read/write open.
        mode = 'rb' if access == mmap.ACCESS_READ else 'r+b'
        with open(filename, mode) as system_dic:
            bytes_ = mmap.mmap(system_dic.fileno(), 0, access=access)
        offset = 0
        header = DictionaryHeader.from_bytes(bytes_, offset)
        offset += header.storage_size()
        if header.version not in (SYSTEM_DICT_VERSION, USER_DICT_VERSION_1, USER_DICT_VERSION_2):
            raise Exception('invalid dictionary version')
        grammar = None
        if header.version != USER_DICT_VERSION_1:
            # Only these versions carry a grammar section before the lexicon.
            grammar = Grammar(bytes_, offset)
            offset += grammar.get_storage_size()

        lexicon = DoubleArrayLexicon(bytes_, offset)
        return bytes_, grammar, header, lexicon
github megagonlabs / ginza / sudachipy / dictionary.py View on Github external
def read_system_dictionary(self, filename):
        """Memory-map the system dictionary and load its sections onto ``self``.

        Appends the mmap object to ``self.buffers`` and sets ``self.header``,
        ``self.grammar`` and ``self.lexicon`` from consecutive regions of the
        mapped file.

        Args:
            filename: path of the system dictionary file.

        Raises:
            AttributeError: if ``filename`` is ``None``.
            Exception: if the header version is not
                ``DictionaryVersion.SYSTEM_DICT_VERSION``.
        """
        if filename is None:
            raise AttributeError("system dictionary is not specified")
        # The mapping is read-only, so a read-only file handle is enough;
        # 'r+b' would fail on dictionaries the user cannot write to.
        with open(filename, 'rb') as system_dic:
            bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
        self.buffers.append(bytes_)

        offset = 0
        self.header = dictionarylib.dictionaryheader.DictionaryHeader.from_bytes(bytes_, offset)
        if self.header.version != DictionaryVersion.SYSTEM_DICT_VERSION:
            raise Exception("invalid system dictionary")
        offset += self.header.storage_size()

        # The grammar section starts right after the header.
        self.grammar = dictionarylib.grammar.Grammar(bytes_, offset)
        offset += self.grammar.get_storage_size()

        # The lexicon occupies the region following the grammar section.
        self.lexicon = dictionarylib.lexiconset.LexiconSet(dictionarylib.doublearraylexicon.DoubleArrayLexicon(bytes_, offset))
github WorksApplications / SudachiPy / sudachipy / command_line.py View on Github external
def _command_build(args, print_usage):
    """Build a system dictionary from the inputs named in ``args``.

    Runs the matrix-file and input-file checkers, then writes a
    ``SYSTEM_DICT_VERSION`` header followed by the builder output to
    ``args.out_file``.

    Args:
        args: parsed arguments providing ``matrix_file``, ``in_files``,
            ``out_file`` and ``description``.
        print_usage: forwarded unchanged to the checker helpers.
    """
    _matrix_file_checker(args, print_usage)
    _input_files_checker(args, print_usage)

    # The header records the current Unix time, truncated to whole seconds.
    created_at = int(time.time())
    dict_header = DictionaryHeader(SYSTEM_DICT_VERSION, created_at, args.description)

    with open(args.out_file, 'wb') as out_stream:
        with open(args.matrix_file, 'r') as matrix_stream:
            # The header goes first; the builder appends after it.
            out_stream.write(dict_header.to_bytes())
            DictionaryBuilder().build(args.in_files, matrix_stream, out_stream)
github megagonlabs / ginza / sudachipy / command_line.py View on Github external
def _command_build(args, print_usage):
    """Build a system dictionary from the inputs named in ``args``.

    Runs the matrix-file and input-file checkers, then writes a
    ``DictionaryVersion.SYSTEM_DICT_VERSION`` header followed by the builder
    output to ``args.out_file``.

    Args:
        args: parsed arguments providing ``matrix_file``, ``in_files``,
            ``out_file`` and ``description``.
        print_usage: forwarded unchanged to the checker helpers.
    """
    _matrix_file_checker(args, print_usage)
    _input_files_checker(args, print_usage)

    # The header records the current Unix time, truncated to whole seconds.
    created_at = int(time.time())
    dict_header = DictionaryHeader(
        DictionaryVersion.SYSTEM_DICT_VERSION, created_at, args.description)

    with open(args.out_file, 'wb') as out_stream:
        with open(args.matrix_file, 'r') as matrix_stream:
            # The header goes first; the builder appends after it.
            out_stream.write(dict_header.to_bytes())
            DictionaryBuilder().build(args.in_files, matrix_stream, out_stream)
github megagonlabs / ginza / sudachipy / command_line.py View on Github external
def _command_user_build(args, print_usage):
    """Build a user dictionary on top of an existing system dictionary.

    Runs the system-dictionary and input-file checkers, reads the system
    dictionary named by ``args.system_dic``, then writes a
    ``DictionaryVersion.USER_DICT_VERSION_2`` header followed by the builder
    output to ``args.out_file``.

    Args:
        args: parsed arguments providing ``system_dic``, ``in_files``,
            ``out_file`` and ``description``.
        print_usage: forwarded unchanged to the checker helpers.
    """
    _system_dic_checker(args, print_usage)
    _input_files_checker(args, print_usage)

    # The header records the current Unix time, truncated to whole seconds.
    created_at = int(time.time())
    dict_header = DictionaryHeader(
        DictionaryVersion.USER_DICT_VERSION_2, created_at, args.description)

    # Only the grammar and lexicon of the system dictionary are needed here.
    _buffers, _sys_header, grammar, system_lexicon = _read_system_dictionary(args.system_dic)

    with open(args.out_file, 'wb') as out_stream:
        # The header goes first; the builder appends after it.
        out_stream.write(dict_header.to_bytes())
        UserDictionaryBuilder(grammar, system_lexicon).build(args.in_files, None, out_stream)
github WorksApplications / SudachiPy / sudachipy / command_line.py View on Github external
def _command_user_build(args, print_usage):
    """Build a user dictionary on top of an existing system dictionary.

    Runs the system-dictionary and input-file checkers, loads the system
    dictionary named by ``args.system_dic``, then writes a
    ``USER_DICT_VERSION_2`` header followed by the builder output to
    ``args.out_file``.

    Args:
        args: parsed arguments providing ``system_dic``, ``in_files``,
            ``out_file`` and ``description``.
        print_usage: forwarded unchanged to the checker helpers.
    """
    _system_dic_checker(args, print_usage)
    _input_files_checker(args, print_usage)

    # The header records the current Unix time, truncated to whole seconds.
    created_at = int(time.time())
    dict_header = DictionaryHeader(USER_DICT_VERSION_2, created_at, args.description)

    system_dict = BinaryDictionary.from_system_dictionary(args.system_dic)

    with open(args.out_file, 'wb') as out_stream:
        # The header goes first; the builder appends after it.
        out_stream.write(dict_header.to_bytes())
        user_builder = UserDictionaryBuilder(system_dict.grammar, system_dict.lexicon)
        user_builder.build(args.in_files, None, out_stream)