Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""
Copy of sudachipy.dictionary.Dictionary.read_system_dictionary
:param filename:
:return:
"""
import mmap
from sudachipy import dictionarylib
buffers = []
if filename is None:
raise AttributeError("system dictionary is not specified")
with open(filename, 'r+b') as system_dic:
bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
buffers.append(bytes_)
offset = 0
header = dictionarylib.dictionaryheader.DictionaryHeader.from_bytes(bytes_, offset)
if header.version != SYSTEM_DICT_VERSION:
raise Exception("invalid system dictionary")
offset += header.storage_size()
grammar = dictionarylib.grammar.Grammar(bytes_, offset)
offset += grammar.get_storage_size()
lexicon = dictionarylib.lexiconset.LexiconSet(dictionarylib.doublearraylexicon.DoubleArrayLexicon(bytes_, offset))
return buffers, header, grammar, lexicon
def setUp(self):
    """Load the header of the bundled test system dictionary.

    Copied from sudachipy.dictionary.Dictionary.read_system_dictionary:
    ``../resources/system.dic`` (relative to this file's directory) is
    memory-mapped read-only and the header found at offset 0 is stored
    in ``self.header``.
    """
    test_resources_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        os.pardir,
        'resources')
    filename = os.path.join(test_resources_dir, 'system.dic')
    # The mapping is ACCESS_READ, so plain 'rb' is enough; 'r+b' would
    # needlessly require write permission on the fixture file.
    with open(filename, 'rb') as system_dic:
        bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
        offset = 0
        self.header = DictionaryHeader.from_bytes(bytes_, offset)
def setUp(self):
    """Build a ``DoubleArrayLexicon`` over the bundled test system dictionary.

    Copied from sudachipy.dictionary.Dictionary.read_system_dictionary:
    ``../resources/system.dic`` (relative to this file's directory) is
    memory-mapped read-only, its header version is checked and the
    resulting lexicon is stored in ``self.lexicon``.

    :raises Exception: if the header version is not ``SYSTEM_DICT_VERSION``.
    """
    test_resources_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        os.pardir,
        'resources')
    filename = os.path.join(test_resources_dir, 'system.dic')
    # The mapping is ACCESS_READ, so plain 'rb' is enough; 'r+b' would
    # needlessly require write permission on the fixture file.
    with open(filename, 'rb') as system_dic:
        bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
        header = DictionaryHeader.from_bytes(bytes_, 0)
        if header.version != SYSTEM_DICT_VERSION:
            raise Exception('invalid system dictionary')
        # NOTE(review): 470 is a hard-coded number of bytes skipped after the
        # header; presumably the size of the grammar section in this
        # fixture -- confirm against the test resource.
        self.lexicon = DoubleArrayLexicon(bytes_, header.storage_size() + 470)
from . import dictionarylib
"""
Copy of sudachipy.dictionary.Dictionary.read_system_dictionary
:param filename:
:return:
"""
import mmap
buffers = []
if filename is None:
raise AttributeError("system dictionary is not specified")
with open(filename, 'r+b') as system_dic:
bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
buffers.append(bytes_)
offset = 0
header = DictionaryHeader.from_bytes(bytes_, offset)
if header.version != DictionaryVersion.SYSTEM_DICT_VERSION:
raise Exception("invalid system dictionary")
offset += header.storage_size()
grammar = dictionarylib.grammar.Grammar(bytes_, offset)
offset += grammar.get_storage_size()
lexicon = dictionarylib.lexiconset.LexiconSet(dictionarylib.doublearraylexicon.DoubleArrayLexicon(bytes_, offset))
return buffers, header, grammar, lexicon
def _read_dictionary(filename, access=mmap.ACCESS_READ):
    """Memory-map a dictionary file and parse its sections.

    The header is read at offset 0. A grammar section is read next unless
    the header version is ``USER_DICT_VERSION_1``, and the lexicon is read
    from whatever offset follows.

    :param filename: path of the dictionary file to map.
    :param access: ``mmap`` access mode used for the mapping.
    :return: tuple ``(bytes_, grammar, header, lexicon)``; ``grammar`` is
        ``None`` for ``USER_DICT_VERSION_1`` dictionaries.
    :raises Exception: if the header version is not one of the accepted
        system/user dictionary versions.
    """
    # A read-only mapping does not need a writable file handle; keep 'r+b'
    # for every other access mode, as before.
    mode = 'rb' if access == mmap.ACCESS_READ else 'r+b'
    with open(filename, mode) as system_dic:
        bytes_ = mmap.mmap(system_dic.fileno(), 0, access=access)
        offset = 0
        header = DictionaryHeader.from_bytes(bytes_, offset)
        offset += header.storage_size()
        if header.version not in (SYSTEM_DICT_VERSION, USER_DICT_VERSION_1, USER_DICT_VERSION_2):
            # The caller never receives the mapping on this path, so release
            # it here instead of waiting for garbage collection.
            try:
                bytes_.close()
            except BufferError:
                # Best effort: something still exports the buffer; the
                # version error below is the one that matters.
                pass
            raise Exception('invalid dictionary version')
        grammar = None
        if header.version != USER_DICT_VERSION_1:
            grammar = Grammar(bytes_, offset)
            offset += grammar.get_storage_size()
        lexicon = DoubleArrayLexicon(bytes_, offset)
        return bytes_, grammar, header, lexicon
def read_system_dictionary(self, filename):
    """Memory-map the system dictionary and populate this instance from it.

    The mapping is appended to ``self.buffers``; the parsed header, grammar
    and lexicon set are stored in ``self.header``, ``self.grammar`` and
    ``self.lexicon``.

    :param filename: path of the system dictionary file.
    :raises AttributeError: if ``filename`` is ``None``.
    :raises Exception: if the header version is not
        ``DictionaryVersion.SYSTEM_DICT_VERSION``.
    """
    if filename is None:
        raise AttributeError("system dictionary is not specified")
    # The mapping is ACCESS_READ, so plain 'rb' is enough; 'r+b' would
    # needlessly require write permission on the dictionary file.
    with open(filename, 'rb') as system_dic:
        bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
        self.buffers.append(bytes_)
        offset = 0
        self.header = dictionarylib.dictionaryheader.DictionaryHeader.from_bytes(bytes_, offset)
        if self.header.version != DictionaryVersion.SYSTEM_DICT_VERSION:
            raise Exception("invalid system dictionary")
        offset += self.header.storage_size()
        self.grammar = dictionarylib.grammar.Grammar(bytes_, offset)
        # The lexicon starts right after the grammar section.
        offset += self.grammar.get_storage_size()
        self.lexicon = dictionarylib.lexiconset.LexiconSet(
            dictionarylib.doublearraylexicon.DoubleArrayLexicon(bytes_, offset))
def _command_build(args, print_usage):
    """Build a system dictionary file from the parsed command-line arguments.

    Runs the matrix-file and input-file checkers, then writes a header
    (``SYSTEM_DICT_VERSION``, the current time in whole seconds and
    ``args.description``) to ``args.out_file`` and lets a
    ``DictionaryBuilder`` write the rest of the file.

    :param args: parsed arguments; reads ``description``, ``out_file``,
        ``matrix_file`` and ``in_files``.
    :param print_usage: forwarded to the argument checkers.
    """
    _matrix_file_checker(args, print_usage)
    _input_files_checker(args, print_usage)

    # The header is created before any file is opened.
    created_at = int(time.time())
    dict_header = DictionaryHeader(SYSTEM_DICT_VERSION, created_at, args.description)

    with open(args.out_file, 'wb') as out_stream:
        with open(args.matrix_file, 'r') as matrix_stream:
            out_stream.write(dict_header.to_bytes())
            DictionaryBuilder().build(args.in_files, matrix_stream, out_stream)
def _command_build(args, print_usage):
    """Build a system dictionary file from the parsed command-line arguments.

    After the matrix-file and input-file checkers have run, a header made of
    ``DictionaryVersion.SYSTEM_DICT_VERSION``, the current time in whole
    seconds and ``args.description`` is written to ``args.out_file``; a
    ``DictionaryBuilder`` then writes the remainder of the file.

    :param args: parsed arguments; reads ``description``, ``out_file``,
        ``matrix_file`` and ``in_files``.
    :param print_usage: forwarded to the argument checkers.
    """
    _matrix_file_checker(args, print_usage)
    _input_files_checker(args, print_usage)

    # The header is created before any file is opened.
    build_time = int(time.time())
    dict_header = DictionaryHeader(
        DictionaryVersion.SYSTEM_DICT_VERSION, build_time, args.description)

    with open(args.out_file, 'wb') as out_stream:
        with open(args.matrix_file, 'r') as matrix_stream:
            out_stream.write(dict_header.to_bytes())
            DictionaryBuilder().build(args.in_files, matrix_stream, out_stream)
def _command_user_build(args, print_usage):
    """Build a user dictionary file from the parsed command-line arguments.

    Runs the system-dictionary and input-file checkers, reads the system
    dictionary named by ``args.system_dic``, then writes a
    ``DictionaryVersion.USER_DICT_VERSION_2`` header followed by the output
    of a ``UserDictionaryBuilder`` to ``args.out_file``.

    :param args: parsed arguments; reads ``description``, ``system_dic``,
        ``out_file`` and ``in_files``.
    :param print_usage: forwarded to the argument checkers.
    """
    _system_dic_checker(args, print_usage)
    _input_files_checker(args, print_usage)

    build_time = int(time.time())
    dict_header = DictionaryHeader(
        DictionaryVersion.USER_DICT_VERSION_2, build_time, args.description)

    # Only the third and fourth items of the returned 4-tuple are needed.
    _, _, grammar, system_lexicon = _read_system_dictionary(args.system_dic)

    with open(args.out_file, 'wb') as out_stream:
        out_stream.write(dict_header.to_bytes())
        # A user dictionary build passes None where the system build passes
        # the matrix file.
        UserDictionaryBuilder(grammar, system_lexicon).build(args.in_files, None, out_stream)
def _command_user_build(args, print_usage):
    """Build a user dictionary file from the parsed command-line arguments.

    Runs the system-dictionary and input-file checkers, loads the system
    dictionary named by ``args.system_dic`` through ``BinaryDictionary``,
    then writes a ``USER_DICT_VERSION_2`` header followed by the output of a
    ``UserDictionaryBuilder`` to ``args.out_file``.

    :param args: parsed arguments; reads ``description``, ``system_dic``,
        ``out_file`` and ``in_files``.
    :param print_usage: forwarded to the argument checkers.
    """
    _system_dic_checker(args, print_usage)
    _input_files_checker(args, print_usage)

    build_time = int(time.time())
    dict_header = DictionaryHeader(USER_DICT_VERSION_2, build_time, args.description)

    system_dict = BinaryDictionary.from_system_dictionary(args.system_dic)

    with open(args.out_file, 'wb') as out_stream:
        out_stream.write(dict_header.to_bytes())
        user_builder = UserDictionaryBuilder(system_dict.grammar, system_dict.lexicon)
        # No matrix file is passed for a user dictionary build.
        user_builder.build(args.in_files, None, out_stream)