Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from unittest import mock
from sudachipy.dictionarylib.charactercategory import CharacterCategory
from sudachipy.dictionarylib.grammar import Grammar
# Grammar test double: each configured getter returns a fixed placeholder.
mocked_grammar = mock.Mock(spec=Grammar)
mocked_grammar.configure_mock(**{
    'get_part_of_speech_size.return_value': 0,
    'get_part_of_speech_string.return_value': None,
    'get_part_of_speech_id.return_value': 0,
    'get_connect_cost.return_value': 0,
    # mocked_grammar.set_connect_cost.return_value = None
    'get_bos_parameter.return_value': None,
    'get_eos_parameter.return_value': None,
})
def mocked_get_character_category():
cat = CharacterCategory()
test_resources_dir = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
os.pardir,
'sudachipy',
'resources')
import mmap
from sudachipy import dictionarylib
buffers = []
if filename is None:
raise AttributeError("system dictionary is not specified")
with open(filename, 'r+b') as system_dic:
bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
buffers.append(bytes_)
offset = 0
header = dictionarylib.dictionaryheader.DictionaryHeader.from_bytes(bytes_, offset)
if header.version != SYSTEM_DICT_VERSION:
raise Exception("invalid system dictionary")
offset += header.storage_size()
grammar = dictionarylib.grammar.Grammar(bytes_, offset)
offset += grammar.get_storage_size()
lexicon = dictionarylib.lexiconset.LexiconSet(dictionarylib.doublearraylexicon.DoubleArrayLexicon(bytes_, offset))
return buffers, header, grammar, lexicon
def read_system_dictionary(self, filename):
    """Memory-map the system dictionary and load its header, grammar and lexicon.

    The mapped buffer is appended to ``self.buffers``. The file is parsed
    sequentially: the header first, then the grammar, then the lexicon; the
    results are stored on ``self.header``, ``self.grammar`` and
    ``self.lexicon``.

    Args:
        filename: path of the system dictionary file.

    Raises:
        AttributeError: if ``filename`` is None.
        Exception: if the header version is not the system dictionary version.
    """
    if filename is None:
        raise AttributeError("system dictionary is not specified")
    # The mapping is ACCESS_READ, so a read-only handle is sufficient.
    # Opening with 'r+b' would fail on dictionaries the process cannot write.
    with open(filename, 'rb') as system_dic:
        bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
    self.buffers.append(bytes_)
    offset = 0
    self.header = dictionarylib.dictionaryheader.DictionaryHeader.from_bytes(bytes_, offset)
    if self.header.version != DictionaryVersion.SYSTEM_DICT_VERSION:
        raise Exception("invalid system dictionary")
    # Each section reports its own size, which locates the next one.
    offset += self.header.storage_size()
    self.grammar = dictionarylib.grammar.Grammar(bytes_, offset)
    offset += self.grammar.get_storage_size()
    self.lexicon = dictionarylib.lexiconset.LexiconSet(
        dictionarylib.doublearraylexicon.DoubleArrayLexicon(bytes_, offset))
def connect_node(self, r_node: LatticeNode) -> None:
    """Link ``r_node`` to the cheapest usable node ending where it begins.

    Candidates come from ``self.end_lists[r_node.begin]``. A candidate is
    ignored when its ``is_connected_to_bos`` flag is false or when the
    grammar reports the connection cost as ``Grammar.INHIBITED_CONNECTION``.

    On return, ``r_node.total_cost`` is the lowest accumulated cost found
    (infinity if none) plus ``r_node.cost``. ``r_node.best_previous_node``
    is only overwritten when a candidate is accepted, and
    ``r_node.is_connected_to_bos`` tells whether it is set.
    """
    start = r_node.begin
    r_node.total_cost = float('inf')
    for candidate in self.end_lists[start]:
        if candidate.is_connected_to_bos:
            # right_id and left_id look reversed, but it works ...
            link_cost = self.grammar.get_connect_cost(candidate.right_id, r_node.left_id)
            if link_cost != Grammar.INHIBITED_CONNECTION:
                path_cost = candidate.total_cost + link_cost
                if path_cost < r_node.total_cost:
                    r_node.total_cost = path_cost
                    r_node.best_previous_node = candidate
    r_node.is_connected_to_bos = r_node.best_previous_node is not None
    r_node.total_cost += r_node.cost
"""
import mmap
buffers = []
if filename is None:
raise AttributeError("system dictionary is not specified")
with open(filename, 'r+b') as system_dic:
bytes_ = mmap.mmap(system_dic.fileno(), 0, access=mmap.ACCESS_READ)
buffers.append(bytes_)
offset = 0
header = DictionaryHeader.from_bytes(bytes_, offset)
if header.version != DictionaryVersion.SYSTEM_DICT_VERSION:
raise Exception("invalid system dictionary")
offset += header.storage_size()
grammar = dictionarylib.grammar.Grammar(bytes_, offset)
offset += grammar.get_storage_size()
lexicon = dictionarylib.lexiconset.LexiconSet(dictionarylib.doublearraylexicon.DoubleArrayLexicon(bytes_, offset))
return buffers, header, grammar, lexicon
def inhibit_connection(grammar: Grammar, left: int, right: int) -> None:
    """Mark the connection between two IDs as inhibited in the grammar.

    Args:
        grammar: grammar of system dictionary
        left: right-ID of left node
        right: left-ID of right node
    """
    inhibited = Grammar.INHIBITED_CONNECTION
    grammar.set_connect_cost(left, right, inhibited)
def _read_dictionary(filename, access=mmap.ACCESS_READ):
    """Memory-map a dictionary file and parse its header, grammar and lexicon.

    The file is parsed sequentially: the header first, then the grammar
    (skipped for ``USER_DICT_VERSION_1`` headers), then the lexicon.

    Args:
        filename: path of the dictionary file.
        access: ``mmap`` access mode used for the mapping.

    Returns:
        A ``(bytes_, grammar, header, lexicon)`` tuple, where ``bytes_`` is
        the mapping and ``grammar`` is None for ``USER_DICT_VERSION_1``.

    Raises:
        Exception: if the header version is not one of the known versions.
    """
    # A read-only mapping needs only a read-only handle, so dictionaries the
    # process cannot write to still load. Other access modes keep 'r+b'.
    mode = 'rb' if access == mmap.ACCESS_READ else 'r+b'
    with open(filename, mode) as system_dic:
        bytes_ = mmap.mmap(system_dic.fileno(), 0, access=access)
    offset = 0
    header = DictionaryHeader.from_bytes(bytes_, offset)
    offset += header.storage_size()
    if header.version not in (SYSTEM_DICT_VERSION, USER_DICT_VERSION_1, USER_DICT_VERSION_2):
        raise Exception('invalid dictionary version')
    grammar = None
    if header.version != USER_DICT_VERSION_1:
        grammar = Grammar(bytes_, offset)
        offset += grammar.get_storage_size()
    lexicon = DoubleArrayLexicon(bytes_, offset)
    return bytes_, grammar, header, lexicon