def openGzip(self):
    with open(self._filename, "rb") as bglFile:
        if not bglFile:
            log.error("file pointer empty: %s", bglFile)
            return False
        b_head = bglFile.read(6)
    if len(b_head) < 6 or b_head[:4] not in (
        b"\x12\x34\x00\x01",
        b"\x12\x34\x00\x02",
    ):
        log.error("invalid header: %r", b_head[:6])
        return False
    self.gzipOffset = gzipOffset = binStrToInt(b_head[4:6])
    log.debug("Position of gz header: %s", gzipOffset)
    if gzipOffset < 6:
        log.error("invalid gzip header position: %s", gzipOffset)
        return False
    self.file = BGLGzipFile(
        fileobj=FileOffS(self._filename, gzipOffset),
        closeFileobj=True,
    )
    return True
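# The 6-byte header checked above is a 4-byte magic (0x12 0x34 0x00 0x01 or
# 0x12 0x34 0x00 0x02) followed by a 2-byte offset of the embedded gzip stream.
# A minimal standalone sketch of the same check, assuming binStrToInt decodes
# big-endian integers; gzipOffsetOf is a hypothetical name, not part of the reader:
def gzipOffsetOf(filename):
    with open(filename, "rb") as f:
        head = f.read(6)
    if len(head) < 6 or head[:4] not in (b"\x12\x34\x00\x01", b"\x12\x34\x00\x02"):
        return None
    offset = int.from_bytes(head[4:6], "big")
    return offset if offset >= 6 else None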
def readEntry_Type11(self, block):
    """return (succeed, u_word, u_alts, u_defi)"""
    Err = (False, None, None, None)
    pos = 0
    # reading headword
    if pos + 5 > len(block.data):
        log.error(
            "reading block offset=%#.2x" % block.offset +
            ", reading word size: pos + 5 > len(block.data)"
        )
        return Err
    wordLen = binStrToInt(block.data[pos:pos+5])
    pos += 5
    if pos + wordLen > len(block.data):
        log.error(
            "reading block offset=%#.2x" % block.offset +
            ", block.type=%s" % block.type +
            ", reading word: pos + wordLen > len(block.data)"
        )
        return Err
    b_word = block.data[pos:pos+wordLen]
    u_word = self.processKey(b_word)
    pos += wordLen
    self.wordLenMax = max(self.wordLenMax, len(u_word))
    # reading alts and defi
    if pos + 4 > len(block.data):
        log.error(
            "reading block offset=%#.2x" % block.offset +
            ", reading alts size: pos + 4 > len(block.data)"
        )
        return Err
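# Illustration only: the reads above all follow one length-prefixed pattern,
# sketched below as a hypothetical helper (int.from_bytes stands in for
# binStrToInt, assuming big-endian lengths):
def readLenPrefixedField(data, pos, sizeBytes):
    """Return (field, newPos), or (None, pos) if `data` is too short."""
    if pos + sizeBytes > len(data):
        return None, pos
    fieldLen = int.from_bytes(data[pos:pos + sizeBytes], "big")
    pos += sizeBytes
    if pos + fieldLen > len(data):
        return None, pos
    return data[pos:pos + fieldLen], pos + fieldLen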
def read_type_0(self, block):
    x = block.data[0]
    if x == 2:
        # this number is very close to self.bgl_numEntries,
        # but does not always equal the number of entries
        # see self.read_type_3, x == 12 as well
        num = binStrToInt(block.data[1:])
    elif x == 8:
        value = binStrToInt(block.data[1:])
        if value >= 0x41:
            value -= 0x41
        if value < len(self.charsets):
            self.defaultCharset = self.charsets[value]
        else:
            log.warning('read_type_0: unknown defaultCharset {0}'.format(value))
    else:
        self.unknownBlock(block)
        return False
    return True
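# Worked example for the x == 8 branch above: a stored value of 0x43 ('C')
# becomes charset index 0x43 - 0x41 = 2, i.e. self.charsets[2]; an adjusted
# value outside the range of self.charsets triggers the warning instead.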
def read_type_3(self, block):
    x = binStrToInt(block.data[0:2])
    pos = 2
    if x == 0x01:
        # glossary name
        self.title = block.data[pos:]
    elif x == 0x02:
        # glossary author name
        # a list of '|'-separated values
        self.author = block.data[pos:]
    elif x == 0x03:
        # glossary author e-mail
        self.email = block.data[pos:]
    elif x == 0x04:
        # copyright message
        self.copyright = block.data[pos:]
    elif x == 0x07:
        value = binStrToInt(block.data[pos:])
def read_type_3_message(self, block):
    x = binStrToInt(block.data[0:2])
    pos = 2
    y = binStrToInt(block.data[pos:pos+2])
    pos += 2
    if y == 0:
        if len(block.data) != pos:
            log.warning('read_type_3_message: x = {0}. unexpected block size = {1}'.format(
                x,
                len(block.data),
            ))
    elif y == 1:
        z = binStrToInt(block.data[pos:pos+4])
        pos += 4
        a = binStrToInt(block.data[pos:pos+2])
        pos += 2
        if a != 0:
            log.warning('read_type_3_message: x = {0}. a = {1} != 0'.format(x, a))
        if 2*z != len(block.data)-pos:
            log.warning('read_type_3_message: x = {0}. z = {1} does not match block size'.format(x, z))
"b_key = %r:\ntoo few data after \\x1a" % b_key
)
return
fields.b_field_1a = b_defi[i:i+Len]
i += Len
elif b_defi[i] == 0x28: # "\x28"
# title with transcription?
if i + 2 >= len(b_defi):
log.debug(
"collecting definition fields, " +
"b_defi = %r\n" % b_defi +
"b_key = %r:\ntoo few data after \\x28" % b_key
)
return
i += 1
Len = binStrToInt(b_defi[i:i+2])
i += 2
if Len == 0:
log.debug(
"collecting definition fields, " +
"b_defi = %r\n" % b_defi +
"b_key = %r:\nblank data after \\x28" % b_key
)
continue
if i+Len > len(b_defi):
log.debug(
"collecting definition fields, " +
"b_defi = %r\n" % b_defi +
"b_key = %r:\ntoo few data after \\x28" % b_key
)
return
fields.b_title_trans = b_defi[i:i+Len]
elif x == 0x0a:
    # value = 0 - browsing disabled
    # value = 1 - browsing enabled
    value = block.data[pos]
    browsing_enabled = value != 0
elif x == 0x0b:
    # glossary icon
    pass
elif x == 0x0c:
    # this does not always match the number of entries in the dictionary,
    # but it's close to it.
    # the difference is usually +-1 or 2; in rare cases it may be 9, 29 or more
    self.bgl_numEntries = binStrToInt(block.data[pos:])
elif x == 0x11:
    # a flag field
    flags = binStrToInt(block.data[pos:])
    # when this flag is set, utf-8 encoding is used for all articles;
    # when it is not, the encoding is set according to the source and target alphabets
    self.option_utf8_encoding = (flags & 0x8000) != 0
    # determines whether the glossary offers spelling alternatives for searched terms
    spelling_alternatives = (flags & 0x10000) == 0
    # defines whether the search for terms in this glossary is case sensitive
    # see code 0x20 as well
    case_sensitive = (flags & 0x1000) != 0
elif x == 0x14:  # creation time
    self.creationTime = decodeBglBinTime(block.data[2:])
elif x == 0x1a:
    value = block.data[2]
    if value >= 0x41:
        value -= 0x41
    if value < len(self.charsets):
        self.sourceCharset = self.charsets[value]
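# Worked example for the 0x11 flag word above (illustrative value only):
flags = 0x18000
option_utf8_encoding = (flags & 0x8000) != 0    # True: articles are utf-8 encoded
spelling_alternatives = (flags & 0x10000) == 0  # False: no spelling alternatives
case_sensitive = (flags & 0x1000) != 0          # False: search is not case sensitive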
def readBytes(self, bytes):
    val = 0
    if bytes < 1 or bytes > 4:
        log.error('readBytes: invalid argument bytes {0}'.format(bytes))
        return -1
    self.file.flush()
    buf = self.file.read(bytes)
    if len(buf) == 0:
        log.debug('readBytes: end of file: len(buf)==0')
        return -1
    if len(buf) != bytes:
        log.error('readBytes: requested {0} bytes, actually read {1} bytes'.format(bytes, len(buf)))
        return -1
    return binStrToInt(buf)
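# binStrToInt above comes from pyglossary's text utilities; as used here it is
# assumed to decode a byte string as a big-endian unsigned integer. A minimal
# stand-in under that assumption, for illustration only:
def binStrToInt(b_str):
    """Sketch only: interpret bytes as a big-endian unsigned integer."""
    return int.from_bytes(b_str, byteorder="big")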
    t = b_block[i]
    if not bytes([t]).isalpha():
        return None
    i += 1
    if bytes([t]).islower():
        beg = i
        i = b_block.find(b"\x00", beg)
        if i < 0:
            return None
        res.append((b_block[beg:i], t))
        i += 1
    else:
        assert bytes([t]).isupper()
        if i + 4 > len(b_block):
            return None
        size = binStrToInt(b_block[i:i+4])
        i += 4
        if i + size > len(b_block):
            return None
        res.append((b_block[i:i+size], t))
        i += size
return res
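# For reference, the loop above reads a sequence of tagged parameters: a lowercase
# ASCII tag is followed by a NUL-terminated value, while an uppercase tag is
# followed by a 4-byte length (via binStrToInt, assumed big-endian) and that many
# bytes of value. A hypothetical input, for illustration only:
#
#     b_block = b"a" + b"hello\x00" + b"B" + b"\x00\x00\x00\x03" + b"xyz"
#     # parsing yields res == [(b"hello", ord("a")), (b"xyz", ord("B"))]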