How to use the pyglossary.text_utils.binStrToInt function in pyglossary

To help you get started, we’ve selected a few pyglossary examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ilius / pyglossary / pyglossary / plugins / babylon_bgl / bgl_reader.py View on Github external
def openGzip(self):
		"""
		Validate the file header and open the embedded gzip stream.

		Reads the first 6 bytes of self._filename. Bytes 0-3 must match one
		of the two accepted signatures; bytes 4-5 are decoded with
		binStrToInt and used as the offset of the gzip data.

		self.gzipOffset is assigned as soon as the signature is accepted,
		even if the offset is then rejected. self.file is only assigned on
		success.

		Returns True on success; logs an error and returns False when the
		header is too short, has an unknown signature, or gives an offset
		smaller than the 6-byte header itself.
		"""
		# open() raises on failure and never yields a falsy file object,
		# so there is no "empty file pointer" case to check for here.
		with open(self._filename, "rb") as bglFile:
			b_head = bglFile.read(6)

		if len(b_head) < 6 or b_head[:4] not in (
			b"\x12\x34\x00\x01",
			b"\x12\x34\x00\x02",
		):
			log.error("invalid header: %r", b_head[:6])
			return False

		self.gzipOffset = gzipOffset = binStrToInt(b_head[4:6])
		log.debug("Position of gz header: %s", gzipOffset)

		# the gzip data cannot start inside the 6 header bytes just read
		if gzipOffset < 6:
			log.error("invalid gzip header position: %s", gzipOffset)
			return False

		self.file = BGLGzipFile(
			fileobj=FileOffS(self._filename, gzipOffset),
			closeFileobj=True,
		)

		return True
github ilius / pyglossary / pyglossary / plugins / babylon_bgl / bgl_reader.py View on Github external
def readEntry_Type11(self, block):
		"""return (succeed, u_word, u_alts, u_defi)"""
		Err = (False, None, None, None)
		pos = 0

		# reading headword
		if pos + 5 > len(block.data):
			log.error(
				"reading block offset=%#.2x" % block.offset +
				", reading word size: pos + 5 > len(block.data)"
			)
			return Err
		wordLen = binStrToInt(block.data[pos:pos+5])
		pos += 5
		if pos + wordLen > len(block.data):
			log.error(
				"reading block offset=%#.2x" % block.offset +
				", block.type=%s" % block.type +
				", reading word: pos + wordLen > len(block.data)"
			)
			return Err
		b_word = block.data[pos:pos+wordLen]
		u_word = self.processKey(b_word)
		pos += wordLen
		self.wordLenMax = max(self.wordLenMax, len(u_word))

		# reading alts and defi
		if pos + 4 > len(block.data):
			log.error(
github ilius / pyglossary / pyglossary / plugins / babylon_bgl.py View on Github external
def read_type_0(self, block):
        """
        Handle a type-0 block, dispatching on the first item of block.data.

        Code 2: the rest of the block is decoded with binStrToInt but the
        result is not stored.
        Code 8: the rest of the block is decoded; when the value is at least
        0x41, (value - 0x41) is used as an index into self.charsets to set
        self.defaultCharset, and a warning is logged if it is out of range.
        Any other code is passed to self.unknownBlock().

        Returns True for codes 2 and 8, False otherwise.
        """
        code = block.data[0]

        if code == 2:
            # this number is very close to self.bgl_numEntries, but is not
            # always equal to the number of entries
            # see self.read_type_3, x == 12 as well
            binStrToInt(block.data[1:])
            return True

        if code == 8:
            charset_index = binStrToInt(block.data[1:])
            if charset_index < 0x41:
                # values below 0x41 leave the default charset untouched
                return True
            charset_index -= 0x41
            if charset_index < len(self.charsets):
                self.defaultCharset = self.charsets[charset_index]
            else:
                log.warning('read_type_0: unknown defaultCharset {0}'.format(charset_index))
            return True

        self.unknownBlock(block)
        return False
github ilius / pyglossary / pyglossary / plugins / babylon_bgl.py View on Github external
def read_type_3(self, block):
        """
        Handle a type-3 block.

        The first two items of block.data are decoded with binStrToInt into
        a field code; the remainder (from offset 2) is that field's value.
        """
        x = binStrToInt(block.data[0:2])
        # offset of the field value, right after the 2-byte field code
        pos = 2
        if x==0x01:
            # glossary name
            # (stored exactly as sliced from block.data, no decoding here)
            self.title = block.data[pos:]
        elif x==0x02:
            # glossary author name
            # a list of '|'-separated values
            self.author = block.data[pos:]
        elif x==0x03:
            # glossary author e-mail
            self.email = block.data[pos:]
        elif x==0x04:
            # copyright message
            self.copyright = block.data[pos:]
        elif x==0x07:
            # numeric field: the whole remainder is decoded as one integer
            # NOTE(review): the meaning of code 0x07 is not evident here
            value = binStrToInt(block.data[pos:])
github ilius / pyglossary / pyglossary / plugins / babylon_bgl.py View on Github external
def read_type_3_message(self, block):
        """
        Check the layout of a type-3 message block, warning on mismatches.

        Fields are decoded from block.data with binStrToInt, in this order:
        a 2-item value x, then a 2-item value y.
        - y == 0: no further data is expected after these 4 items.
        - y == 1: a 4-item value z follows, then a 2-item value a that is
          expected to be 0; the remaining data is expected to be exactly
          2*z items long.

        Nothing is raised for a mismatch; a warning is logged instead.
        """
        x = binStrToInt(block.data[0:2])
        pos = 2
        y = binStrToInt(block.data[pos:pos+2])
        pos += 2
        if y == 0:
            if len(block.data) != pos:
                log.warning('read_type_3_message: x = {0}. unexpected block size = {1}'.format(
                    x,
                    len(block.data),
                ))
        elif y == 1:
            z = binStrToInt(block.data[pos:pos+4])
            pos += 4
            a = binStrToInt(block.data[pos:pos+2])
            pos += 2
            if a != 0:
                log.warning('read_type_3_message: x = {0}. a = {1} != 0'.format(x, a))
            if 2*z != len(block.data)-pos:
                # report z itself: the message labels the value as "z"
                log.warning('read_type_3_message: x = {0}. z = {1} does not match block size'.format(x, z))
github ilius / pyglossary / pyglossary / plugins / babylon_bgl / bgl_reader.py View on Github external
"b_key = %r:\ntoo few data after \\x1a" % b_key
					)
					return
				fields.b_field_1a = b_defi[i:i+Len]
				i += Len
			elif b_defi[i] == 0x28:  # "\x28" 
				# title with transcription?
				if i + 2 >= len(b_defi):
					log.debug(
						"collecting definition fields, " +
						"b_defi = %r\n" % b_defi +
						"b_key = %r:\ntoo few data after \\x28" % b_key
					)
					return
				i += 1
				Len = binStrToInt(b_defi[i:i+2])
				i += 2
				if Len == 0:
					log.debug(
						"collecting definition fields, " +
						"b_defi = %r\n" % b_defi +
						"b_key = %r:\nblank data after \\x28" % b_key
					)
					continue
				if i+Len > len(b_defi):
					log.debug(
						"collecting definition fields, " +
						"b_defi = %r\n" % b_defi +
						"b_key = %r:\ntoo few data after \\x28" % b_key
					)
					return
				fields.b_title_trans = b_defi[i:i+Len]
github ilius / pyglossary / pyglossary / plugins / babylon_bgl.py View on Github external
elif x==0x0a:
            ## value = 0 - browsing disabled
            ## value = 1 - browsing enabled
            value = block.data[pos]
            browsing_enabled = value != 0
        elif x==0x0b:
            ## Glossary icon
            pass
        elif x==0x0c:
            # this does not always matches the number of entries in the dictionary,
            # but it's close to it.
            # the difference is usually +- 1 or 2, in rare cases may be 9, 29 and more
            self.bgl_numEntries = binStrToInt(block.data[pos:])
        elif x==0x11:
            ## A flag field.
            flags = binStrToInt(block.data[pos:])
            # when this flag is set utf8 encoding is used for all articles
            # when false, the encoding is set according to the source and target alphabet
            self.option_utf8_encoding = (flags & 0x8000) != 0
            # Determines whether the glossary offers spelling alternatives for searched terms
            spelling_alternatives = (flags & 0x10000) == 0
            # defines if the search for terms in this glossary is case sensitive
            # see code 0x20 as well
            case_sensitive = (flags & 0x1000) != 0
        elif x==0x14:## Creation Time
            self.creationTime = decodeBglBinTime(block.data[2:])
        elif x==0x1a:
            value = block.data[2]
            if value >= 0x41:
                value -= 0x41
                if value < len(self.charsets):
                    self.sourceCharset = self.charsets[value]
github ilius / pyglossary / pyglossary / plugins / babylon_bgl.py View on Github external
def readBytes(self, bytes):
        """
        Read `bytes` items from self.file and decode them with binStrToInt.

        `bytes` must be between 1 and 4 inclusive. self.file is flushed
        before reading.

        Returns the decoded integer, or -1 when the argument is out of
        range (error logged), when nothing could be read (debug message
        logged), or when fewer items than requested were read (error
        logged).
        """
        if bytes < 1 or 4 < bytes:
            log.error('readBytes: invalid argument bytes {0}'.format(bytes))
            return -1

        self.file.flush()
        chunk = self.file.read(bytes)
        got = len(chunk)

        if got == bytes:
            return binStrToInt(chunk)

        # short read: tell a clean end-of-file apart from a truncated value
        if got == 0:
            log.debug('readBytes: end of file: len(buf)==0')
        else:
            log.error('readBytes: to read bytes = {0} , actually read bytes = {1}'.format(bytes, got))
        return -1
github ilius / pyglossary / pyglossary / plugins / stardict.py View on Github external
t = b_block[i]
			if not bytes([t]).isalpha():
				return None
			i += 1
			if bytes([t]).islower():
				beg = i
				i = b_block.find(b"\x00", beg)
				if i < 0:
					return None
				res.append((b_block[beg:i], t))
				i += 1
			else:
				assert bytes([t]).isupper()
				if i + 4 > len(b_block):
					return None
				size = binStrToInt(b_block[i:i+4])
				i += 4
				if i + size > len(b_block):
					return None
				res.append((b_block[i:i+size], t))
				i += size
		return res