Secure your code as it's written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.
def from_bytes(self, bytes_data):
    """Deserialize the DocBin's annotations from a bytestring.

    bytes_data (bytes): The data to load from.
    RETURNS (DocBin): The loaded DocBin.

    DOCS: https://spacy.io/api/docbin#from_bytes
    """
    payload = srsly.msgpack_loads(zlib.decompress(bytes_data))
    self.attrs = payload["attrs"]
    self.strings = set(payload["strings"])
    n_attrs = len(self.attrs)
    doc_lengths = numpy.frombuffer(payload["lengths"], dtype="int32")
    # Tokens were flattened across docs; restore the (n_tokens, n_attrs) shape.
    token_array = numpy.frombuffer(payload["tokens"], dtype="uint64")
    token_array = token_array.reshape((token_array.size // n_attrs, n_attrs))
    space_array = numpy.frombuffer(payload["spaces"], dtype=bool)
    space_array = space_array.reshape((space_array.size, 1))
    ops = NumpyOps()
    self.tokens = ops.unflatten(token_array, doc_lengths)
    self.spaces = ops.unflatten(space_array, doc_lengths)
    self.cats = payload["cats"]
    if self.store_user_data and "user_data" in payload:
        self.user_data = list(payload["user_data"])
    for doc_tokens in self.tokens:
        # Every per-doc array must be 2D after unflattening.
        assert len(doc_tokens.shape) == 2, doc_tokens.shape  # this should never happen
    return self
def from_bytes(bytes_data, setters, exclude):
    """Deserialize a msgpack payload and dispatch each entry to its setter.

    bytes_data (bytes): The msgpack-encoded data.
    setters (dict): Maps serialization keys to callbacks that load the value.
    exclude (iterable): Key prefixes that should be skipped.
    RETURNS (dict): The decoded message.
    """
    msg = srsly.msgpack_loads(bytes_data)
    for key, setter in setters.items():
        # Split to support file names like meta.json
        prefix = key.split(".")[0]
        if prefix in exclude:
            continue
        if key in msg:
            setter(msg[key])
    return msg
def from_bytes(self, bytes_data):
    """Load a table from a bytestring.

    bytes_data (bytes): The data to load.
    RETURNS (Table): The loaded table.

    DOCS: https://spacy.io/api/lookups#table.from_bytes
    """
    serialized = srsly.msgpack_loads(bytes_data)
    self.name = serialized["name"]
    self.bloom = BloomFilter().from_bytes(serialized["bloom"])
    # Drop any existing entries before loading the serialized mapping.
    self.clear()
    self.update(serialized.get("dict", {}))
    return self
def from_bytes(self, bytes_data, **kwargs):
    """Load the lookups from a bytestring.

    bytes_data (bytes): The data to load.
    RETURNS (Lookups): The loaded Lookups.

    DOCS: https://spacy.io/api/lookups#from_bytes
    """
    # Replace the current tables wholesale with the deserialized ones.
    self._tables = OrderedDict()
    deserialized = srsly.msgpack_loads(bytes_data)
    for name, entries in deserialized.items():
        table = Table(name)
        table.update(entries)
        self._tables[name] = table
    return self
def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
    """Restore this object's serialization fields from a msgpack bytestring.

    bytes_data (bytes): The msgpack-encoded data.
    exclude (tuple): Unused here; kept for interface compatibility.
    RETURNS: self, after `finish_deserializing` has run.
    """
    decoded = srsly.msgpack_loads(bytes_data)
    for name in self.serialization_fields:
        setattr(self, name, decoded[name])
    self.finish_deserializing()
    return self
def from_bytes(self, bytes_data: bytes) -> Model:
"""Deserialize the model from a bytes representation. Models are usually
serialized using msgpack, so you should be able to call msgpack.loads()
on the data and get back a dictionary with the contents.
Serialization should round-trip identically, i.e. the same bytes should
result from loading and serializing a model.
"""
# Decode the msgpack payload; per-layer state lives under the b"weights" key.
data = srsly.msgpack_loads(bytes_data)
weights = data[b"weights"]
# Walk the layer tree starting from this model. NOTE(review): only the root
# is appended here — presumably sublayers are enqueued further down, but
# that part of the method is not visible in this excerpt.
queue = [self]
i = 0
for layer in queue:
# Hack to support saving/loading PyTorch models. TODO: Improve
if hasattr(layer, "_model") and not isinstance(layer._model, Model):
# Wrapped (non-thinc) layers consume their own serialized payload.
layer.from_bytes(weights[i])
i += 1
elif hasattr(layer, "_mem"):
# Native thinc layers: restore attrs, then dims, then parameters.
for attr, value in weights[i][b"attrs"].items():
layer.set_attr(attr, value)
for dim, value in weights[i][b"dims"].items():
# msgpack can return dim names as bytes; normalize to str before setattr.
if isinstance(dim, bytes):
dim = dim.decode("utf8")
setattr(layer, dim, value)
for param in weights[i][b"params"]:
# NOTE(review): the body of this parameter-restore loop (and the rest of
# the method) is truncated in this excerpt — do not assume its behavior.
def from_bytes(self, bytes_data):
# Deserialize layer state from a msgpack bytestring produced by the
# matching to_bytes (not visible here); per-layer dicts live under b"weights".
data = srsly.msgpack_loads(bytes_data)
weights = data[b"weights"]
# Traverse layers starting from this model. NOTE(review): only the root is
# appended here — sublayers are presumably enqueued later in the method,
# which is not visible in this excerpt.
queue = [self]
i = 0
for layer in queue:
# Hack to support saving/loading PyTorch models. TODO: Improve
if hasattr(layer, "_model") and not isinstance(layer._model, Model):
# Wrapped (non-thinc) layers consume their own serialized payload.
layer.from_bytes(weights[i])
i += 1
elif hasattr(layer, "_mem"):
# Restore the RNG seed first if one was serialized.
if b"seed" in weights[i]:
layer.seed = weights[i][b"seed"]
for dim, value in weights[i][b"dims"].items():
# msgpack can return dim names as bytes; normalize to str before setattr.
if isinstance(dim, bytes):
dim = dim.decode("utf8")
setattr(layer, dim, value)
for param in weights[i][b"params"]:
# NOTE(review): the body of this parameter-restore loop (and the rest of
# the method) is truncated in this excerpt — do not assume its behavior.
def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
    """Load serialization fields from a msgpack bytestring onto this object.

    bytes_data (bytes): The msgpack-encoded data.
    exclude (tuple): Unused here; kept for interface compatibility.
    RETURNS: self, once `finish_deserializing` has completed.
    """
    msg = srsly.msgpack_loads(bytes_data)
    for field_name in self.serialization_fields:
        value = msg[field_name]
        setattr(self, field_name, value)
    self.finish_deserializing()
    return self