lower = HashEmbed(width, nr_vector, column=1)
prefix = HashEmbed(width // 2, nr_vector, column=2)
suffix = HashEmbed(width // 2, nr_vector, column=3)
shape = HashEmbed(width // 2, nr_vector, column=4)
trained_vectors = FeatureExtracter(
[ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
) >> with_flatten(
uniqued(
(lower | prefix | suffix | shape)
>> LN(Maxout(width, width + (width // 2) * 3)),
column=0,
)
)
if pretrained_dims:
static_vectors = SpacyVectors >> with_flatten(
Affine(width, pretrained_dims)
)
# TODO Make concatenate support lists
vectors = concatenate_lists(trained_vectors, static_vectors)
vectors_width = width * 2
else:
vectors = trained_vectors
vectors_width = width
static_vectors = None
tok2vec = vectors >> with_flatten(
LN(Maxout(width, vectors_width))
>> Residual((ExtractWindow(nW=1) >> LN(Maxout(width, width * 3)))) ** depth,
pad=depth,
)
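
# A minimal usage sketch for the `tok2vec` pipeline defined above, assuming `nlp`
# (a loaded spaCy 2.x pipeline) and `width` are in scope: FeatureExtracter pulls
# per-token attribute IDs out of the Docs, and with_flatten applies the embedding
# and convolution layers to the concatenated tokens before splitting the result
# back into one array per Doc.
docs = [nlp.make_doc("This is a sentence."), nlp.make_doc("Another one.")]
tok2vec.begin_training(docs)   # infer any unset input/output shapes
token_vectors = tok2vec(docs)  # list of (n_tokens, width) arrays, one per Doc
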
cnn_model = (
tok2vec
>> flatten_add_lengths
def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False, **cfg):
"""
Build a simple CNN text classifier, given a token-to-vector model as input.
If exclusive_classes=True, a softmax non-linearity is applied, so that the
outputs sum to 1. If exclusive_classes=False, a logistic non-linearity
is applied instead, so that outputs are in the range [0, 1].
"""
with Model.define_operators({">>": chain}):
if exclusive_classes:
output_layer = Softmax(nr_class, tok2vec.nO)
else:
output_layer = (
zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
)
model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer
model.tok2vec = chain(tok2vec, flatten)
model.nO = nr_class
return model
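
# Minimal usage sketch, assuming spaCy 2.x, where spacy._ml.Tok2Vec provides a
# suitable token-to-vector layer (its keyword arguments may differ by version).
from spacy._ml import Tok2Vec

tok2vec_layer = Tok2Vec(width=96, embed_size=2000)
# exclusive_classes=True gives a Softmax output (class scores sum to 1);
# exclusive_classes=False gives independent logistic outputs in [0, 1].
textcat_model = build_simple_cnn_text_classifier(
    tok2vec_layer, nr_class=3, exclusive_classes=True
)
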
def create_pretraining_model(nlp, tok2vec, objective="basic"):
"""Define a network for the pretraining."""
output_size = nlp.vocab.vectors.data.shape[1]
# This is annoying, but the parser etc. have the flatten step after
# the tok2vec. To load the weights in cleanly, we need to match
# the shape of the models' components exactly. So what we call
# "tok2vec" has to be the same set of processes as what the components do.
with Model.define_operators({">>": chain, "|": concatenate}):
l2r_model = (
tok2vec.l2r
>> flatten
>> LN(Maxout(output_size, tok2vec.l2r.nO, pieces=3))
>> zero_init(Affine(output_size, drop_factor=0.0))
)
r2l_model = (
tok2vec.r2l
>> flatten
>> LN(Maxout(output_size, tok2vec.r2l.nO, pieces=3))
>> zero_init(Affine(output_size, drop_factor=0.0))
)
model = tok2vec.embed >> (l2r_model | r2l_model)
model.tok2vec = tok2vec
model.begin_training([nlp.make_doc("Give it a doc to infer shapes")])
tok2vec.begin_training([nlp.make_doc("Give it a doc to infer shapes")])
tokvecs = tok2vec([nlp.make_doc("hello there"), nlp.make_doc("and hello")])
print(tokvecs.shape)
return model
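
# Minimal sketch, assuming `tok2vec` exposes the .embed, .l2r and .r2l sublayers
# used above and that `nlp` has pretrained vectors loaded: the returned model maps
# Docs to one row per token, with the left-to-right and right-to-left predictions
# concatenated, each half matching the width of the static vectors table.
pretrain_model = create_pretraining_model(nlp, tok2vec)
predictions = pretrain_model([nlp.make_doc("A short example sentence.")])
assert predictions.shape[1] == 2 * nlp.vocab.vectors.data.shape[1]
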
def _context_encoder(self, embed_width, article_width, sent_width, hidden_width, end_width):
self.article_encoder = self._encoder(in_width=embed_width, hidden_with=hidden_width, end_width=article_width)
self.sent_encoder = self._encoder(in_width=embed_width, hidden_with=hidden_width, end_width=sent_width)
model = Affine(end_width, article_width+sent_width, drop_factor=0.0)
return model
sent2vec = (
    with_flatten(embed)
>> Residual(
prepare_self_attention(Affine(width*3, width), nM=width, nH=4)
>> MultiHeadedAttention()
>> with_flatten(Maxout(width, width, pieces=3))
)
>> flatten_add_lengths
>> ParametricAttention(width, hard=False)
>> Pooling(mean_pool)
>> Residual(LN(Maxout(width)))
)
model = (
foreach(sent2vec, drop_factor=2.0)
>> Residual(
prepare_self_attention(Affine(width*3, width), nM=width, nH=4)
>> MultiHeadedAttention()
>> with_flatten(LN(Affine(width, width)))
)
>> flatten_add_lengths
>> ParametricAttention(width, hard=False)
>> Pooling(mean_pool)
>> Residual(LN(Maxout(width))) ** 2
>> Softmax(nr_class)
)
model.lsuv = False
return model
def affine_output(output_size, input_size, drop_factor, **cfg):
return Affine(output_size, input_size, drop_factor=drop_factor)
from thinc.v2v import Affine, Model
from thinc.api import chain
from spacy._ml import flatten
from spacy._ml import PrecomputableAffine
from spacy.syntax._parser_model import ParserModel
token_vector_width = tok2vec.nO
tok2vec = chain(tok2vec, flatten)
tok2vec.nO = token_vector_width
lower = PrecomputableAffine(
hidden_width, nF=nr_feature_tokens, nI=tok2vec.nO, nP=maxout_pieces
)
lower.nP = maxout_pieces
with Model.use_device("cpu"):
upper = Affine()
# Initialize weights at zero, as it's a classification layer.
for desc in upper.descriptions.values():
if desc.name == "W":
desc.init = None
return ParserModel(tok2vec, lower, upper)
def init_models(n_tags, n_words, widths):
word_width, tag_width, hidden_width = widths
with Model.define_operators({'|': concatenate, '>>': chain}):
word_model = (
with_flatten(
Embed(word_width, word_width, n_words), pad=0
)
>> BiLSTM(word_width, residual=True)
>> with_flatten(
Affine(hidden_width, word_width*2))
)
state_model = Affine(hidden_width, hidden_width)
tags_model = (
Embed(hidden_width, tag_width, n_tags)
)
output_model = Softmax(n_tags, hidden_width)
return word_model, TaggerModel(tags_model, state_model, output_model)
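
# Minimal usage sketch for init_models, assuming TaggerModel and the thinc layers
# used above are imported in the surrounding example code; the widths
# (word_width, tag_width, hidden_width) are illustrative values.
word_model, tagger_model = init_models(
    n_tags=17, n_words=10000, widths=(128, 32, 128)
)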