# Fragment: the tail of a text-classifier model definition (the enclosing
# function and the start of the expression are cut off in the extract).
# These snippets assume thinc v7-era imports (thinc.v2v, thinc.i2v,
# thinc.t2t, thinc.t2v, thinc.api) plus spaCy-internal helpers such as
# zero_init and logistic.
        >> zero_init(Affine(nr_class, width, drop_factor=0.0))
        >> logistic
    )
    return model
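# Side note: the ">>" / "|" / "**" operators in these snippets are not
# Python builtins; they are registered per-block with Model.define_operators
# in the thinc v7 API. A minimal self-contained sketch:
from thinc.api import chain
from thinc.v2v import Model, Affine, Softmax

with Model.define_operators({">>": chain}):
    # 300-d inputs -> 64-d hidden -> probabilities over 3 classes
    mlp = Affine(64, 300) >> Softmax(3, 64)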
# Embedding section of a text-classifier builder: hash-based tables for the
# LOWER, PREFIX, SUFFIX and SHAPE features, concatenated per token and
# cached per unique value with uniqued(). (Continues further down the page.)
lower = HashEmbed(width, nr_vector, column=1)
prefix = HashEmbed(width // 2, nr_vector, column=2)
suffix = HashEmbed(width // 2, nr_vector, column=3)
shape = HashEmbed(width // 2, nr_vector, column=4)
trained_vectors = FeatureExtracter(
    [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
) >> with_flatten(
    uniqued(
        (lower | prefix | suffix | shape)
        >> LN(Maxout(width, width + (width // 2) * 3)),
        column=0,
    )
)
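# Side note: a conceptual numpy sketch of the hashing trick behind
# HashEmbed (not thinc's implementation): hash IDs into a fixed table and
# tolerate collisions, so no vocabulary has to be stored.
import numpy

nr_vector, width = 1000, 64
table = numpy.random.uniform(-0.1, 0.1, (nr_vector, width))

def hash_embed(ids):
    # integer IDs of any magnitude -> (len(ids), width) vectors
    return table[ids % nr_vector]

vectors = hash_embed(numpy.asarray([3, 1003, 2 ** 40]))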
def build_model(nr_class, width, depth, conv_depth, vectors_name, **kwargs):
    with Model.define_operators({"|": concatenate, ">>": chain, "**": clone}):
        embed = (
            HashEmbed(width, 5000, column=1)
            | StaticVectors(vectors_name, width, column=5)
            | HashEmbed(width // 2, 750, column=2)
            | HashEmbed(width // 2, 750, column=3)
            | HashEmbed(width // 2, 750, column=4)
        ) >> LN(Maxout(width))
        sent2vec = (
            with_flatten(embed)
            >> Residual(
                prepare_self_attention(Affine(width * 3, width), nM=width, nH=4)
                >> MultiHeadedAttention()
                >> with_flatten(Maxout(width, width, pieces=3))
            )
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(mean_pool)
            >> Residual(LN(Maxout(width)))
        )
        model = (
            foreach(sent2vec, drop_factor=2.0)
            >> Residual(
                prepare_self_attention(Affine(width * 3, width), nM=width, nH=4)
                >> MultiHeadedAttention()
                >> with_flatten(LN(Affine(width, width)))
            )
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            # (the rest of the expression is truncated in the extract)
        )
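# Side note: scaled dot-product attention in plain numpy, the operation at
# the core of the MultiHeadedAttention layer above (conceptual sketch; the
# real layer also splits into nH heads and re-projects the output).
import numpy

def attention(Q, K, V):
    scores = Q @ K.T / numpy.sqrt(K.shape[-1])
    scores -= scores.max(axis=-1, keepdims=True)    # numerical stability
    weights = numpy.exp(scores)
    weights /= weights.sum(axis=-1, keepdims=True)  # softmax over keys
    return weights @ V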
# POS-tagger example (Spanish AnCora data): hashed word features feed a
# Maxout, then a PyTorch BiLSTM, then a per-token Softmax. width, depth and
# min_batch_size come from the enclosing function's arguments (hence the
# cfg = dict(locals()) capture).
cfg = dict(locals())
print(cfg)
train_data, check_data, nr_tag = ancora_pos_tags()
train_data = list(train_data)
check_data = list(check_data)
extracter = FeatureExtracter("es", attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
with Model.define_operators({"**": clone, ">>": chain, "+": add, "|": concatenate}):
    lower_case = HashEmbed(width, 100, column=0)
    shape = HashEmbed(width // 2, 200, column=1)
    prefix = HashEmbed(width // 2, 100, column=2)
    suffix = HashEmbed(width // 2, 100, column=3)
    model = (
        with_flatten(
            (lower_case | shape | prefix | suffix) >> Maxout(width, pieces=3)
        )
        >> PyTorchBiLSTM(width, width, depth)
        >> with_flatten(Softmax(nr_tag))
    )
train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)
n_train = float(sum(len(x) for x in train_X))
global epoch_train_acc
with model.begin_training(train_X[:10], train_y[:10], **cfg) as (
    trainer,
    optimizer,
):
    trainer.each_epoch.append(track_progress(**locals()))
    trainer.batch_size = min_batch_size
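    # Assumed continuation, modelled on thinc v7's tagger examples (not
    # part of the extracted snippet): iterate minibatches and backprop the
    # difference between predictions and one-hot targets.
    for X, y in trainer.iterate(train_X, train_y):
        yh, backprop = model.begin_update(X, drop=trainer.dropout)
        backprop([yh[i] - y[i] for i in range(len(yh))], optimizer)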
# Continuation of the text-classifier builder from the top of the page:
# optionally concatenate pretrained static vectors, build the CNN tok2vec,
# then set up the CNN and bag-of-words branches.
if pretrained_dims:
    static_vectors = SpacyVectors >> with_flatten(
        Affine(width, pretrained_dims)
    )
    # TODO Make concatenate support lists
    vectors = concatenate_lists(trained_vectors, static_vectors)
    vectors_width = width * 2
else:
    vectors = trained_vectors
    vectors_width = width
    static_vectors = None
tok2vec = vectors >> with_flatten(
    LN(Maxout(width, vectors_width))
    >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width, width * 3))) ** depth,
    pad=depth,
)
cnn_model = (
    tok2vec
    >> flatten_add_lengths
    >> ParametricAttention(width)
    >> Pooling(sum_pool)
    >> Residual(zero_init(Maxout(width, width)))
    >> zero_init(Affine(nr_class, width, drop_factor=0.0))
)
linear_model = build_bow_text_classifier(
    nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False
)
if cfg.get("exclusive_classes"):
    ...  # truncated in the extract
# Fragment from a bidirectional-LM pretraining builder (the enclosing
# function is not shown): separate left-to-right and right-to-left output
# layers over a shared embedding.
# This is annoying, but the parser etc. have the flatten step after
# the tok2vec. To load the weights in cleanly, we need to match
# the shape of the models' components exactly. So what we call
# "tok2vec" has to be the same set of processes as what the components do.
with Model.define_operators({">>": chain, "|": concatenate}):
    l2r_model = (
        tok2vec.l2r
        >> flatten
        >> LN(Maxout(output_size, tok2vec.l2r.nO, pieces=3))
        >> zero_init(Affine(output_size, drop_factor=0.0))
    )
    r2l_model = (
        tok2vec.r2l
        >> flatten
        >> LN(Maxout(output_size, tok2vec.r2l.nO, pieces=3))
        >> zero_init(Affine(output_size, drop_factor=0.0))
    )
    model = tok2vec.embed >> (l2r_model | r2l_model)
model.tok2vec = tok2vec
model.begin_training([nlp.make_doc("Give it a doc to infer shapes")])
tok2vec.begin_training([nlp.make_doc("Give it a doc to infer shapes")])
tokvecs = tok2vec([nlp.make_doc("hello there"), nlp.make_doc("and hello")])
print(tokvecs.shape)
return model
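# Side note: Residual(...) in these snippets computes y = x + f(x), which
# is why every wrapped sub-network must preserve its input width. A
# one-line numpy rendering of the idea:
import numpy

def residual(f, X):
    return X + f(X)

Y = residual(lambda X: 0.5 * X, numpy.ones((4, 8)))  # toy f; Y is all 1.5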
def _encoder(in_width, hidden_width, end_width):
    conv_depth = 2
    cnn_maxout_pieces = 3
    with Model.define_operators({">>": chain, "**": clone}):
        convolution = Residual(
            ExtractWindow(nW=1)
            >> LN(Maxout(hidden_width, hidden_width * 3, pieces=cnn_maxout_pieces))
        )
        encoder = (
            SpacyVectors
            >> with_flatten(
                LN(Maxout(hidden_width, in_width)) >> convolution ** conv_depth,
                pad=conv_depth,
            )
            >> flatten_add_lengths
            >> ParametricAttention(hidden_width)
            >> Pooling(mean_pool)
            >> Residual(zero_init(Maxout(hidden_width, hidden_width)))
            >> zero_init(Affine(end_width, hidden_width, drop_factor=0.0))
        )
        # TODO: ReLu or LN(Maxout)? sum_pool or mean_pool?
        return encoder
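# Side note: what ExtractWindow(nW=1) does, sketched in numpy: concatenate
# each row with its left and right neighbours (zero-padded at the edges),
# which is why the Maxout above takes hidden_width * 3 input columns.
import numpy

def extract_window(X):
    pad = numpy.zeros((1, X.shape[1]), dtype=X.dtype)
    left = numpy.vstack([pad, X[:-1]])
    right = numpy.vstack([X[1:], pad])
    return numpy.hstack([left, X, right])

assert extract_window(numpy.ones((5, 16))).shape == (5, 48)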
# Operator cheat-sheet (tail of the example's docstring):
#   * clone (**): (f ** 3)(x) -> f''(f'(f(x))), where f, f' and f'' have
#     distinct weights.
#   * concatenate (|): merge the outputs of two models into a single vector,
#     i.e. (f|g)(x) -> hstack(f(x), g(x))
Model.lsuv = True
# Model.ops = CupyOps()
with Model.define_operators({">>": chain, "**": clone, "|": concatenate, "+": add}):
    mwe_encode = ExtractWindow(nW=1) >> LN(
        Maxout(width, drop_factor=0.0, pieces=pieces)
    )
    sent2vec = (
        flatten_add_lengths
        >> with_getitem(
            0,
            (HashEmbed(width, 3000) | StaticVectors("en_vectors_web_lg", width))
            >> LN(Maxout(width, width * 2))
            >> Residual(mwe_encode) ** depth,
        )  # : word_ids{T}
        >> Pooling(mean_pool, max_pool)
        >> Residual(LN(Maxout(width * 2, pieces=pieces), nO=width * 2)) ** 2
        >> logistic
    )
    model = Siamese(sent2vec, CauchySimilarity(width * 2))
print("Read and parse data: %s" % dataset)
if dataset == "quora":
    train, dev = datasets.quora_questions()
elif dataset == "snli":
    train, dev = datasets.snli()
elif dataset == "stackxc":
    train, dev = datasets.stack_exchange()
elif dataset in ("quora+snli", "snli+quora"):
    ...  # truncated in the extract
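# Side note: Pooling(mean_pool, max_pool) above turns a variable-length
# (n_tokens, width) sequence into a fixed (width * 2,) vector by
# concatenating the column-wise mean and max; in numpy terms:
import numpy

def mean_max_pool(X):
    return numpy.concatenate([X.mean(axis=0), X.max(axis=0)])

assert mean_max_pool(numpy.random.rand(7, 32)).shape == (64,)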
def create_pretraining_model(nlp, tok2vec):
    """Define a network for the pretraining. We simply add an output layer
    onto the tok2vec input model. The tok2vec input model needs to be a model
    that takes a batch of Doc objects (as a list), and returns a list of
    arrays. Each array in the output needs to have one row per token in the
    doc.
    """
    output_size = nlp.vocab.vectors.data.shape[1]
    output_layer = chain(
        LN(Maxout(300, pieces=3)), Affine(output_size, drop_factor=0.0)
    )
    # This is annoying, but the parser etc. have the flatten step after
    # the tok2vec. To load the weights in cleanly, we need to match
    # the shape of the models' components exactly. So what we call
    # "tok2vec" has to be the same set of processes as what the components do.
    tok2vec = chain(tok2vec, flatten)
    model = chain(tok2vec, output_layer)
    model = masked_language_model(nlp.vocab, model)
    model.tok2vec = tok2vec
    model.output_layer = output_layer
    model.begin_training([nlp.make_doc("Give it a doc to infer shapes")])
    return model
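# Side note: masked_language_model wraps the network in a cloze-style
# objective. A conceptual sketch of the masking step with a hypothetical
# helper (the real implementation lives in spaCy's pretraining code and
# differs in detail):
import numpy

def mask_tokens(ids, mask_id, p=0.15, rng=numpy.random):
    # corrupt roughly p of the positions and remember which, so the loss
    # can be computed only over the corrupted tokens
    mask = rng.uniform(size=len(ids)) < p
    corrupted = ids.copy()
    corrupted[mask] = mask_id
    return corrupted, mask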
# Fragment from a Tok2Vec builder. The head of the expression (the first
# branch, which appears to build `embed` from subword features) is cut off
# in the extract; cols, norm, width, conv_depth, cnn_maxout_pieces and
# bilstm_depth come from the enclosing function.
        column=cols.index(ORTH),
    )
elif char_embed:
    embed = concatenate_lists(
        CharacterEmbed(nM=64, nC=8),
        FeatureExtracter(cols) >> with_flatten(norm),
    )
    reduce_dimensions = LN(
        Maxout(width, 64 * 8 + width, pieces=cnn_maxout_pieces)
    )
else:
    embed = norm
convolution = Residual(
    ExtractWindow(nW=1)
    >> LN(Maxout(width, width * 3, pieces=cnn_maxout_pieces))
)
if char_embed:
    tok2vec = embed >> with_flatten(
        reduce_dimensions >> convolution ** conv_depth, pad=conv_depth
    )
else:
    tok2vec = FeatureExtracter(cols) >> with_flatten(
        embed >> convolution ** conv_depth, pad=conv_depth
    )
if bilstm_depth >= 1:
    tok2vec = tok2vec >> PyTorchBiLSTM(width, width, bilstm_depth)
# Work around thinc API limitations :(. TODO: Revise in Thinc 7
tok2vec.nO = width
tok2vec.embed = embed
return tok2vec
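# Side note: the shape contract of PyTorchBiLSTM(width, width, depth),
# sketched directly in PyTorch (assuming the wrapper keeps the output width
# equal to the input width by giving each direction width // 2 units):
import torch

width, depth = 128, 2
lstm = torch.nn.LSTM(width, width // 2, depth, bidirectional=True)
X = torch.zeros(10, 1, width)   # (seq_len, batch, feature)
Y, _ = lstm(X)
assert Y.shape == (10, 1, width)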