How to use the thinc.v2v.Affine function in thinc

To help you get started, we've selected a few thinc.v2v.Affine examples based on popular ways the function is used in public projects.
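
Before the project snippets, here is a minimal stand-alone sketch of the layer itself (made-up dimensions, assuming thinc v7.x with the numpy backend): Affine(nO, nI) is a dense layer that maps nI-dimensional input rows to nO-dimensional outputs via a weight matrix W and bias b.

import numpy
from thinc.v2v import Affine

# Dense layer projecting 64-dimensional inputs to 32-dimensional outputs.
model = Affine(32, 64)

X = numpy.random.uniform(-1, 1, (8, 64)).astype("f")
Y = model(X)  # forward pass; Y has shape (8, 32)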


github explosion / spaCy / spacy / _ml.py
        suffix = HashEmbed(width // 2, nr_vector, column=3)
        shape = HashEmbed(width // 2, nr_vector, column=4)

        trained_vectors = FeatureExtracter(
            [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
        ) >> with_flatten(
            uniqued(
                (lower | prefix | suffix | shape)
                >> LN(Maxout(width, width + (width // 2) * 3)),
                column=0,
            )
        )

        if pretrained_dims:
            static_vectors = SpacyVectors >> with_flatten(
                Affine(width, pretrained_dims)
            )
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2
        else:
            vectors = trained_vectors
            vectors_width = width
            static_vectors = None
        tok2vec = vectors >> with_flatten(
            LN(Maxout(width, vectors_width))
            >> Residual((ExtractWindow(nW=1) >> LN(Maxout(width, width * 3)))) ** depth,
            pad=depth,
        )
        cnn_model = (
            tok2vec
            >> flatten_add_lengths
github explosion / spaCy / spacy / _ml.py
def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False, **cfg):
    """
    Build a simple CNN text classifier, given a token-to-vector model as input.
    If exclusive_classes=True, a softmax non-linearity is applied, so that the
    outputs sum to 1. If exclusive_classes=False, a logistic non-linearity
    is applied instead, so that outputs are in the range [0, 1].
    """
    with Model.define_operators({">>": chain}):
        if exclusive_classes:
            output_layer = Softmax(nr_class, tok2vec.nO)
        else:
            output_layer = (
                zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
            )
        model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer
    model.tok2vec = chain(tok2vec, flatten)
    model.nO = nr_class
    return model
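
The classifier above picks between a Softmax head and a zero-initialised Affine followed by a logistic non-linearity. As a self-contained illustration of the same chaining pattern (hypothetical widths, thinc v7 assumed), an Affine projection can be piped straight into a Softmax output layer:

import numpy
from thinc.v2v import Model, Affine, Softmax
from thinc.api import chain

with Model.define_operators({">>": chain}):
    # Project 128-dimensional vectors to 64 dimensions, then score 3 classes.
    classifier = Affine(64, 128) >> Softmax(3, 64)

X = numpy.random.uniform(-1, 1, (8, 128)).astype("f")
scores = classifier(X)  # shape (8, 3); each row sums to 1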
github honnibal / spacy-pretrain-polyaxon / lmao-ner / pretrain_lstm.py
def create_pretraining_model(nlp, tok2vec, objective="basic"):
    """Define a network for the pretraining."""
    output_size = nlp.vocab.vectors.data.shape[1]
    # This is annoying, but the parser etc. have the flatten step after
    # the tok2vec. To load the weights in cleanly, we need to match
    # the shape of the models' components exactly. So what we call
    # "tok2vec" has to be the same set of processes as what the components do.
    with Model.define_operators({">>": chain, "|": concatenate}):

        l2r_model = (
            tok2vec.l2r
            >> flatten
            >> LN(Maxout(output_size, tok2vec.l2r.nO, pieces=3))
            >> zero_init(Affine(output_size, drop_factor=0.0))
        )
        r2l_model = (
            tok2vec.r2l
            >> flatten
            >> LN(Maxout(output_size, tok2vec.r2l.nO, pieces=3))
            >> zero_init(Affine(output_size, drop_factor=0.0))
        )

        model = tok2vec.embed >> (l2r_model | r2l_model)

    model.tok2vec = tok2vec
    model.begin_training([nlp.make_doc("Give it a doc to infer shapes")])
    tok2vec.begin_training([nlp.make_doc("Give it a doc to infer shapes")])
    tokvecs = tok2vec([nlp.make_doc('hello there'), nlp.make_doc(u'and hello')])
    print(tokvecs.shape)
    return model
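
The pretraining model above is trained elsewhere in the script; as a smaller, self-contained sketch of the update step (synthetic data, assuming thinc v7's optimizer API), a single Affine can be trained with begin_update and its backprop callback:

import numpy
from thinc.v2v import Affine
from thinc.neural.optimizers import Adam
from thinc.neural.ops import NumpyOps

model = Affine(16, 32)
optimizer = Adam(NumpyOps(), 0.001)

X = numpy.random.uniform(-1, 1, (4, 32)).astype("f")
target = numpy.zeros((4, 16), dtype="f")

Yh, backprop = model.begin_update(X)  # forward pass plus a backprop callback
backprop(Yh - target, sgd=optimizer)  # push the gradient back and update W, b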
github explosion / thinc / examples / imdb_attention.py
                prepare_self_attention(Affine(width*3, width), nM=width, nH=4)
                >> MultiHeadedAttention()
                >> with_flatten(Maxout(width, width, pieces=3))
            )
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(mean_pool)
            >> Residual(LN(Maxout(width)))
        )

        model = (
            foreach(sent2vec, drop_factor=2.0)
            >> Residual(
                prepare_self_attention(Affine(width*3, width), nM=width, nH=4)
                >> MultiHeadedAttention()
                >> with_flatten(LN(Affine(width, width)))
            )
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(mean_pool)
            >> Residual(LN(Maxout(width))) ** 2
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model
github explosion / spaCy / examples / pipeline / wiki_entity_linking / train_el.py
def _context_encoder(self, embed_width, article_width, sent_width, hidden_width, end_width):
        self.article_encoder = self._encoder(in_width=embed_width, hidden_with=hidden_width, end_width=article_width)
        self.sent_encoder = self._encoder(in_width=embed_width, hidden_with=hidden_width, end_width=sent_width)

        model = Affine(end_width, article_width+sent_width, drop_factor=0.0)
        return model
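
The Affine above sizes its input as the sum of the article and sentence encoder widths, because it is applied to their concatenated outputs. A minimal sketch of that sizing (made-up widths):

import numpy
from thinc.v2v import Affine

article_width, sent_width, end_width = 128, 64, 32
combine = Affine(end_width, article_width + sent_width, drop_factor=0.0)

article_vec = numpy.random.uniform(-1, 1, (1, article_width)).astype("f")
sent_vec = numpy.random.uniform(-1, 1, (1, sent_width)).astype("f")
output = combine(numpy.hstack([article_vec, sent_vec]))  # shape (1, end_width)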
github explosion / thinc / examples / imdb_attention.py
            with_flatten(embed)
            >> Residual(
                prepare_self_attention(Affine(width*3, width), nM=width, nH=4)
                >> MultiHeadedAttention()
                >> with_flatten(Maxout(width, width, pieces=3))
            )
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(mean_pool)
            >> Residual(LN(Maxout(width)))
        )

        model = (
            foreach(sent2vec, drop_factor=2.0)
            >> Residual(
                prepare_self_attention(Affine(width*3, width), nM=width, nH=4)
                >> MultiHeadedAttention()
                >> with_flatten(LN(Affine(width, width)))
            )
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(mean_pool)
            >> Residual(LN(Maxout(width))) ** 2
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model
github explosion / spacy-transformers / spacy_transformers / model_registry.py
def affine_output(output_size, input_size, drop_factor, **cfg):
    return Affine(output_size, input_size, drop_factor=drop_factor)
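
The factory simply forwards its arguments to Affine, so calling it is straightforward; for example (hypothetical sizes for a transformer-backed pipeline):

# Two-class output head over 768-dimensional vectors, dropout scaling disabled.
output_layer = affine_output(output_size=2, input_size=768, drop_factor=0.0)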
github explosion / spaCy / spacy / cli / train_from_config.py
    from thinc.v2v import Affine, Model
    from thinc.api import chain
    from spacy._ml import flatten
    from spacy._ml import PrecomputableAffine
    from spacy.syntax._parser_model import ParserModel

    token_vector_width = tok2vec.nO
    tok2vec = chain(tok2vec, flatten)
    tok2vec.nO = token_vector_width

    lower = PrecomputableAffine(
        hidden_width, nF=nr_feature_tokens, nI=tok2vec.nO, nP=maxout_pieces
    )
    lower.nP = maxout_pieces
    with Model.use_device("cpu"):
        upper = Affine()
    # Initialize weights at zero, as it's a classification layer.
    for desc in upper.descriptions.values():
        if desc.name == "W":
            desc.init = None
    return ParserModel(tok2vec, lower, upper)
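
Note that upper is built as Affine() with no dimensions; thinc fills them in later from data. A minimal sketch of that deferred sizing (hypothetical sizes, assuming thinc v7's on-data shape inference):

import numpy
from thinc.v2v import Affine

layer = Affine(8)  # nO fixed, nI left unset for now
X = numpy.random.uniform(-1, 1, (32, 12)).astype("f")
y = numpy.zeros((32, 8), dtype="f")
layer.begin_training(X, y)  # on-data hooks infer nI=12 and allocate weights
print(layer.nI, layer.nO)   # 12 8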
github explosion / thinc / examples / lstm_pos_tagger.py
def init_models(n_tags, n_words, widths):
    word_width, tag_width, hidden_width = widths
    with Model.define_operators({'|': concatenate, '>>': chain}):
        word_model = (
            with_flatten(
                Embed(word_width, word_width, n_words), pad=0
            )
            >> BiLSTM(word_width, residual=True)
            >> with_flatten(
                Affine(hidden_width, word_width*2))
        )
        
        state_model = Affine(hidden_width, hidden_width)

        tags_model = (
            Embed(hidden_width, tag_width, n_tags)
        )

        output_model = Softmax(n_tags, hidden_width)
    return word_model, TaggerModel(tags_model, state_model, output_model)
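
The word model wraps its Affine in with_flatten so it can run over a batch of variable-length sequences. A minimal sketch of that wrapper on its own (made-up sizes, thinc v7 assumed):

import numpy
from thinc.v2v import Affine
from thinc.api import with_flatten

# Project each token vector in each sequence from 48 to 16 dimensions.
project = with_flatten(Affine(16, 48))
seqs = [numpy.random.uniform(-1, 1, (n, 48)).astype("f") for n in (5, 3)]
outputs = project(seqs)  # list of arrays with shapes (5, 16) and (3, 16)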