How to use the thinc.v2v.Model class in thinc

To help you get started, we’ve selected a few thinc.v2v.Model examples based on popular ways it is used in public projects.

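Before diving into the snippets, it helps to know what thinc.v2v.Model actually is: the base class for thinc v7's vector-to-vector layers, such as Affine, ReLu, Maxout and Softmax. Here is a minimal sketch of creating and calling such a layer (the layer choice and shapes are illustrative assumptions, not taken from the projects below):

import numpy
from thinc.v2v import Affine

model = Affine(2, 4)                # Affine is a thinc.v2v.Model subclass
X = numpy.zeros((8, 4), dtype="f")  # batch of 8 input vectors of width 4
Y = model(X)                        # forward pass; Y has shape (8, 2)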

github explosion / thinc / examples / imdb_cnn.py
# Imports assumed for this excerpt (thinc v7 module layout; the original
# file's exact import lines are not shown here):
from thinc.v2v import Model, Maxout
from thinc.i2v import HashEmbed, StaticVectors
from thinc.t2t import ExtractWindow, ParametricAttention
from thinc.misc import LayerNorm as LN
from thinc.misc import Residual
from thinc.api import chain, clone, concatenate, flatten_add_lengths, with_getitem


def build_model(nr_class, width, depth, conv_depth, vectors_name, **kwargs):
    with Model.define_operators({"|": concatenate, ">>": chain, "**": clone}):
        embed = (
            HashEmbed(width, 5000, column=1)
            | StaticVectors(vectors_name, width, column=5)
            | HashEmbed(width // 2, 750, column=2)
            | HashEmbed(width // 2, 750, column=3)
            | HashEmbed(width // 2, 750, column=4)
        ) >> LN(Maxout(width))

        sent2vec = (
            flatten_add_lengths
            >> with_getitem(
                0,
                embed
                >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width))) ** conv_depth,
            )
            >> ParametricAttention(width)
            # ... (rest of the pipeline truncated in the source excerpt)
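The with Model.define_operators(...) block above is what makes the "|", ">>" and "**" syntax work: it binds Python operators to thinc combinators for the duration of the block. A minimal sketch (thinc v7 API; the layer sizes are illustrative):

from thinc.v2v import Model, ReLu
from thinc.api import chain, clone

with Model.define_operators({">>": chain, "**": clone}):
    # chain one ReLu layer into two cloned ReLu layers
    mlp = ReLu(64, 768) >> ReLu(64, 64) ** 2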
github explosion / spaCy / spacy / cli / train_from_config.py
):  # (the function signature is truncated in the source excerpt)
    from thinc.v2v import Affine, Model
    from thinc.api import chain
    from spacy._ml import flatten
    from spacy._ml import PrecomputableAffine
    from spacy.syntax._parser_model import ParserModel

    token_vector_width = tok2vec.nO
    tok2vec = chain(tok2vec, flatten)
    tok2vec.nO = token_vector_width

    lower = PrecomputableAffine(
        hidden_width, nF=nr_feature_tokens, nI=tok2vec.nO, nP=maxout_pieces
    )
    lower.nP = maxout_pieces
    with Model.use_device("cpu"):
        upper = Affine()
    # Initialize weights at zero, as it's a classification layer.
    for desc in upper.descriptions.values():
        if desc.name == "W":
            desc.init = None
    return ParserModel(tok2vec, lower, upper)
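The Model.use_device context manager used above temporarily switches the backend that new layers are created on. A minimal sketch (thinc v7 API; the layer sizes are illustrative):

from thinc.v2v import Affine, Model

with Model.use_device("cpu"):
    upper = Affine(64, 64)  # weights allocated with the CPU (NumPy) ops

Outside the block, layer creation falls back to whatever Model.ops is currently set to.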
github explosion / thinc / examples / imdb_attention.py
def main(use_gpu=False, nb_epoch=100):
    fix_random_seed(0)
    if use_gpu:
        require_gpu()
    train, test = datasets.imdb(limit=2000)
    print("Load data")
    train_X, train_y = zip(*train)
    test_X, test_y = zip(*test)
    train_y = Model.ops.asarray(to_categorical(train_y, nb_classes=2))
    test_y = Model.ops.asarray(to_categorical(test_y, nb_classes=2))

    nlp = spacy.load("en_vectors_web_lg")
    nlp.add_pipe(nlp.create_pipe("sentencizer"), first=True)
    register_vectors(Model.ops, nlp.vocab.vectors.name, nlp.vocab.vectors.data)

    preprocessor = FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
    train_X = [preprocessor(list(doc.sents)) for doc in tqdm.tqdm(nlp.pipe(train_X))]
    test_X = [preprocessor(list(doc.sents)) for doc in tqdm.tqdm(nlp.pipe(test_X))]

    dev_X = train_X[-1000:]
    dev_y = train_y[-1000:]
    train_X = train_X[:-1000]
    train_y = train_y[:-1000]
    print("Parse data")
    n_sent = sum([len(list(sents)) for sents in train_X])
    print("%d sentences" % n_sent)

    model = build_model(
        2,
        vectors_name=nlp.vocab.vectors.name,
        width=128,
        conv_depth=2,
        depth=2,
        train_X=train_X,
        train_y=train_y,
    )
    # ... (training loop truncated in the source excerpt)
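A detail worth noting in this example is Model.ops.asarray: Model.ops is the process-wide ops object (NumpyOps by default, CupyOps after require_gpu), and asarray copies data onto the active device. A minimal sketch (thinc v7 API; the data is illustrative):

import numpy
from thinc.v2v import Model

labels = numpy.asarray([0, 1, 1, 0], dtype="i")
device_labels = Model.ops.asarray(labels)  # lands in CPU or GPU memory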
github explosion / thinc / examples / ngram_bow.py
def main(use_gpu=False, nb_epoch=50):
    if use_gpu:
        Model.ops = CupyOps()
        Model.Ops = CupyOps
    train, test = datasets.imdb()
    print("Load data")
    train_X, train_y = zip(*train)
    test_X, test_y = zip(*test)
    train_y = to_categorical(train_y, nb_classes=2)
    test_y = to_categorical(test_y, nb_classes=2)

    nlp = Language()

    dev_X = train_X[-1000:]
    dev_y = train_y[-1000:]
    train_X = train_X[:-1000]
    train_y = train_y[:-1000]
    print("Parse data")
    train_X = [nlp.make_doc(x) for x in train_X]
    dev_X = [nlp.make_doc(x) for x in dev_X]
    # ... (rest of the example truncated in the source excerpt)
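This example shows the other way to select a device in thinc v7: rather than calling require_gpu, it assigns the ops backend directly on the Model class before any layers are built. A minimal sketch (the use_gpu flag is a stand-in for your own configuration):

from thinc.neural.ops import CupyOps, NumpyOps
from thinc.v2v import Model

use_gpu = False  # hypothetical flag
if use_gpu:
    Model.ops = CupyOps()
    Model.Ops = CupyOps
else:
    Model.ops = NumpyOps()
    Model.Ops = NumpyOps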
github explosion / spacy-transformers / spacy_transformers / model_registry.py
from thinc.v2v import Model  # (import not shown in the original excerpt)


def softmax(X, drop=0.0):
    ops = Model.ops  # the process-wide ops backend (NumpyOps or CupyOps)
    Y = ops.softmax(X)

    def backprop_softmax(dY, sgd=None):
        dX = ops.backprop_softmax(Y, dY)  # gradient of softmax w.r.t. its input
        return dX

    return Y, backprop_softmax
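Functions with this (X, drop) -> (Y, backprop) signature follow thinc v7's convention for custom layers, so they can be turned into a Model with the layerize helper. A minimal sketch (assuming the softmax function above is in scope):

from thinc.api import layerize

softmax_layer = layerize(softmax)  # now usable like any other Model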
github explosion / spaCy / spacy / _ml.py
# Method of a Model subclass in spacy/_ml.py; the enclosing class
# statement is outside this excerpt.
def __init__(self, out_sizes, nI=None, **kwargs):
    Model.__init__(self, **kwargs)
    self.out_sizes = out_sizes  # one output block per size
    self.nO = sum(out_sizes)    # total output width is the sum of the blocks
    self.nI = nI                # input width; may be inferred later
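A hedged usage sketch for a layer built this way; the class name MultiOutput below is a hypothetical stand-in, since the excerpt does not show the enclosing class statement:

from thinc.v2v import Model

class MultiOutput(Model):  # hypothetical stand-in name
    def __init__(self, out_sizes, nI=None, **kwargs):
        Model.__init__(self, **kwargs)
        self.out_sizes = out_sizes
        self.nO = sum(out_sizes)
        self.nI = nI

layer = MultiOutput([4, 3], nI=128)
assert layer.nO == 7  # the output width is the sum of the block sizes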
github explosion / thinc / examples / text-pair / glove_mwe_multipool_siamese.py
from thinc.v2v import Model  # (import not shown in the original excerpt)


def logistic(X, drop=0.0):
    ops = Model.ops  # the process-wide ops backend
    y = 1.0 / (1.0 + ops.xp.exp(-X))  # ops.xp is numpy or cupy

    def backward(dy, sgd=None):
        return dy * y * (1 - y)  # derivative of the logistic function

    return y, backward
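Like softmax above, logistic follows the forward/backprop convention, so it can be wrapped and composed with other layers. A minimal sketch (thinc v7 API; the layer sizes are illustrative):

from thinc.api import chain, layerize
from thinc.v2v import Affine

model = chain(Affine(1, 64), layerize(logistic))  # affine projection, then a sigmoid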