from thinc.api import chain, clone, concatenate, with_getitem, flatten_add_lengths, foreach
from thinc.i2v import HashEmbed, StaticVectors
from thinc.misc import Residual, LayerNorm as LN
from thinc.t2t import ExtractWindow, ParametricAttention
from thinc.t2v import Pooling, sum_pool
from thinc.v2v import Maxout, Model, Softmax


def build_model(nr_class, width, depth, conv_depth, vectors_name, **kwargs):
    # Bind thinc's combinators to operators: | concatenates, >> chains,
    # ** clones a layer the given number of times.
    with Model.define_operators({"|": concatenate, ">>": chain, "**": clone}):
        # Embed five token-attribute columns plus pretrained static vectors,
        # then mix them with a layer-normalized Maxout.
        embed = (
            HashEmbed(width, 5000, column=1)
            | StaticVectors(vectors_name, width, column=5)
            | HashEmbed(width // 2, 750, column=2)
            | HashEmbed(width // 2, 750, column=3)
            | HashEmbed(width // 2, 750, column=4)
        ) >> LN(Maxout(width))
        # Encode a sentence: embed the tokens, apply conv_depth residual
        # window layers, then pool with parametric attention.
        sent2vec = (
            flatten_add_lengths
            >> with_getitem(
                0,
                embed
                >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width))) ** conv_depth,
            )
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
        )
        # Remainder as in thinc's imdb example: encode each sentence, attend
        # over the sentence vectors, and classify the pooled document vector.
        model = (
            foreach(sent2vec, drop_factor=2.0)
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
            >> Softmax(nr_class)
        )
    return model
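
Note that the operator overloads only apply inside the define_operators block: | concatenates layer outputs feature-wise, >> chains layers into a pipeline, and ** stacks clones of a layer, so the code layout mirrors the network topology.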

from thinc.api import chain
from thinc.v2v import Affine, Model
from spacy._ml import flatten, PrecomputableAffine
from spacy.syntax._parser_model import ParserModel


def build_parser_model(tok2vec, hidden_width, nr_feature_tokens, maxout_pieces):
    # The enclosing signature is inferred from the snippet's free variables;
    # the body follows spaCy v2's parser-model construction.
    # chain() drops the output-width metadata, so save and restore nO.
    token_vector_width = tok2vec.nO
    tok2vec = chain(tok2vec, flatten)
    tok2vec.nO = token_vector_width
    # The "lower" layer precomputes hidden features for each candidate token.
    lower = PrecomputableAffine(
        hidden_width, nF=nr_feature_tokens, nI=tok2vec.nO, nP=maxout_pieces
    )
    lower.nP = maxout_pieces
    # Keep the small output layer on CPU.
    with Model.use_device("cpu"):
        upper = Affine()
    # Initialize weights at zero, as it's a classification layer.
    for desc in upper.descriptions.values():
        if desc.name == "W":
            desc.init = None
    return ParserModel(tok2vec, lower, upper)
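
A minimal sketch of driving this builder, assuming spaCy v2's Tok2Vec layer from spacy._ml; all widths below are illustrative, not taken from the snippet.

from spacy._ml import Tok2Vec

# Hypothetical sizes, chosen only for illustration.
tok2vec = Tok2Vec(width=96, embed_size=2000)
parser_model = build_parser_model(
    tok2vec, hidden_width=64, nr_feature_tokens=8, maxout_pieces=2
)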

import tqdm
import spacy
from spacy.attrs import ID, LOWER, ORTH, PREFIX, SHAPE, SUFFIX
from spacy.util import fix_random_seed
from thinc.extra import datasets
from thinc.extra.load_nlp import register_vectors
from thinc.misc import FeatureExtracter
from thinc.neural.util import require_gpu, to_categorical
from thinc.v2v import Model


def main(use_gpu=False, nb_epoch=100):
    fix_random_seed(0)
    if use_gpu:
        require_gpu()
    train, test = datasets.imdb(limit=2000)
    print("Load data")
    train_X, train_y = zip(*train)
    test_X, test_y = zip(*test)
    train_y = Model.ops.asarray(to_categorical(train_y, nb_classes=2))
    test_y = Model.ops.asarray(to_categorical(test_y, nb_classes=2))
    # Vectors-only pipeline with a rule-based sentencizer up front.
    nlp = spacy.load("en_vectors_web_lg")
    nlp.add_pipe(nlp.create_pipe("sentencizer"), first=True)
    # Make the pretrained vectors available to StaticVectors by name.
    register_vectors(Model.ops, nlp.vocab.vectors.name, nlp.vocab.vectors.data)
    # One feature array of token-attribute IDs per sentence.
    preprocessor = FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
    train_X = [preprocessor(list(doc.sents)) for doc in tqdm.tqdm(nlp.pipe(train_X))]
    test_X = [preprocessor(list(doc.sents)) for doc in tqdm.tqdm(nlp.pipe(test_X))]
    # Hold out the last 1000 training examples for development.
    dev_X = train_X[-1000:]
    dev_y = train_y[-1000:]
    train_X = train_X[:-1000]
    train_y = train_y[:-1000]
    print("Parse data")
    n_sent = sum(len(list(sents)) for sents in train_X)
    print("%d sentences" % n_sent)
    model = build_model(
        2,
        vectors_name=nlp.vocab.vectors.name,
        width=128,
        conv_depth=2,
        depth=2,
        train_X=train_X,
        train_y=train_y,
    )

from spacy.language import Language
from thinc.extra import datasets
from thinc.neural.ops import CupyOps
from thinc.neural.util import to_categorical
from thinc.v2v import Model


# A variant of the same driver that selects the GPU backend by hand.
def main(use_gpu=False, nb_epoch=50):
    if use_gpu:
        # Swap thinc's global array backend to CuPy.
        Model.ops = CupyOps()
        Model.Ops = CupyOps
    train, test = datasets.imdb()
    print("Load data")
    train_X, train_y = zip(*train)
    test_X, test_y = zip(*test)
    train_y = to_categorical(train_y, nb_classes=2)
    test_y = to_categorical(test_y, nb_classes=2)
    nlp = Language()
    dev_X = train_X[-1000:]
    dev_y = train_y[-1000:]
    train_X = train_X[:-1000]
    train_y = train_y[:-1000]
    print("Parse data")
    train_X = [nlp.make_doc(x) for x in train_X]
    dev_X = [nlp.make_doc(x) for x in dev_X]
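
Assigning Model.ops = CupyOps() switches thinc's array backend by hand and affects every model created afterwards; the require_gpu() call in the earlier driver is the one-call way to get the same effect.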

from thinc.v2v import Model


def softmax(X, drop=0.0):
    ops = Model.ops
    Y = ops.softmax(X)

    def backprop_softmax(dY, sgd=None):
        # Apply the softmax Jacobian to the incoming gradient.
        dX = ops.backprop_softmax(Y, dY)
        return dX

    return Y, backprop_softmax
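
This (output, backprop) signature is what thinc's layerize wrapper turns into a Model, so the function composes with other layers; a minimal sketch, assuming thinc v7's layerize:

from thinc.api import layerize

# Wrap the plain function into a Model so it can be chained with >>.
softmax_layer = layerize(softmax)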

from thinc.v2v import Affine, Model


# The class statement is inferred; this __init__ matches spaCy v2's
# MultiSoftmax layer, which predicts several softmax fields at once.
class MultiSoftmax(Affine):
    def __init__(self, out_sizes, nI=None, **kwargs):
        Model.__init__(self, **kwargs)
        self.out_sizes = out_sizes
        self.nO = sum(out_sizes)  # total width is the sum of the field widths
        self.nI = nI
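
A hypothetical construction, just to show the dimensions implied by the code above:

# Two output fields of 4 and 3 classes over a 128-dimensional input.
layer = MultiSoftmax([4, 3], nI=128)
assert layer.nO == 7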

def logistic(X, drop=0.0):
    ops = Model.ops
    # Elementwise sigmoid: y = 1 / (1 + exp(-x)).
    y = 1.0 / (1.0 + ops.xp.exp(-X))

    def backward(dy, sgd=None):
        # d(sigmoid)/dx = y * (1 - y), applied elementwise.
        return dy * y * (1 - y)

    return y, backward