Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
d_l2r_Zs = []
d_r2l_Zs = []
for dZ in dZs:
l2r_fwd = dZ[:, :nO]
r2l_fwd = dZ[:, nO:]
d_l2r_Zs.append(l2r.ops.xp.ascontiguousarray(l2r_fwd))
d_r2l_Zs.append(l2r.ops.xp.ascontiguousarray(r2l_fwd[::-1]))
dXs_l2r = bp_l2r_Zs(d_l2r_Zs, sgd=sgd)
dXs_r2l = bp_r2l_Zs(d_r2l_Zs, sgd=sgd)
dXs = [dXf + dXb[::-1] for dXf, dXb in zip(dXs_l2r, dXs_r2l)]
return dXs
Zs = [l2r.ops.xp.hstack((Zf, Zb[::-1])) for Zf, Zb in zip(l2r_Zs, r2l_Zs)]
return Zs, birnn_bwd
return wrap(birnn_fwd, l2r, r2l)
def with_cpu(ops, model):
    """Wrap `model` so that it is run on the CPU.

    `model.to_cpu()` is called once, when the wrapper is built. On every
    forward pass the inputs go through `_to_cpu` before reaching the model,
    and the model's outputs go through `_to_device(ops, ...)` before being
    handed back to the caller.

    ops: passed to `_to_device` together with the wrapped model's outputs.
    model: the model to wrap.
    RETURNS: the result of `wrap(with_cpu_forward, model)`.
    """
    model.to_cpu()

    def with_cpu_forward(inputs, drop=0.0):
        """Run the wrapped model on `_to_cpu(inputs)`; return the outputs
        (after `_to_device`) and a backprop callback."""
        host_outputs, host_backprop = model.begin_update(_to_cpu(inputs), drop=drop)
        device_outputs = _to_device(ops, host_outputs)

        def with_cpu_backprop(d_outputs, sgd=None):
            """Pass `_to_cpu(d_outputs)` to the wrapped model's backprop."""
            # NOTE(review): the value returned by the wrapped backprop is
            # handed back as-is, without a `_to_device` call -- confirm that
            # callers do not expect it on the `ops` device.
            return host_backprop(_to_cpu(d_outputs), sgd=sgd)

        return device_outputs, with_cpu_backprop

    return wrap(with_cpu_forward, model)
layers = [chain(layer, flatten) for layer in layers]
concat = concatenate(*layers)
def concatenate_lists_fwd(Xs, drop=0.0):
    """Forward pass: run `concat` over `Xs` and split the flat result.

    `drop` is multiplied by the enclosing `drop_factor` unless it is None.
    The flat output of `concat.begin_update` is split with `ops.unflatten`
    using the length of each item in `Xs`.

    RETURNS: `(ys, concatenate_lists_bwd)`.
    """
    if drop is not None:
        drop *= drop_factor
    item_sizes = ops.asarray(list(map(len, Xs)), dtype="i")
    flat_out, flat_backprop = concat.begin_update(Xs, drop=drop)
    split_out = ops.unflatten(flat_out, item_sizes)

    def concatenate_lists_bwd(d_ys, sgd=None):
        """Flatten `d_ys` with `ops.flatten` and backprop through `concat`."""
        flat_grad = ops.flatten(d_ys)
        return flat_backprop(flat_grad, sgd=sgd)

    return split_out, concatenate_lists_bwd
model = wrap(concatenate_lists_fwd, concat)
return model
d_lh = d_outputs.lh.data
lh_lengths = []
if d_outputs.has_po:
d_po = d_outputs.po.data[po_rows]
po_lengths = [1 for _ in lengths]
else:
d_po = d_outputs.po.data
po_lengths = []
dY = Acts(RaggedArray(d_lh, lh_lengths), RaggedArray(d_po, po_lengths))
dX = get_dX(dY, sgd=sgd)
assert dX is None
return None
return outputs, backprop_batched
return wrap(apply_model_to_batches, model)
lengths = [len(x) for x in Xs]
QKV, get_dX = affine.begin_update(X, drop=drop)
Qs, Ks, Vs = _split_seqs(QKV, lengths, nH, nD)
def qkv_sa_backward(dQs_dKs_dVs, sgd=None):
    """Backward pass for the query/key/value projection.

    dQs_dKs_dVs: a 3-item sequence of gradients `(dQs, dKs, dVs)`.
    sgd: forwarded to `get_dX`.
    RETURNS: the result of `get_dX`, split by `affine.ops.unflatten`
        using the enclosing `lengths`.
    """
    grad_q, grad_k, grad_v = dQs_dKs_dVs
    # Merge the three gradients into the single array `get_dX` consumes.
    joined_grad = _join_seqs(grad_q, grad_k, grad_v, nH, nD)
    flat_grad = get_dX(joined_grad, sgd=sgd)
    return affine.ops.unflatten(flat_grad, lengths)
if get_mask is not None:
xp = get_array_module(X)
masks = [get_mask(xp, length, length) for length in lengths]
else:
masks = [None for _ in lengths]
return (Qs, Ks, Vs, masks), qkv_sa_backward
return wrap(qkv_sa_forward, affine)
layers = [chain(layer, flatten) for layer in layers]
concat = concatenate(*layers)
def concatenate_lists_fwd(Xs, drop=0.0):
    """Forward pass: run `concat` over `Xs` and split the flat result.

    `drop` is multiplied by the enclosing `drop_factor` unless it is None.
    The flat output of `concat.begin_update` is split with `ops.unflatten`
    using the length of each item in `Xs`.

    RETURNS: `(ys, concatenate_lists_bwd)`.
    """
    if drop is not None:
        drop *= drop_factor
    item_sizes = ops.asarray(list(map(len, Xs)), dtype="i")
    flat_out, flat_backprop = concat.begin_update(Xs, drop=drop)
    split_out = ops.unflatten(flat_out, item_sizes)

    def concatenate_lists_bwd(d_ys, sgd=None):
        """Flatten `d_ys` with `ops.flatten` and backprop through `concat`."""
        flat_grad = ops.flatten(d_ys)
        return flat_backprop(flat_grad, sgd=sgd)

    return split_out, concatenate_lists_bwd
model = wrap(concatenate_lists_fwd, concat)
return model
def rnn_step_fwd(prevstate_inputs, drop=0.0):
    """Run one recurrent step.

    prevstate_inputs: a pair `((cell_tm1, hidden_tm1), inputs)`.
    drop: forwarded to both `weights.begin_update` and `gates.begin_update`.
    RETURNS: `(((cells, hiddens), hiddens), rnn_step_bwd)`, i.e. the new
        state, the step output (the same `hiddens` object), and the
        backprop callback.
    """
    (prev_cells, prev_hiddens), step_inputs = prevstate_inputs
    acts, bp_acts = weights.begin_update((step_inputs, prev_hiddens), drop=drop)
    (cells, hiddens), bp_gates = gates.begin_update((acts, prev_cells), drop=drop)

    def rnn_step_bwd(d_state_d_hiddens, sgd=None):
        """Backprop through the gates, then through the weights.

        d_state_d_hiddens: a pair `((d_cells, d_state_hiddens), d_hiddens)`.
        RETURNS: `((d_cell_tm1, d_hidden_tm1), d_inputs)`.
        """
        # Only the second element of the pair is used as the hidden-state
        # gradient; the hidden gradient inside the state tuple is unpacked
        # and dropped.
        # NOTE(review): confirm that discarding the state's hidden gradient
        # (rather than summing the two) is intended.
        (d_cells, _state_d_hiddens), d_hiddens = d_state_d_hiddens
        d_acts, d_cell_tm1 = bp_gates((d_cells, d_hiddens), sgd=sgd)
        d_inputs, d_hidden_tm1 = bp_acts(d_acts, sgd=sgd)
        d_prevstate = (d_cell_tm1, d_hidden_tm1)
        return d_prevstate, d_inputs

    next_state = (cells, hiddens)
    return (next_state, hiddens), rnn_step_bwd
model = wrap(rnn_step_fwd, weights, gates)
model.nO = weights.nO
model.nI = weights.nI
model.weights = weights
model.gates = gates
return model
#nonlin = preact > 0
#state = preact * nonlin
#state = preact
state = word_feats
scores, bp_scores = output_model.begin_update(state, drop=drop)
def tagger_bwd(d_scores_d_next_state, sgd=None):
    """Backward pass for the tagger step.

    d_scores_d_next_state: a pair `(d_scores, d_next_state)`.
    sgd: forwarded to every backprop callback.
    RETURNS: `(d_prev_state, d_state)`.
    """
    # NOTE(review): the incoming next-state gradient is unpacked but not
    # added to `d_state`; only the gradient from `bp_scores` is propagated.
    d_scores, _d_next_state = d_scores_d_next_state
    d_state = bp_scores(d_scores, sgd=sgd)
    # Called for its side effects only; its return value is ignored.
    bp_tags(d_state, sgd=sgd)
    return bp_state(d_state, sgd=sgd), d_state
return (scores, state), tagger_bwd
model = wrap(tagger_fwd, tags_model, state_model, output_model)
model.nO = output_model.nO
return model