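# Build the integer-encoded corpus: voca maps each word to an id, vocalist maps
# ids back to words, docs collects the encoded sentences, and N counts tokens.
# Id 0 is the sentence-start marker <s> and id 1 the end marker </s>.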
        if w not in voca:
            voca[w] = len(vocalist)
            vocalist.append(w)
        doc.append(voca[w])
    if len(doc) > 0:
        N += len(doc)
        doc.append(1) # </s>
        docs.append(doc)

D = len(docs)
V = len(vocalist)
print "corpus : %d" % D
print "vocabulary : %d / %d" % (V, N)
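
# Train the RNNLM for opt.I epochs: the learning rate a decays each epoch
# (a = a * 0.95 + 0.01, approaching 0.2) and the second learn() parameter b
# is switched from 0 to opt.beta after the first half of the epochs.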
print ">> RNNLM(K=%d, b=%f)" % (opt.K, opt.beta)
model = RNNLM(V, K=opt.K)
a = 1.0
b = 0
for i in xrange(opt.I):
    if i > opt.I / 2: b = opt.beta
    perpl = model.learn(docs, a, b)
    print i, perpl, "a=%.3f" % a, b
    a = a * 0.95 + 0.01
print opt.I, model.perplexity(docs)
if opt.output:
    import cPickle
    with open(opt.output, 'wb') as f:
        cPickle.dump([model, voca, vocalist], f)
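
# Set up a bigram model with smoothing parameter alpha as a baseline on the same corpus.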
print ">> BIGRAM(alpha=%f)" % opt.alpha
model = BIGRAM(V, opt.alpha)
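
                # accumulate the negative log-likelihood of each observed word;
                # the returned value log_like / N is the mean per-word NLL
                # (its exponential is the conventional perplexity)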
                y = y / y.sum()
                log_like -= numpy.log(y[w])
                pre_w = w
            N += len(doc)
        return log_like / N
    def clear(self):
        # reset the recurrent hidden state
        self.s = numpy.zeros(self.K)

    def dist(self, w):
        # advance the hidden state: s = sigmoid(W . s + U[:, w])
        self.s = 1 / (numpy.exp(- numpy.dot(self.W, self.s) - self.U[:, w]) + 1)
        # distribution over the next word: softmax(V . s), with the max
        # subtracted before exponentiation for numerical stability
        z = numpy.dot(self.V, self.s)
        y = numpy.exp(z - z.max())
        return y / y.sum()
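
# RNNLM variant that stores the recent hidden states and input words so the
# output error can be propagated back through time (see pre_s / pre_w below).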
class RNNLM_BPTT(RNNLM):
    """RNNLM with BackPropagation Through Time"""
    def learn(self, docs, alpha=0.1, tau=3):
        # visit the documents in a random order each epoch
        index = numpy.arange(len(docs))
        numpy.random.shuffle(index)
        for i in index:
            doc = docs[i]
            # histories of hidden states and input words for unfolding the
            # recurrence (tau is the truncation depth of the backpropagation)
            pre_s = [numpy.zeros(self.K)]
            pre_w = [0] # <s>
            for w in doc:
                # forward step: sigmoid hidden state, softmax output
                s = 1 / (numpy.exp(- numpy.dot(self.W, pre_s[-1]) - self.U[:, pre_w[-1]]) + 1)
                z = numpy.dot(self.V, s)
                y = numpy.exp(z - z.max())
                y = y / y.sum()
                # calculate errors
                y[w] -= 1 # -e0: output error y - t for the observed word w