Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Command-line interface. Positional arguments are the training text files;
# the options control the n-gram order, smoothing and generation.
parser = optparse.OptionParser()
parser.add_option("-n", dest="ngram", type="int", help="n-gram", default=7)
parser.add_option("-d", dest="discount", type="float", help="discount parameter of Kneser-Ney", default=0.5)
parser.add_option("-i", dest="numgen", type="int", help="number of texts to generate", default=100)
parser.add_option("-e", dest="encode", help="character code of input file(s)", default='utf-8')
parser.add_option("-o", dest="output", help="output filename", default="generated.txt")
parser.add_option("--seed", dest="seed", type="int", help="random seed")
opt, args = parser.parse_args()

# opt.seed is None when --seed is not given; pass it through unchanged so
# that an explicit seed makes runs reproducible.
numpy.random.seed(opt.seed)

# Control characters wrapped around every training line so the model can see
# where a line begins and where it ends.
START = u"\u0001"
END = u"\u0002"

ngram = NGram(opt.ngram)
gen = Generator(ngram)

# Training pass: feed every non-blank line of every input file to the
# generator, one character at a time.
for filename in args:
    with codecs.open(filename, "rb", opt.encode) as f:
        for s in f:
            s = s.strip()
            if not s:
                # Blank (or whitespace-only) lines carry no training data.
                continue
            s = START + s + END
            # Reset the generator's position before each new line.
            gen.start()
            for c in s:
                gen.inc(c)
# NOTE(review): indentation was lost in this excerpt and the innermost loop
# body is not visible; the comments below describe only what is shown.
# Discount value taken from the -d command-line option.
D = opt.discount
# Open the output file (-o option) for writing, encoded as UTF-8.
with codecs.open(opt.output, "wb", "utf-8") as f:
# One iteration per requested text (-i option, "number of texts to generate").
for n in xrange(opt.numgen):
# st is initialised to the START marker; presumably it accumulates the
# generated text -- TODO confirm against the loop body.
st = START
# Bounded loop of at most 1000 iterations per text; body not shown here.
for i in xrange(1000):
def inc(self, v):
    """Count one occurrence of ``v`` under this node and return its child.

    While ``self.depth`` is no greater than ``self.N``, the child stored
    under key ``v`` is created on first sight (as an ``NGram`` one level
    deeper), its ``freq`` is incremented, and the child is returned.
    Once the depth exceeds ``N`` nothing is recorded and ``None`` is
    returned.
    """
    # Past the configured depth: nothing to record.
    if self.depth > self.N:
        return None

    if v not in self:
        self[v] = NGram(self.N, self.depth + 1)

    node = self[v]
    node.freq += 1
    return node
def dump(self):