Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_custom_regex(self):
    """Exercise NewlineText's reject_reg and well_formed options."""
    # A custom reject pattern makes the otherwise-fine sentence raise.
    with self.assertRaises(Exception):
        markovify.NewlineText(
            'This sentence contains a custom bad character: #.',
            reject_reg=r'#',
        )
    # The default well-formedness check rejects an unbalanced parenthesis...
    with self.assertRaises(Exception):
        markovify.NewlineText('This sentence (would normall fail')
    # ...but the same input is accepted once well_formed is disabled.
    markovify.NewlineText('This sentence (would normall fail', well_formed=False)
def get_text_model(channel, user, quotes):
    """Build a newline-delimited Markov model from channel history.

    Row selection:
      * quotes and user -> that user's quotes in the channel
      * quotes only     -> every quote in the channel
      * user only       -> that user's messages in the channel
      * neither         -> all channel messages, excluding 'jarvis' itself
    A random sample of at most 1000 rows feeds the model.
    """
    if quotes:
        if user:
            rows = db.Quote.find(channel=channel, user=user)
        else:
            rows = db.Quote.find(channel=channel)
    elif user:
        rows = db.Message.find(channel=channel, user=user)
    else:
        rows = (
            db.Message.select()
            .where(db.Message.channel == channel)
            .where(db.Message.user != 'jarvis'))
    # Shuffle at the database level, then cap the corpus size.
    sample = rows.order_by(db.peewee.fn.Random()).limit(1000)
    corpus = '\n'.join(row.text for row in sample)
    return markovify.NewlineText(corpus)
def command_markov(self, event, entity):
    """Build and cache a Markov model for a user or channel, then confirm."""
    # Users are keyed by author id; any other entity is treated as a channel.
    if isinstance(entity, DiscoUser):
        query = Message.select().where(Message.author_id == entity.id).limit(500000)
    else:
        query = Message.select().where(Message.channel_id == entity.id).limit(500000)
    contents = [row.content for row in query]
    self.models[entity.id] = markovify.NewlineText('\n'.join(contents))
    event.msg.reply(u':ok_hand: created markov model for {} using {} messages'.format(entity, len(contents)))
c.execute('SELECT text FROM markov_rants')
rants = c.fetchall()
# Normalize the quote data... Get rid of IRC junk
clean_quotes = [normalize_quote(d['quote']) for d in quotes]
# Normalize the inspire data... Just lightly prune authors
clean_inspirations = [normalize_inspiration(d['text']) for d in inspirations]
# Normalize the rant data... just remove ending punctuation
clean_rants = [normalize_rant(d['text']) for d in rants]
# Create the three models, and combine them.
# More heavily weight our quotes and rants
rants_model = markovify.NewlineText('\n'.join(clean_rants))
quotes_model = markovify.NewlineText('\n'.join(clean_quotes))
inspire_model = markovify.NewlineText('\n'.join(clean_inspirations))
return markovify.combine([quotes_model, rants_model, inspire_model], model_weights)
def augment_data(x_data, y_data,augment_size=50):
'''augment the data by markovify to improve the imbalance issue
https://towardsdatascience.com/nlg-for-fun-automated-headlines-generator-6d0459f9588f'''
#To-do, solve the generate None bug
# Accumulators for the synthesized samples and their labels.
x_dataaug,y_dataaug=[],[]
# Visit each distinct class so under-represented labels can be oversampled.
for label in set(y_data):
# np.where on the boolean mask returns a tuple; element [0] is the index array,
# so y_data is assumed to be a numpy array -- TODO confirm.
label_indices=np.where(y_data==label)
n_samples=len(label_indices[0])
# NOTE(review): the source is truncated here -- `if n_samples` is an
# incomplete statement and the oversampling body is missing from this view.
if n_samples
# NOTE(review): fragment begins mid-loop -- `acc`, `acctjson`, and
# `acctfile` are defined outside this view.
id = str(acc['id'])  # shadows the builtin `id`; consider renaming on refactor
try:
# Resume scraping from the last recorded id for this account.
since_id = self.scrape_id(id, since=acctjson[id])
except:
# Bare except: presumably catching KeyError for accounts not yet in
# acctjson, but it also hides scrape failures -- TODO narrow the clause.
since_id = self.scrape_id(id)
# Persist the newest seen id so the next run can resume incrementally.
acctjson[id] = since_id
with open(acctfile, 'w') as f:
json.dump(acctjson, f)
# generate the whole corpus after scraping so we don't do at every runtime
combined_model = None
for (dirpath, _, filenames) in os.walk(self.corpus_dir_name):
for filename in filenames:
with open(os.path.join(dirpath, filename)) as f:
# NOTE(review): `f` is the open file object, not its contents;
# markovify expects a text string -- confirm whether f.read() was intended.
model = markovify.NewlineText(f, retain_original=False)
# Fold each per-file model into one combined model.
if combined_model:
combined_model = markovify.combine(models=[combined_model, model])
else:
combined_model = model
# Serialize the merged model to disk for reuse on later runs.
with open(self.model_name,'w') as f:
f.write(combined_model.to_json())
# NOTE(review): fragment of an unseen function -- `c`, `quotes`,
# `inspirations`, and `model_weights` come from outside this view;
# `c` is presumably a DB cursor yielding dict-style rows -- confirm.
# Fetch iconic FOSS rants
c.execute('SELECT text FROM markov_rants')
rants = c.fetchall()
# Normalize the quote data... Get rid of IRC junk
clean_quotes = [normalize_quote(d['quote']) for d in quotes]
# Normalize the inspire data... Just lightly prune authors
clean_inspirations = [normalize_inspiration(d['text']) for d in inspirations]
# Normalize the rant data... just remove ending punctuation
clean_rants = [normalize_rant(d['text']) for d in rants]
# Create the three models, and combine them.
# More heavily weight our quotes and rants
rants_model = markovify.NewlineText('\n'.join(clean_rants))
quotes_model = markovify.NewlineText('\n'.join(clean_quotes))
inspire_model = markovify.NewlineText('\n'.join(clean_inspirations))
# model_weights aligns positionally with the list: quotes, rants, inspire.
return markovify.combine([quotes_model, rants_model, inspire_model], model_weights)