def read_datafiles(files):
    queries = {}
    docs = {}
    for file in files:
        for line in tqdm(file, desc='loading datafile (by line)', leave=False):
            cols = line.rstrip().split('\t')
            if len(cols) != 3:
                tqdm.write(f'skipping line: `{line.rstrip()}`')
                continue
            c_type, c_id, c_text = cols
            assert c_type in ('query', 'doc')
            if c_type == 'query':
                queries[c_id] = c_text
            if c_type == 'doc':
                docs[c_id] = c_text
    return queries, docs
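# Hypothetical usage sketch (not part of the original snippet): read_datafiles expects
# open file handles over tab-separated `type<TAB>id<TAB>text` rows; the file names
# below are placeholders.
with open('queries.tsv', 'rt') as f_queries, open('docs.tsv', 'rt') as f_docs:
    queries, docs = read_datafiles([f_queries, f_docs])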
# accumulate cross-validated predictions so they can be plotted against the actual values
prediction_list += [item for item in prediction]
target_list += [item for item in test_target]
bounds = [0, max(target_list) + 20]
plt.scatter(target_list, prediction_list)
plt.title("{} actual vs. predicted".format(target_label))
plt.xlim(bounds)
plt.ylim(bounds)
x = np.linspace(0, bounds[1] + 20, 10000)
plt.plot(x, x, color="black")
plt.ylabel("Cross validated predictions for {}".format(target_label))
plt.xlabel("Actual values for {}".format(target_label))
plt.show()
tqdm.write("RMSE for {}: {}".format(
target_label, sum(rmse_scores) / len(rmse_scores)
))
tqdm.write("R^2 for {}: {}".format(
target_label, sum(r2_scores) / len(r2_scores)
))
sleep(1)
# COMPARE TO MATBENCH
df = load_tehrani_superhard_mat(data="basic_descriptors")
df = df.drop(["formula", "material_id", "shear_modulus",
              "initial_structure"], axis=1)
traindf = df.iloc[:floor(.8 * len(df))]
testdf = df.iloc[floor(.8 * len(df)):]
target = "bulk_modulus"
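# Hypothetical continuation (not part of the original snippet): the 80/20 split above
# would typically feed a regressor on `target`; RandomForestRegressor here is a
# stand-in model choice, not the one used in the source.
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(n_estimators=200, random_state=0)
rf.fit(traindf.drop(columns=[target]), traindf[target])
test_pred = rf.predict(testdf.drop(columns=[target]))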
# (The original snippet begins mid-call; a mushroom_rl GaussianTorchPolicy-style
#  constructor is assumed here for the elided opening of the call.)
policy = GaussianTorchPolicy(Network,
                             mdp.info.observation_space.shape,
                             mdp.info.action_space.shape,
                             **policy_params)
agent = alg(mdp.info, policy, critic_params, **alg_params)
core = Core(agent, mdp)
for it in trange(n_epochs):
    core.learn(n_steps=n_steps, n_steps_per_fit=n_steps_per_fit)
    dataset = core.evaluate(n_steps=n_step_test, render=False)
    J = np.mean(compute_J(dataset, mdp.info.gamma))  # mean discounted return
    R = np.mean(compute_J(dataset))                  # mean undiscounted return
    E = agent.policy.entropy()
    tqdm.write('END OF EPOCH ' + str(it))
    tqdm.write('J: {}, R: {}, entropy: {}'.format(J, R, E))
    tqdm.write('##################################################################################################')
print('Press a button to visualize')
input()
core.evaluate(n_episodes=5, render=True)
# optional hyperparameters. Set to None if not in config file
class_weights = torch.tensor(params['training']['class_weights']) if params['training']['class_weights'] else None
if params['training']['class_weights']:
    verify_weights(num_classes, class_weights)
ignore_index = get_key_def('ignore_index', params['training'], -1)
# Loss function
criterion = MultiClassCriterion(loss_type=params['training']['loss_fn'], ignore_index=ignore_index, weight=class_weights)
# Optimizer
opt_fn = params['training']['optimizer']
optimizer = create_optimizer(params=model.parameters(), mode=opt_fn, base_lr=lr, weight_decay=weight_decay)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=step_size, gamma=gamma)
if checkpoint:
    tqdm.write('Loading checkpoint...')
    model, optimizer = load_from_checkpoint(checkpoint, model, optimizer=optimizer)
return model, criterion, optimizer, lr_scheduler
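# Hypothetical usage sketch (not part of the original snippet): a criterion/optimizer/
# scheduler triple like the one returned above is typically consumed in a loop of this
# shape; `train_loader` and `num_epochs` are assumed names.
for epoch in range(num_epochs):
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
    lr_scheduler.step()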
# GAN training step (fragment): step the LR schedulers, update the generator weight EMA, and log to TensorBoard
gen_scheduler, dis_scheduler = schedulers
g_lr = gen_scheduler.step(global_steps)
d_lr = dis_scheduler.step(global_steps)
writer.add_scalar('LR/g_lr', g_lr, global_steps)
writer.add_scalar('LR/d_lr', d_lr, global_steps)
# moving average weight
for p, avg_p in zip(gen_net.parameters(), gen_avg_param):
    avg_p.mul_(0.999).add_(p.data, alpha=0.001)  # EMA update; alpha keyword avoids the deprecated add_(Number, Tensor) form
writer.add_scalar('g_loss', g_loss.item(), global_steps)
gen_step += 1
# verbose
if gen_step and iter_idx % args.print_freq == 0:
    tqdm.write(
        "[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]" %
        (epoch, args.max_epoch, iter_idx % len(train_loader), len(train_loader),
         d_loss.item(), g_loss.item()))
writer_dict['train_global_steps'] = global_steps + 1
if epoch % 2 == 0 and step == 1:
    decrease_learning_rate(optimizer, decrease_by=0.03)
if epoch % 10 == 0 and step == 1:
    tqdm.write("*" * 50)
    tqdm.write("Epoch {:3d} step {:3d} loss: {:5.2f}\n".format(epoch, step, loss.item()))
    # sample 100 sequences from the Prior and report the fraction of valid SMILES
    seqs, likelihood, _ = Prior.sample(100)
    valid = 0
    f = open('tran_output.smi', 'a')
    for i, seq in enumerate(seqs.cpu().numpy()):
        smile = voc.decode(seq)
        if Chem.MolFromSmiles(smile):
            valid += 1
            f.write(smile + "\n")
        if i < 10:
            tqdm.write(smile)
    f.close()
    tqdm.write("\n{:>4.1f}% valid SMILES".format(100 * valid / len(seqs)))
    tqdm.write("*" * 50 + "\n")
    # Save the Prior
    torch.save(Prior.rnn.state_dict(), "data/100_epochs_transfer.ckpt")
# training logic
min_val_loss = 999999
counter = 0
num_epochs = int(num_iters) if self.epoch_scale else 1
num_passes = None if self.epoch_scale else num_iters
for epoch in range(num_epochs):
    self._train_one_epoch(model, num_passes, reg_layers)
    val_loss = self._validate_one_epoch(model)
    if val_loss < min_val_loss:
        min_val_loss = val_loss
        counter = 0
    else:
        counter += 1
    if counter > self.patience:
        tqdm.write("[!] early stopped!!")
        model.early_stopped = True
        return min_val_loss
if self.batch_hyper:
    self.data_loader = None
state = {
    'state_dict': model.state_dict(),
    'min_val_loss': min_val_loss,
}
self._save_checkpoint(state, model.ckpt_name)
return min_val_loss
def build_dependency_graph(self, connection):
    graph = Graph()
    tables = get_table_names(connection=connection)
    for table in tables:
        graph.add_table(table)
    for table in tqdm(tables):
        tqdm.write(f"Computing joinable tables for {table}...")
        joinable_tables = self.find_joinable_tables(table, connection)
        tqdm.write("Add results to graph")
        for id_column, join_data in joinable_tables.items():
            for joinable_table, joinable_columns in join_data:
                for joinable_column in joinable_columns:
                    join_info = (id_column, joinable_column)
                    graph.add_join(table, joinable_table, join_info)
    return graph
def log_training_results(engine):
    pbar.refresh()
    evaluator.run(train_loader)
    metrics = evaluator.state.metrics
    avg_accuracy = metrics['accuracy']
    avg_nll = metrics['nll']
    tqdm.write(
        "Training Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}"
        .format(engine.state.epoch, avg_accuracy, avg_nll)
    )
if not pos_ids:
    # (assumed guard: the original snippet begins at this `continue`, which skips
    #  queries that have no positive documents)
    continue
pos_id = random.choice(pos_ids)
pos_ids_lookup = set(pos_ids)
pos_ids = set(pos_ids)
neg_ids = [did for did in train_pairs[qid] if did not in pos_ids_lookup]
if len(neg_ids) == 0:
    continue
neg_id = random.choice(neg_ids)
query_tok = model.tokenize(ds_queries[qid])
pos_doc = ds_docs.get(pos_id)
neg_doc = ds_docs.get(neg_id)
if pos_doc is None:
    tqdm.write(f'missing doc {pos_id}! Skipping')
    continue
if neg_doc is None:
    tqdm.write(f'missing doc {neg_id}! Skipping')
    continue
yield qid, pos_id, query_tok, model.tokenize(pos_doc)
yield qid, neg_id, query_tok, model.tokenize(neg_doc)