else:
    h_t = enc_h_t[-1]
    c_t = enc_c_t[-1]                                   # (batch, hidden_size)

ctx, _ = pad_packed_sequence(enc_h, batch_first=True)

if args.sub_out == "max":
    ctx_max, _ = ctx.max(1)
    decoder_init = nn.Tanh()(self.encoder2decoder(ctx_max))
elif args.sub_out == "tanh":
    decoder_init = nn.Tanh()(self.encoder2decoder(h_t))
else:
    assert False

ctx = self.drop(ctx)
if args.zero_init:
    return ctx, torch.zeros_like(decoder_init), torch.zeros_like(c_t)
else:
    return ctx, decoder_init, c_t   # ctx: (batch, seq_len, hidden_size*num_directions); decoder_init, c_t: (batch, hidden_size)
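For context, the ctx / h_t tensors above come out of a packed LSTM pass. A minimal, self-contained sketch of that pattern (sizes and module names here are illustrative, not the repository's API):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

batch, seq_len, emb, hidden = 4, 7, 32, 64
embeds = torch.randn(batch, seq_len, emb)
lengths = torch.tensor([7, 5, 4, 2])                 # sorted descending (enforce_sorted=True by default)

lstm = nn.LSTM(emb, hidden, batch_first=True)
encoder2decoder = nn.Linear(hidden, hidden)

packed = pack_padded_sequence(embeds, lengths, batch_first=True)
enc_h, (enc_h_t, enc_c_t) = lstm(packed)
ctx, _ = pad_packed_sequence(enc_h, batch_first=True)   # (batch, seq_len, hidden)

h_t = enc_h_t[-1]                                    # final hidden state of the last layer, (batch, hidden)
ctx_max, _ = ctx.max(1)                              # max-pool over time, as in the "max" branch above
decoder_init = torch.tanh(encoder2decoder(ctx_max))  # or torch.tanh(encoder2decoder(h_t)) for the "tanh" branch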
stats = train_env.get_statistics()
print("The training data size is: %d" % train_env.size())
print("The average instruction length of the dataset is %0.4f." % (stats['length']))
print("The average action length of the dataset is %0.4f." % (stats['path']))

stats = aug_env.get_statistics()
print("The augmentation data size is %d" % aug_env.size())
print("The average instruction length of the dataset is %0.4f." % (stats['length']))
print("The average action length of the dataset is %0.4f." % (stats['path']))

# Setup the validation data
val_envs = {split: (R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split],
                             tokenizer=tok), Evaluation([split], featurized_scans, tok))
            for split in ['train', 'val_seen', 'val_unseen']}

# Start training
train(train_env, tok, args.iters, val_envs=val_envs, aug_env=aug_env)
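get_statistics() and size() belong to the R2R batch environment. A rough sketch of the kind of aggregation they imply, with every name below assumed rather than taken from the repository:

# Hypothetical illustration: average instruction length (in tokens) and
# average path length (in viewpoints) over a list of R2R-style items.
def dataset_statistics(data):
    length = sum(len(item['instruction'].split()) for item in data) / len(data)
    path = sum(len(item['path']) for item in data) / len(data)
    return {'length': length, 'path': path}

items = [
    {'instruction': 'walk past the sofa and stop at the door', 'path': ['a', 'b', 'c']},
    {'instruction': 'turn left then wait by the stairs', 'path': ['a', 'd']},
]
stats = dataset_statistics(items)
print("The average instruction length of the dataset is %0.4f." % stats['length'])
print("The average action length of the dataset is %0.4f." % stats['path'])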
# Encoder
ctx = self.encoder(can_feats, img_feats, lengths,
                   already_dropfeat=(featdropmask is not None))
ctx_mask = utils.length2mask(lengths)

# Decoder
words = []
log_probs = []
hidden_states = []
entropies = []
h_t = torch.zeros(1, batch_size, args.rnn_dim).cuda()
c_t = torch.zeros(1, batch_size, args.rnn_dim).cuda()
ended = np.zeros(len(obs), bool)
word = np.ones(len(obs), np.int64) * self.tok.word_to_index['<BOS>']    # First word is <BOS>
word = torch.from_numpy(word).view(-1, 1).cuda()
for i in range(args.maxDecode):
    # Decode Step
    logits, h_t, c_t = self.decoder(word, ctx, ctx_mask, h_t, c_t)      # logits: (b, 1, vocab_size)

    # Select the word
    logits = logits.squeeze()                                           # logits: (b, vocab_size)
    logits[:, self.tok.word_to_index['<UNK>']] = -float("inf")          # No <UNK> in infer
    if sampling:
        probs = F.softmax(logits, -1)
        m = torch.distributions.Categorical(probs)
        word = m.sample()
        log_prob = m.log_prob(word)
        if train:
            log_probs.append(log_prob)
            hidden_states.append(h_t.squeeze())
            entropies.append(m.entropy())
        else:
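The sampling branch above is the standard policy-gradient pattern: sample a word, keep its log-probability and the distribution's entropy. A self-contained sketch (the vocabulary size and the banned token index are made up):

import torch
import torch.nn.functional as F

batch, vocab_size, unk_index = 3, 10, 1
logits = torch.randn(batch, vocab_size)
logits[:, unk_index] = -float('inf')        # forbid one token, as with <UNK> above

probs = F.softmax(logits, dim=-1)
m = torch.distributions.Categorical(probs)
word = m.sample()                           # (batch,), sampled token ids
log_prob = m.log_prob(word)                 # kept for REINFORCE-style losses
entropy = m.entropy()                       # optional entropy bonus
print(word, log_prob, entropy)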
self.model.lxrt_encoder.multi_gpu()

# Losses and optimizer
self.bce_loss = nn.BCEWithLogitsLoss()
self.mce_loss = nn.CrossEntropyLoss(ignore_index=-1)
if 'bert' in args.optim:
    batch_per_epoch = len(self.train_tuple.loader)
    t_total = int(batch_per_epoch * args.epochs)
    print("Total Iters: %d" % t_total)
    from lxrt.optimization import BertAdam
    self.optim = BertAdam(list(self.model.parameters()),
                          lr=args.lr,
                          warmup=0.1,
                          t_total=t_total)
else:
    self.optim = args.optimizer(list(self.model.parameters()), args.lr)

self.output = args.output
os.makedirs(self.output, exist_ok=True)
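BertAdam here comes from the repository's lxrt.optimization module and applies linear warmup over t_total steps. An approximately equivalent setup with stock PyTorch, offered as a sketch rather than a drop-in replacement:

import torch

model = torch.nn.Linear(16, 4)              # stand-in for the real model
t_total, warmup = 1000, 0.1
optim = torch.optim.AdamW(model.parameters(), lr=5e-5)

def warmup_then_linear_decay(step):
    # Ramp the LR up for the first warmup fraction of steps, then decay linearly to zero.
    warmup_steps = int(t_total * warmup)
    if step < warmup_steps:
        return step / max(1, warmup_steps)
    return max(0.0, (t_total - step) / max(1, t_total - warmup_steps))

sched = torch.optim.lr_scheduler.LambdaLR(optim, lr_lambda=warmup_then_linear_decay)
# call optim.step() followed by sched.step() once per training iteration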
    args.train, bs=args.batch_size, shuffle=True, drop_last=True
)
if args.valid != "":
    self.valid_tuple = get_data_tuple(
        args.valid, bs=1024,
        shuffle=False, drop_last=False
    )
else:
    self.valid_tuple = None

# Model
self.model = VQAModel(self.train_tuple.dataset.num_answers)

# Load pre-trained weights
if args.load_lxmert is not None:
    self.model.lxrt_encoder.load(args.load_lxmert)
if args.load_lxmert_qa is not None:
    load_lxmert_qa(args.load_lxmert_qa, self.model,
                   label2ans=self.train_tuple.dataset.label2ans)

# GPU options
self.model = self.model.cuda()
if args.multiGPU:
    self.model.lxrt_encoder.multi_gpu()

# Loss and Optimizer
self.bce_loss = nn.BCEWithLogitsLoss()
if 'bert' in args.optim:
    batch_per_epoch = len(self.train_tuple.loader)
    t_total = int(batch_per_epoch * args.epochs)
    print("BertAdam Total Iters: %d" % t_total)
    from lxrt.optimization import BertAdam

def save(self, name):
    torch.save(self.model.state_dict(),
               os.path.join(args.output, "%s_LXRT.pth" % name))
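save() above writes the model's state_dict to "<name>_LXRT.pth"; reloading is the mirror image. A minimal sketch using plain PyTorch (paths and the strict flag are illustrative choices):

import os
import torch

def save_checkpoint(model, output_dir, name):
    torch.save(model.state_dict(),
               os.path.join(output_dir, "%s_LXRT.pth" % name))

def load_checkpoint(model, output_dir, name):
    path = os.path.join(output_dir, "%s_LXRT.pth" % name)
    state_dict = torch.load(path, map_location='cpu')
    model.load_state_dict(state_dict, strict=False)   # strict=False tolerates missing/extra heads
    return model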
input_a_t, f_t, candidate_feat, candidate_leng = self.get_input_feat(perm_obs)

if speaker is not None:             # Apply the env drop mask to the feat
    candidate_feat[..., :-args.angle_feat_size] *= noise
    f_t[..., :-args.angle_feat_size] *= noise

h_t, c_t, logit, h1 = self.decoder(input_a_t, f_t, candidate_feat,
                                   h_t, h1, c_t,
                                   ctx, ctx_mask,
                                   already_dropfeat=(speaker is not None))
hidden_states.append(h_t)

# Mask outputs where agent can't move forward
# Here the logit is [b, max_candidate]
candidate_mask = utils.length2mask(candidate_leng)
if args.submit:                     # Avoiding cyclic paths
    for ob_id, ob in enumerate(perm_obs):
        visited[ob_id].add(ob['viewpoint'])
        for c_id, c in enumerate(ob['candidate']):
            if c['viewpointId'] in visited[ob_id]:
                candidate_mask[ob_id][c_id] = 1
logit.masked_fill_(candidate_mask, -float('inf'))

# Supervised training
target = self._teacher_action(perm_obs, ended)
ml_loss += self.criterion(logit, target)

# Determine next model inputs
if self.feedback == 'teacher':
    a_t = target                    # teacher forcing
elif self.feedback == 'argmax':
    _, a_t = logit.max(1)           # student forcing - argmax
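utils.length2mask builds a padding mask from the per-example candidate counts so that padded candidates can never win the argmax. A self-contained sketch of that masking idea (not the repository's exact implementation):

import torch

def length2mask(lengths, size=None):
    # True where the position lies beyond the sequence's real length.
    lengths = torch.as_tensor(lengths)
    size = int(lengths.max()) if size is None else size
    return torch.arange(size).unsqueeze(0) >= lengths.unsqueeze(1)

candidate_leng = [3, 5, 2]
logit = torch.randn(3, 5)
mask = length2mask(candidate_leng, size=5)       # (3, 5) bool
logit.masked_fill_(mask, -float('inf'))          # padded candidates get -inf scores
_, a_t = logit.max(1)                            # argmax now ignores the padding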
    if '.module' in key:
        state_dict[key.replace('.module', '')] = state_dict.pop(key)
self.model.load_state_dict(state_dict, strict=False)
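The '.module' renaming above handles checkpoints saved from a DataParallel-wrapped model. A sketch of the same idea that iterates over a copied key list so the dict is not mutated mid-loop (the helper name is assumed):

def strip_module_prefix(state_dict):
    # Checkpoints saved via nn.DataParallel carry a leading 'module.' on every key.
    for key in list(state_dict.keys()):
        if key.startswith('module.'):
            state_dict[key[len('module.'):]] = state_dict.pop(key)
    return state_dict

# state_dict = strip_module_prefix(torch.load(path, map_location='cpu'))
# model.load_state_dict(state_dict, strict=False)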
if __name__ == "__main__":
    # Build Class
    gqa = GQA()

    # Load Model
    if args.load is not None:
        gqa.load(args.load)

    # Test or Train
    if args.test is not None:
        args.fast = args.tiny = False       # Always load all data in test
        if 'submit' in args.test:
            gqa.predict(
                get_tuple(args.test, bs=args.batch_size,
                          shuffle=False, drop_last=False),
                dump=os.path.join(args.output, 'submit_predict.json')
            )
        if 'testdev' in args.test:
            result = gqa.evaluate(
                get_tuple('testdev', bs=args.batch_size,
                          shuffle=False, drop_last=False),
                dump=os.path.join(args.output, 'testdev_predict.json')
            )
            print(result)
    else:
        # print("Train Oracle: %0.2f" % (gqa.oracle_score(gqa.train_tuple) * 100))
        print('Splits in Train data:', gqa.train_tuple.dataset.splits)
state_dict = torch.load("%s.pth" % path)
self.model.load_state_dict(state_dict)
if __name__ == "__main__":
    # Build Class
    nlvr2 = NLVR2()

    # Load Model
    if args.load is not None:
        nlvr2.load(args.load)

    # Test or Train
    if args.test is not None:
        args.fast = args.tiny = False       # Always load all data in test
        if 'hidden' in args.test:
            nlvr2.predict(
                get_tuple(args.test, bs=args.batch_size,
                          shuffle=False, drop_last=False),
                dump=os.path.join(args.output, 'hidden_predict.csv')
            )
        elif 'test' in args.test or 'valid' in args.test:
            result = nlvr2.evaluate(
                get_tuple(args.test, bs=args.batch_size,
                          shuffle=False, drop_last=False),
                dump=os.path.join(args.output, '%s_predict.csv' % args.test)
            )
            print(result)
        else:
            assert False, "No such test option for %s" % args.test
    else:
        print('Splits in Train data:', nlvr2.train_tuple.dataset.splits)
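get_tuple in these entry scripts bundles a dataset, its DataLoader, and an evaluator. A rough sketch of that pattern built around a namedtuple; every name below is a placeholder, not the repository's code:

import collections
from torch.utils.data import DataLoader

DataTuple = collections.namedtuple("DataTuple", "dataset loader evaluator")

def get_tuple(dataset, evaluator, bs, shuffle=False, drop_last=False):
    loader = DataLoader(dataset, batch_size=bs, shuffle=shuffle,
                        drop_last=drop_last, num_workers=0)
    return DataTuple(dataset=dataset, loader=loader, evaluator=evaluator)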
def train(self, train_tuple: DataTuple, eval_tuple: DataTuple):
    train_ld = train_tuple.loader

    # Optimizer
    from lxrt.optimization import BertAdam
    batch_per_epoch = len(train_ld)
    t_total = int(batch_per_epoch * args.epochs)
    warmup_ratio = 0.05
    warmup_iters = int(t_total * warmup_ratio)
    print("Batch per epoch: %d" % batch_per_epoch)
    print("Total Iters: %d" % t_total)
    print("Warm up Iters: %d" % warmup_iters)
    optim = BertAdam(self.model.parameters(), lr=args.lr, warmup=warmup_ratio, t_total=t_total)

    # Train
    best_eval_loss = 9595.
    for epoch in range(args.epochs):
        # Train
        self.model.train()
        total_loss = 0.
        total_losses = 0.
        uid2ans = {}
        for batch in tqdm(train_ld, total=len(train_ld)):
            loss, losses, logit = self.train_batch(optim, batch)
            total_loss += loss
            total_losses += losses
            if args.task_qa:
                score, label = logit.max(1)
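train_batch above returns the scalar loss, the per-task losses, and the QA logits. A minimal sketch of what such a step usually does; the model and criterion names are assumptions, not the repository's code:

import torch
import torch.nn as nn

def train_batch(model, criterion, optim, feats, labels, clip=1.0):
    optim.zero_grad()
    logit = model(feats)                                 # (batch, num_answers)
    loss = criterion(logit, labels)
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), clip)   # keep gradient norms bounded
    optim.step()
    score, label = logit.max(1)                          # predicted answer ids, as in the loop above
    return loss.item(), label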