Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
vals = normalize(inds, vals)
tfidf = torch.sparse.FloatTensor(torch.LongTensor(inds), torch.FloatTensor(vals))
tfidf = tfidf.coalesce()
# Latent word embeddings
emb_dim = 300
glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
weights, word2emb = utils.create_glove_embedding_init(dictionary.idx2word[N:], glove_file)
print('tf-idf stochastic matrix (%d x %d) is generated.' % (tfidf.size(0), tfidf.size(1)))
return tfidf, weights
if __name__ == '__main__':
    # Script entry point: load the pickled vocabulary, then build the tf-idf
    # matrix and the embedding weights from the listed question splits.
    question_splits = ['train', 'val', 'test2015']
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    tfidf, weights = tfidf_from_questions(question_splits, dictionary)
if __name__ == '__main2__':
    # Disabled smoke test: `__name__` is never '__main2__', so this body only
    # runs if the guard is edited by hand. It iterates a DataLoader over the
    # dataset and prints the size of the first tensor of each batch.
    from torch.utils.data import DataLoader

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    # NOTE: despite the variable name, this is built from the 'val' split.
    train_dset = VQAFeatureDataset('val', dictionary, adaptive=True)

    # Alternative setup kept for reference (Visual Genome dataset):
    # name = 'train'
    # eval_dset = VQAFeatureDataset(name, dictionary)
    # vg_dset = VisualGenomeFeatureDataset(name, eval_dset.features, eval_dset.spatials, dictionary)
    # train_loader = DataLoader(vg_dset, 10, shuffle=True, num_workers=1)

    loader = DataLoader(
        train_dset,
        batch_size=10,
        shuffle=True,
        num_workers=1,
        collate_fn=utils.trim_collate,
    )
    for i, batch in enumerate(loader):
        # Each batch unpacks into four items; only the first one is inspected.
        v, b, q, a = batch
        print(v.size())
if word not in word2emb:
continue
weights[idx] = word2emb[word]
return weights, word2emb
if __name__ == '__main__':
    # Script entry point: build the vocabulary for the selected task, persist
    # it, and save a GloVe-initialised embedding matrix next to it.
    args = parse_args()

    # The 'vqa' task keeps its files directly under data/; any other task
    # value uses the flickr30k subfolder.
    if args.task == 'vqa':
        dataroot = 'data'
    else:
        dataroot = 'data/flickr30k'
    dictionary_path = os.path.join(dataroot, 'dictionary.pkl')

    # Write the freshly built dictionary, then read it back from the same file.
    create_dictionary(dataroot, args.task).dump_to_file(dictionary_path)
    d = Dictionary.load_from_file(dictionary_path)

    emb_dim = 300
    glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)

    init_path = os.path.join(dataroot, 'glove6b_init_%dd.npy' % emb_dim)
    np.save(init_path, weights)
vals = entry.split(' ')
word = vals[0]
vals = list(map(float, vals[1:]))
word2emb[word] = np.array(vals)
for idx, word in enumerate(idx2word):
if word not in word2emb:
continue
weights[idx] = word2emb[word]
return weights, word2emb
if __name__ == '__main__':
    # Script entry point: build the vocabulary from the 'data' folder, persist
    # it, and save a GloVe-initialised embedding matrix for that vocabulary.
    dictionary_path = 'data/glove/dictionary.pkl'

    d = create_dictionary('data')
    d.dump_to_file(dictionary_path)

    # Reload from the file that was just written. The previous code read
    # 'data/dictionary.pkl' instead, so the embeddings could be built from a
    # stale or missing dictionary rather than the one created above.
    d = Dictionary.load_from_file(dictionary_path)

    emb_dim = 300
    glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)

    # NOTE(review): this is an absolute path ('/data/...') while every other
    # path in this block is relative ('data/...'). Left unchanged because
    # downstream readers may depend on it; confirm which location is intended.
    np.save('/data/glove/glove6b_init_%dd.npy' % emb_dim, weights)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)
torch.backends.cudnn.benchmark = True
if args.task == 'vqa':
from train import train
dict_path = 'data/dictionary.pkl'
dictionary = Dictionary.load_from_file(dict_path)
train_dset = VQAFeatureDataset('train', dictionary, adaptive=True)
val_dset = VQAFeatureDataset('val', dictionary, adaptive=True)
w_emb_path = 'data/glove6b_init_300d.npy'
elif args.task == 'flickr':
from train_flickr import train
dict_path = 'data/flickr30k/dictionary.pkl'
dictionary = Dictionary.load_from_file(dict_path)
train_dset = Flickr30kFeatureDataset('train', dictionary)
val_dset = Flickr30kFeatureDataset('val', dictionary)
w_emb_path = 'data/flickr30k/glove6b_init_300d.npy'
args.op = ''
args.gamma = 1
args.tfidf = False
utils.create_dir(args.output)
logger = utils.Logger(os.path.join(args.output, 'args.txt'))
logger.write(args.__repr__())
batch_size = args.batch_size
constructor = 'build_%s' % args.model
model = getattr(base_model, constructor)(train_dset, args.num_hid, args.op, args.gamma, args.task).cuda()
seed = random.randint(1, 10000)
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(args.seed)
else:
seed = args.seed
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)
torch.backends.cudnn.benchmark = True
output = args.output + args.model + '_' + str(args.num_hid) + '_' + args.activation + '_' + args.optimizer +\
'_D' + str(args.dropout) + '_DL' + str(args.dropout_L) + '_DG' + str(args.dropout_G) + '_DW' + str(args.dropout_W) \
+ '_DC' + str(args.dropout_C) + '_w' + str(args.weight_decay) + '_SD' + str(seed) \
+ '_initializer_' + args.initializer
dictionary = Dictionary.load_from_file('data/dictionary.pkl')
train_dset = VQAFeatureDataset('train', dictionary)
if args.model == 'baseline':
model = build_baseline(train_dset, num_hid=args.num_hid, dropout= args.dropout, norm=args.norm,\
activation=args.activation, dropL=args.dropout_L, dropG=args.dropout_G,\
dropW=args.dropout_W, dropC=args.dropout_C)
elif args.model == 'A1':
model = build_model_A1(train_dset, num_hid=args.num_hid, dropout= args.dropout, norm=args.norm,\
activation=args.activation, dropL=args.dropout_L, dropG=args.dropout_G,\
dropW=args.dropout_W, dropC=args.dropout_C)
elif args.model == 'A2':
model = build_model_A2(train_dset, num_hid=args.num_hid, dropout= args.dropout, norm=args.norm,\
activation=args.activation, dropL=args.dropout_L, dropG=args.dropout_G,\
dropW=args.dropout_W, dropC=args.dropout_C)
elif args.model == 'A3':
model = build_model_A3(train_dset, num_hid=args.num_hid, dropout= args.dropout, norm=args.norm,\
if args.seed != -1:
print("Predefined randam seed %d" % args.seed)
else:
# fix seed
args.seed = random.randint(1, 10000)
print("Choose random seed %d" % args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
if "ban" == args.fusion:
fusion_methods = args.fusion+"_"+str(args.ban_gamma)
else:
fusion_methods = args.fusion
dictionary = Dictionary.load_from_file(
join(args.data_folder, 'glove/dictionary.pkl'))
if args.dataset == "vqa_cp":
coco_train_features = Image_Feature_Loader(
'train', args.relation_type,
adaptive=args.adaptive, dataroot=args.data_folder)
coco_val_features = Image_Feature_Loader(
'val', args.relation_type,
adaptive=args.adaptive, dataroot=args.data_folder)
val_dset = VQA_cp_Dataset(
'test', dictionary, coco_train_features, coco_val_features,
adaptive=args.adaptive, pos_emb_dim=args.imp_pos_emb_dim,
dataroot=args.data_folder)
train_dset = VQA_cp_Dataset(
'train', dictionary, coco_train_features,
coco_val_features, adaptive=args.adaptive,
pos_emb_dim=args.imp_pos_emb_dim,
parser.add_argument('--model', type=str, default='baseline0_newatt')
parser.add_argument('--output', type=str, default='saved_models/exp0')
parser.add_argument('--batch_size', type=int, default=512)
parser.add_argument('--seed', type=int, default=1111, help='random seed')
args = parser.parse_args()
return args
if __name__ == '__main__':
    # Training entry point: parse CLI options, seed the RNGs, build the
    # datasets and the model, then hand everything to `train`.
    args = parse_args()

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    train_dset = VQAFeatureDataset('train', dictionary)
    eval_dset = VQAFeatureDataset('val', dictionary)
    batch_size = args.batch_size

    # The model factory is looked up by name on `base_model`
    # ('build_<model>'), so --model selects which constructor runs.
    constructor = 'build_%s' % args.model
    build_model = getattr(base_model, constructor)
    model = build_model(train_dset, args.num_hid).cuda()
    # Word embeddings are initialised from the precomputed .npy file before
    # the model is wrapped in DataParallel.
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')
    model = nn.DataParallel(model).cuda()

    # NOTE(review): the evaluation loader is shuffled as well as the
    # training loader; kept as in the original.
    train_loader = DataLoader(
        train_dset, batch_size, shuffle=True, num_workers=1)
    eval_loader = DataLoader(
        eval_dset, batch_size, shuffle=True, num_workers=1)

    train(model, train_loader, eval_loader, args.epochs, args.output)