# Assumed imports (not part of the original listing): the snippets below
# reference these modules; the paths follow the DuReader baseline layout.
import logging
import multiprocessing
import os
import pickle

import six
import paddle.fluid as fluid

import rc_model
from dataset import BRCDataset
from vocab import Vocab


def predict(logger, args):
    """Run inference on the test dataset."""
    logger.info('Load data_set and vocab...')
    with open(os.path.join(args.vocab_dir, 'vocab.data'), 'rb') as fin:
        vocab = pickle.load(fin)
    logger.info('vocab size is {} and embed dim is {}'.format(
        vocab.size(), vocab.embed_dim))
    brc_data = BRCDataset(
        args.max_p_num, args.max_p_len, args.max_q_len, dev_files=args.testset)
    logger.info('Converting text into ids...')
    brc_data.convert_to_ids(vocab)
    logger.info('Initialize the model...')
    # build model
    main_program = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(main_program, startup_prog):
        with fluid.unique_name.guard():
            avg_cost, s_probs, e_probs, match, feed_order = rc_model.rc_model(
                args.hidden_size, vocab, args)
    # initialize parameters; the device branch mirrors the train() snippet below
    if not args.use_gpu:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    else:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
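# Not in the original listing: a minimal sketch (Paddle 1.x fluid API) of how
# parameter initialization typically continues once the program, place and
# dev_count are built; the snippet above ends before this step.
def _init_params_sketch(place, startup_prog):
    exe = fluid.Executor(place)  # executor bound to the chosen device
    exe.run(startup_prog)        # run the startup program once to create the parameters
    return exe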
# The snippets below come from a TensorFlow-style baseline of the same model;
# assumed import (module path per the DuReader baseline layout):
from rc_model import RCModel


def evaluate(args):
    """
    evaluates the trained model on the dev files
    """
    logger = logging.getLogger("brc")
    logger.info('Load data_set and vocab...')
    with open(os.path.join(args.vocab_dir, 'vocab.data'), 'rb') as fin:
        vocab = pickle.load(fin)
    assert len(args.dev_files) > 0, 'No dev files are provided.'
    brc_data = BRCDataset(args.max_p_num, args.max_p_len, args.max_q_len,
                          dev_files=args.dev_files)
    logger.info('Converting text into ids...')
    brc_data.convert_to_ids(vocab)
    logger.info('Restoring the model...')
    rc_model = RCModel(vocab, args)
    rc_model.restore(model_dir=args.model_dir, model_prefix=args.algo)
    logger.info('Evaluating the model on dev set...')
    dev_batches = brc_data.gen_mini_batches('dev', args.batch_size,
                                            pad_id=vocab.get_id(vocab.pad_token),
                                            shuffle=False)
    dev_loss, dev_bleu_rouge = rc_model.evaluate(
        dev_batches, result_dir=args.result_dir, result_prefix='dev.predicted')
    logger.info('Loss on dev set: {}'.format(dev_loss))
    logger.info('Result on dev set: {}'.format(dev_bleu_rouge))
    logger.info('Predicted answers are saved to {}'.format(args.result_dir))
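# Hypothetical invocation sketch (not in the original listing): the flag names
# are assumed from the attributes read off `args` above; the real baseline
# defines many more model flags that RCModel consumes internally.
def _evaluate_example():
    import argparse

    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser()
    parser.add_argument('--vocab_dir', default='../data/vocab/')
    parser.add_argument('--model_dir', default='../data/models/')
    parser.add_argument('--result_dir', default='../data/results/')
    parser.add_argument('--algo', default='BIDAF')
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--max_p_num', type=int, default=5)
    parser.add_argument('--max_p_len', type=int, default=500)
    parser.add_argument('--max_q_len', type=int, default=60)
    parser.add_argument('--dev_files', nargs='+', default=['../data/dev.json'])
    evaluate(parser.parse_args())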
def predict(args):
    """
    predicts answers for test files
    """
    logger = logging.getLogger("brc")
    logger.info('Load data_set and vocab...')
    with open(os.path.join(args.vocab_dir, 'vocab.data'), 'rb') as fin:
        vocab = pickle.load(fin)
    assert len(args.test_files) > 0, 'No test files are provided.'
    brc_data = BRCDataset(args.max_p_num, args.max_p_len, args.max_q_len,
                          test_files=args.test_files)
    logger.info('Converting text into ids...')
    brc_data.convert_to_ids(vocab)
    logger.info('Restoring the model...')
    rc_model = RCModel(vocab, args)
    rc_model.restore(model_dir=args.model_dir, model_prefix=args.algo)
    logger.info('Predicting answers for test set...')
    test_batches = brc_data.gen_mini_batches('test', args.batch_size,
                                             pad_id=vocab.get_id(vocab.pad_token),
                                             shuffle=False)
    rc_model.evaluate(test_batches,
                      result_dir=args.result_dir, result_prefix='test.predicted')
def train(args):
    """
    trains the reading comprehension model
    """
    logger = logging.getLogger("brc")
    logger.info('Load data_set and vocab...')
    # the with-block closes the file automatically; no explicit close needed
    with open(os.path.join(args.vocab_dir, 'vocab.pkl'), 'rb') as fin:
        vocab = pickle.load(fin)
    brc_data = BRCDataset(args.max_p_num, args.max_p_len, args.max_q_len,
                          args.train_files, args.dev_files)
    logger.info('Converting text into ids...')
    brc_data.convert_to_ids(vocab)
    logger.info('Initialize the model...')
    rc_model = RCModel(vocab, args)
    logger.info('Training the model...')
    model_saver = rc_model.train(brc_data, args.epochs, args.batch_size,
                                 save_dir=args.model_dir,
                                 save_prefix=args.algo,
                                 dropout_keep_prob=args.dropout_keep_prob)
    logger.info('Done with model training!')
    return model_saver
def predict(args, model_saver):
    """
    predicts answers for test files, reusing the saver returned by train()
    """
    logger = logging.getLogger("brc")
    logger.info('Load data_set and vocab...')
    with open(os.path.join(args.vocab_dir, 'vocab.pkl'), 'rb') as fin:
        vocab = pickle.load(fin)
    assert len(args.test_files) > 0, 'No test files are provided.'
    brc_data = BRCDataset(args.max_p_num, args.max_p_len, args.max_q_len,
                          test_files=args.test_files)
    logger.info('Converting text into ids...')
    brc_data.convert_to_ids(vocab)
    logger.info('Restoring the model...')
    rc_model = RCModel(vocab, args, model_saver)
    rc_model.restore(model_dir=args.model_dir, model_prefix=args.algo)
    logger.info('Predicting answers for test set...')
    test_batches = brc_data.gen_mini_batches('test', args.batch_size,
                                             pad_id=vocab.get_id(vocab.pad_token),
                                             shuffle=False)
    rc_model.evaluate(test_batches,
                      result_dir=args.result_dir, result_prefix='test.predicted')
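# Not in the original listing: a short sketch of how train() and the
# predict(args, model_saver) variant above compose, so the saver created
# during training is reused when restoring weights for inference.
def _train_then_predict(args):
    model_saver = train(args)      # train() returns the saver it created
    predict(args, model_saver)     # reuse it to restore the model for inference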
def train(logger, args):
    """train a model"""
    logger.info('Load data_set and vocab...')
    with open(os.path.join(args.vocab_dir, 'vocab.data'), 'rb') as fin:
        if six.PY2:
            vocab = pickle.load(fin)
        else:
            vocab = pickle.load(fin, encoding='bytes')
    logger.info('vocab size is {} and embed dim is {}'.format(
        vocab.size(), vocab.embed_dim))
    brc_data = BRCDataset(args.max_p_num, args.max_p_len, args.max_q_len,
                          args.trainset, args.devset)
    logger.info('Converting text into ids...')
    brc_data.convert_to_ids(vocab)
    logger.info('Initialize the model...')
    if not args.use_gpu:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    else:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    # build model
    main_program = fluid.Program()
    startup_prog = fluid.Program()
    if args.enable_ce:
        # assumption: the source truncates here; fixing the random seeds for
        # continuous-evaluation runs is the conventional body of this branch
        main_program.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed
def prepare(logger, args):
    """
    checks the data, creates the directories, prepares the vocabulary and embeddings
    """
    logger.info('Checking the data files...')
    for data_path in args.trainset + args.devset + args.testset:
        assert os.path.exists(data_path), '{} file does not exist.'.format(
            data_path)
    logger.info('Preparing the directories...')
    for dir_path in [args.vocab_dir, args.save_dir, args.result_dir]:
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
    logger.info('Building vocabulary...')
    brc_data = BRCDataset(args.max_p_num, args.max_p_len, args.max_q_len,
                          args.trainset, args.devset, args.testset)
    vocab = Vocab(lower=True)
    for word in brc_data.word_iter('train'):
        vocab.add(word)
    unfiltered_vocab_size = vocab.size()
    vocab.filter_tokens_by_cnt(min_cnt=2)
    filtered_num = unfiltered_vocab_size - vocab.size()
    logger.info('After filtering {} tokens, the final vocab size is {}'.format(
        filtered_num, vocab.size()))
    logger.info('Assigning embeddings...')
    vocab.randomly_init_embeddings(args.embed_size)
    logger.info('Saving vocab...')
    with open(os.path.join(args.vocab_dir, 'vocab.data'), 'wb') as fout:
        # counterpart of the pickle.load(...) calls in the snippets above
        pickle.dump(vocab, fout)
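# Not in the original listing: a hypothetical end-to-end sketch. prepare()
# must run before train()/predict(), since both unpickle the 'vocab.data'
# file that prepare() writes.
def _run_pipeline(logger, args):
    prepare(logger, args)   # check data, create dirs, build and save the vocab
    train(logger, args)     # train on args.trainset, validating on args.devset
    predict(logger, args)   # write predicted answers for args.testset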