# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# --- fragment: end-of-epoch checkpointing + best-model selection (BERT fine-tuning) ---
# NOTE(review): original indentation has been stripped from this chunk; names such as
# `output_dir`, `ckpt_name`, `model`, `tic`, `only_inference`, `task_name`,
# `metric_history`, `test_data_list` and `test` are defined elsewhere in the script.
params_saved = os.path.join(output_dir, ckpt_name)
nlp.utils.save_parameters(model, params_saved)
logging.info('params saved in: %s', params_saved)
# wall-clock duration since the previous `tic`
toc = time.time()
logging.info('Time cost=%.2fs', toc - tic)
tic = toc
if not only_inference:
# we choose the best model based on metric[0],
# assuming higher score stands for better model quality
# sort descending by the first validation metric; entry [2] of each tuple is metric_val
metric_history.sort(key=lambda x: x[2][0], reverse=True)
epoch_id, metric_nm, metric_val = metric_history[0]
ckpt_name = 'model_bert_{0}_{1}.params'.format(task_name, epoch_id)
params_saved = os.path.join(output_dir, ckpt_name)
# reload the best epoch's checkpoint before running test inference
nlp.utils.load_parameters(model, params_saved)
metric_str = 'Best model at epoch {}. Validation metrics:'.format(epoch_id)
# build a lazy %-style format string: one ':%.4f' slot per metric name
metric_str += ','.join([i + ':%.4f' for i in metric_nm])
logging.info(metric_str, *metric_val)
# inference on test data
for segment, test_data in test_data_list:
test(test_data, segment)
# --- fragment: end-of-epoch checkpointing + best-model selection (XLNet fine-tuning) ---
# NOTE(review): indentation stripped; `output_dir`, `ckpt_name`, `model`, `tic`,
# `args`, `metric_history`, `test_data_list` and `test` come from elsewhere.
params_saved = os.path.join(output_dir, ckpt_name)
nlp.utils.save_parameters(model, params_saved)
logging.info('params saved in: %s', params_saved)
# wall-clock duration since the previous `tic`
toc = time.time()
logging.info('Time cost=%.2fs', toc - tic)
tic = toc
if not args.only_inference:
# we choose the best model based on metric[0],
# assuming higher score stands for better model quality
metric_history.sort(key=lambda x: x[2][0], reverse=True)
epoch_id, metric_nm, metric_val = metric_history[0]
ckpt_name = 'model_xlnet_{0}_{1}.params'.format(
args.task_name, epoch_id)
params_saved = os.path.join(output_dir, ckpt_name)
# reload the best epoch's checkpoint before running test inference
nlp.utils.load_parameters(model, params_saved)
# here the epoch is reported 1-based (`epoch_id + 1`), unlike the checkpoint filename
metric_str = 'Best model at epoch {}. Validation metrics:'.format(
epoch_id + 1)
# one lazy ':%.4f' slot per metric name, filled by logging with *metric_val
metric_str += ','.join([i + ':%.4f' for i in metric_nm])
logging.info(metric_str, *metric_val)
# inference on test data
for segment, test_data in test_data_list:
test(test_data, segment)
print('finish test!')
# --- fragment: tail of a (likely distributed) pre-training loop: evaluation + checkpointing ---
# NOTE(review): `dataset_eval`, `nsp_loss`, `mlm_loss`, `vocab`, `ctx`, `batch_num`,
# `is_master_node`, `save_states`, `save_parameters`, `step_num`, `trainer`,
# `local_rank` and `train_begin_time` are defined elsewhere in the original script.
evaluate(dataset_eval, model, nsp_loss, mlm_loss, len(vocab), [ctx],
args.log_interval, args.dtype)
batch_num += 1
# master node persists trainer/optimizer state; rank 0 persists model weights
if is_master_node:
save_states(step_num, trainer, args.ckpt_dir, local_rank)
if local_rank == 0:
save_parameters(step_num, model, args.ckpt_dir)
# block until all asynchronous MXNet operations finish so the timing below is accurate
mx.nd.waitall()
train_end_time = time.time()
logging.info('Train cost={:.1f}s'.format(train_end_time - train_begin_time))
# --- fragment: script entry point for a BERT pre-training run ---
if __name__ == '__main__':
# NOTE(review): `random_seed` is drawn here but its use is not visible in this chunk
random_seed = random.randint(0, 1000)
nlp.utils.mkdir(args.ckpt_dir)
# single-GPU context selected by this worker's local rank
ctx = mx.gpu(local_rank)
dataset_name, vocab = args.dataset_name, None
if args.sentencepiece:
logging.info('loading vocab file from sentence piece model: %s', args.sentencepiece)
if args.dataset_name:
# --sentencepiece takes precedence over --dataset_name for the vocabulary
warnings.warn('Both --dataset_name and --sentencepiece are provided. '
'The vocabulary will be loaded based on --sentencepiece')
dataset_name = None
vocab = nlp.vocab.BERTVocab.from_sentencepiece(args.sentencepiece)
# builds the model plus NSP/MLM losses; may resume from a checkpoint at start_step
model, nsp_loss, mlm_loss, vocab = get_model_loss([ctx], args.model, args.pretrained,
dataset_name, vocab, args.dtype,
ckpt_dir=args.ckpt_dir,
start_step=args.start_step)
logging.debug('Model created')
# specific language governing permissions and limitations
# under the License.
# --- fragment: imports and path setup for a word language-model example script ---
import argparse
import time
import math
import os
import sys
import mxnet as mx
from mxnet import gluon, autograd
import gluonnlp as nlp
# make the repository root (two levels up from this file) importable for local modules
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
sys.path.append(os.path.join(curr_path, '..', '..'))
# require gluonnlp >= 0.7.0
nlp.utils.check_version('0.7.0')
# --- fragment: CLI options for an RNN/LSTM language model on Wikitext-2 ---
# NOTE(review): this chunk is truncated — the final add_argument call is cut mid-statement.
parser = argparse.ArgumentParser(description=
'MXNet Autograd RNN/LSTM Language Model on Wikitext-2.')
parser.add_argument('--model', type=str, default='lstm',
help='type of recurrent net (rnn_tanh, rnn_relu, lstm, gru)')
parser.add_argument('--emsize', type=int, default=400,
help='size of word embeddings')
parser.add_argument('--nhid', type=int, default=1150,
help='number of hidden units per layer')
parser.add_argument('--nlayers', type=int, default=3,
help='number of layers')
parser.add_argument('--lr', type=float, default=30,
help='initial learning rate')
parser.add_argument('--clip', type=float, default=0.25,
help='gradient clipping')
parser.add_argument('--epochs', type=int, default=750,
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint:disable=missing-docstring
# --- fragment: imports and CLI for a sentence-generation (beam search / sampling) script ---
# NOTE(review): truncated — the 'random-sample' subparser call is cut mid-statement.
import argparse
import numpy as np
import mxnet as mx
import gluonnlp as nlp
import model # local 'model' module with the addition of GPT-2
# require gluonnlp >= 0.7.1
nlp.utils.check_version('0.7.1')
parser = argparse.ArgumentParser(description='Generate sentences by beam search. '
'We load a LSTM model that is pre-trained on '
'WikiText as our encoder.')
# beam search sampler options
# one subcommand per decoding strategy; the chosen one is stored in args.command
subparsers = parser.add_subparsers(help='Sequence generation methods.',
dest='command')
subparsers.required = True
beam_search_parser = subparsers.add_parser('beam-search', help='Use beam search for decoding.')
beam_search_parser.add_argument('--alpha', type=float, default=0.0,
help='Alpha in the length penalty term.')
beam_search_parser.add_argument('--k', type=int, default=5, help='K in the length penalty term.')
# random sampler options
random_sample_parser = subparsers.add_parser('random-sample',
# --- fragment: imports and path setup for the Big LSTM 2048-512 language model on GBW ---
import math
import os
import sys
import argparse
import numpy as np
import mxnet as mx
from mxnet import gluon
import gluonnlp as nlp
from gluonnlp.utils import Parallel
from gluonnlp.model.train.language_model import ParallelBigRNN
# local sampled-softmax candidate sampler
from sampler import LogUniformSampler
# make the repository root (two levels up from this file) importable
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
sys.path.append(os.path.join(curr_path, '..', '..'))
# require gluonnlp >= 0.7.0
nlp.utils.check_version('0.7.0')
###############################################################################
# Arg parser
###############################################################################
# --- fragment: CLI options for the Big LSTM language model (likely continues past this chunk) ---
parser = argparse.ArgumentParser(description=
'Gluon-NLP Big LSTM 2048-512 Language Model on GBW')
parser.add_argument('--save', type=str, default='model.params',
help='path to save the final model.')
parser.add_argument('--emsize', type=int, default=512,
help='size of word embeddings')
parser.add_argument('--nhid', type=int, default=2048,
help='number of hidden units per layer')
parser.add_argument('--nproj', type=int, default=512,
help='number of projection units per layer. Could be different from embsize')
parser.add_argument('--nlayers', type=int, default=1,
help='number of layers')
# --- fragment: per-epoch dev evaluation, early stopping and best-model reload (BERT fine-tuning) ---
# NOTE(review): indentation stripped; `dev_data_list`, `metric`, `best_metric`,
# `patience`, `args`, `epoch_id`, `only_inference`, `task_name`, `output_dir`,
# `model` and `tic` are defined elsewhere in the original script.
for segment, dev_data in dev_data_list:
metric_nm, metric_val = evaluate(dev_data, metric, segment)
# early stopping: reset patience on improvement, otherwise decrement it
if best_metric is None or metric_val >= best_metric:
best_metric = metric_val
patience = args.early_stop
else:
if args.early_stop is not None:
patience -= 1
# record (epoch, metric names, metric values) for later best-model selection
metric_history.append((epoch_id, metric_nm, metric_val))
if not only_inference:
# save params
ckpt_name = 'model_bert_{0}_{1}.params'.format(task_name, epoch_id)
params_saved = os.path.join(output_dir, ckpt_name)
nlp.utils.save_parameters(model, params_saved)
logging.info('params saved in: %s', params_saved)
# wall-clock duration since the previous `tic`
toc = time.time()
logging.info('Time cost=%.2fs', toc - tic)
tic = toc
if not only_inference:
# we choose the best model based on metric[0],
# assuming higher score stands for better model quality
# sort descending by the first validation metric; entry [2] is metric_val
metric_history.sort(key=lambda x: x[2][0], reverse=True)
epoch_id, metric_nm, metric_val = metric_history[0]
ckpt_name = 'model_bert_{0}_{1}.params'.format(task_name, epoch_id)
params_saved = os.path.join(output_dir, ckpt_name)
# reload the best epoch's checkpoint
nlp.utils.load_parameters(model, params_saved)
metric_str = 'Best model at epoch {}. Validation metrics:'.format(epoch_id)
# one lazy ':%.4f' slot per metric name, filled by logging with *metric_val
metric_str += ','.join([i + ':%.4f' for i in metric_nm])
logging.info(metric_str, *metric_val)