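# The fragments below are collected PaddleHub 1.x fine-tuning snippets.
# A minimal shared preamble, assuming PaddleHub 1.x on PaddlePaddle 1.x and an
# argparse `args` namespace (use_gpu, num_epoch, batch_size, checkpoint_dir, ...):
import paddle.fluid as fluid
import paddlehub as hub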
module = hub.Module(name="senta_bilstm")
inputs, outputs, program = module.context(trainable=True)
# Download dataset and use LACClassifyReader to read dataset
dataset = hub.dataset.ChnSentiCorp()
reader = hub.reader.LACClassifyReader(
dataset=dataset, vocab_path=module.get_vocab_path())
sent_feature = outputs["sentence_feature"]
# Setup feed list for data feeder
# Must feed all the tensors the senta module needs
feed_list = [inputs["words"].name]
# Setup running config for PaddleHub Finetune API
config = hub.RunConfig(
use_cuda=args.use_gpu,
use_pyreader=False,
use_data_parallel=False,
num_epoch=args.num_epoch,
batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir,
strategy=hub.AdamWeightDecayStrategy())
# Define a classification finetune task by PaddleHub's API
cls_task = hub.TextClassifierTask(
data_reader=reader,
feature=sent_feature,
feed_list=feed_list,
num_classes=dataset.num_labels,
config=config)
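# A minimal training sketch, assuming the PaddleHub 1.x Task API:
# finetune_and_eval() trains and periodically evaluates according to `config`.
cls_task.finetune_and_eval()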
# New snippet: ERNIE-based sequence labeling (assumes an ERNIE module context and a matching sequence-label reader)
# Construct transfer learning network
# Use "sequence_output" for token-level output.
sequence_output = outputs["sequence_output"]
# Setup feed list for data feeder
# Must feed all the tensors the ERNIE module needs
feed_list = [
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
# Setup running config for PaddleHub Finetune API
config = hub.RunConfig(
use_data_parallel=False,
use_cuda=args.use_gpu,
batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir,
strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
# Define a sequence labeling finetune task by PaddleHub's API
# If add_crf=True, the network uses a CRF layer as the decoder
seq_label_task = hub.SequenceLabelTask(
data_reader=reader,
feature=sequence_output,
feed_list=feed_list,
max_seq_len=args.max_seq_len,
num_classes=dataset.num_labels,
config=config,
add_crf=True)
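# Launch training for the labeling task (assumption: same PaddleHub 1.x Task API).
seq_label_task.finetune_and_eval()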
# New snippet: ERNIE-based regression (assumes the same ERNIE module context and reader)
pooled_output = outputs["pooled_output"]
# Setup feed list for data feeder
# Must feed all the tensors the ERNIE module needs
feed_list = [
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
# Select a finetune strategy, set up the config, and finetune
strategy = hub.AdamWeightDecayStrategy(
warmup_proportion=args.warmup_proportion,
weight_decay=args.weight_decay,
learning_rate=args.learning_rate)
# Setup running config for PaddleHub Finetune API
config = hub.RunConfig(
eval_interval=300,
use_data_parallel=args.use_data_parallel,
use_cuda=args.use_gpu,
num_epoch=args.num_epoch,
batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir,
strategy=strategy)
# Define a regression finetune task by PaddleHub's API
reg_task = hub.RegressionTask(
data_reader=reader,
feature=pooled_output,
feed_list=feed_list,
config=config)
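# Launch training for the regression task (assumption: same PaddleHub 1.x Task API).
reg_task.finetune_and_eval()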
# Legacy-API snippet: build the classification head inside the module's program
with fluid.program_guard(program):
    label = fluid.layers.data(name="label", shape=[1], dtype='int64')
    pooled_output = outputs["pooled_output"]
    cls_task = hub.create_text_classification_task(
        feature=pooled_output, label=label, num_classes=dataset.num_labels)
# Step 4: select a finetune strategy and runtime config, then finetune and evaluate
strategy = hub.AdamWeightDecayStrategy(
learning_rate=5e-5,
warmup_proportion=0.1,
warmup_strategy="linear_warmup_decay",
weight_decay=0.01)
config = hub.RunConfig(
use_cuda=True, num_epoch=3, batch_size=32, strategy=strategy)
feed_list = [
inputs["input_ids"].name, inputs["position_ids"].name,
inputs["segment_ids"].name, inputs["input_mask"].name, label.name
]
hub.finetune_and_eval(
task=cls_task, data_reader=reader, feed_list=feed_list, config=config)
# New snippet: sequence labeling variant with a tunable AdamWeightDecayStrategy
# Setup feed list for data feeder
# Must feed all the tensors the module needs
feed_list = [
inputs["input_ids"].name, inputs["position_ids"].name,
inputs["segment_ids"].name, inputs["input_mask"].name
]
# Select a finetune strategy
strategy = hub.AdamWeightDecayStrategy(
warmup_proportion=args.warmup_proportion,
weight_decay=args.weight_decay,
learning_rate=args.learning_rate)
# Setup running config for PaddleHub Finetune API
config = hub.RunConfig(
use_data_parallel=args.use_data_parallel,
use_cuda=args.use_gpu,
num_epoch=args.num_epoch,
batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir,
strategy=strategy)
# Define a sequence labeling finetune task by PaddleHub's API
# If add_crf=True, the network uses a CRF layer as the decoder
seq_label_task = hub.SequenceLabelTask(
data_reader=reader,
feature=sequence_output,
feed_list=feed_list,
max_seq_len=args.max_seq_len,
num_classes=dataset.num_labels,
config=config,
add_crf=True)  # assumption: closes the truncated call with the CRF decoder, as in the earlier snippet
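# Launch training for this variant as well (assumption: PaddleHub 1.x Task API).
seq_label_task.finetune_and_eval()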
# New snippet: image classification (a minimal setup sketch; the module and dataset names below are assumptions based on the PaddleHub demo)
module = hub.Module(name="resnet_v2_50_imagenet")
dataset = hub.dataset.Flowers()
input_dict, output_dict, program = module.context(trainable=True)
# Use ImageClassificationReader to read dataset
data_reader = hub.reader.ImageClassificationReader(
image_width=module.get_expected_image_width(),
image_height=module.get_expected_image_height(),
images_mean=module.get_pretrained_images_mean(),
images_std=module.get_pretrained_images_std(),
dataset=dataset)
feature_map = output_dict["feature_map"]
# Setup feed list for data feeder
feed_list = [input_dict["image"].name]
# Setup running config for PaddleHub Finetune API
config = hub.RunConfig(
use_data_parallel=False,
use_cuda=args.use_gpu,
batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir,
strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
# Define an image classification finetune task by PaddleHub's API
task = hub.ImageClassifierTask(
data_reader=data_reader,
feed_list=feed_list,
feature=feature_map,
num_classes=dataset.num_labels,
config=config)
data = ["./test/test_img_daisy.jpg", "./test/test_img_roses.jpg"]
label_map = dataset.label_dict()
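# A minimal prediction sketch, assuming the PaddleHub 1.x Task.predict API;
# the returned run states hold raw logits that map back to names via label_map.
run_states = task.predict(data=data)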
module = hub.Module(name="senta_bilstm")
inputs, outputs, program = module.context(trainable=True)
# Download dataset and use LACClassifyReader to read dataset
dataset = hub.dataset.ChnSentiCorp()
reader = hub.reader.LACClassifyReader(
dataset=dataset, vocab_path=module.get_vocab_path())
sent_feature = outputs["sentence_feature"]
# Setup feed list for data feeder
# Must feed all the tensors the senta module needs
feed_list = [inputs["words"].name]
# Setup running config for PaddleHub Finetune API
config = hub.RunConfig(
use_data_parallel=False,
use_cuda=args.use_gpu,
batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir,
strategy=hub.AdamWeightDecayStrategy())
# Define a classification finetune task by PaddleHub's API
cls_task = hub.TextClassifierTask(
data_reader=reader,
feature=sent_feature,
feed_list=feed_list,
num_classes=dataset.num_labels,
config=config)
# Data to be predicted
data = ["这家餐厅很好吃", "这部电影真的很差劲"]
# New snippet: image classification variant (same module/dataset/context setup as the image snippet above)
# Use ImageClassificationReader to read dataset
data_reader = hub.reader.ImageClassificationReader(
image_width=module.get_expected_image_width(),
image_height=module.get_expected_image_height(),
images_mean=module.get_pretrained_images_mean(),
images_std=module.get_pretrained_images_std(),
dataset=dataset)
feature_map = output_dict["feature_map"]
# Setup feed list for data feeder
feed_list = [input_dict["image"].name]
# Setup running config for PaddleHub Finetune API
config = hub.RunConfig(
use_data_parallel=args.use_data_parallel,
use_cuda=args.use_gpu,
num_epoch=args.num_epoch,
batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir,
strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
# Define an image classification finetune task by PaddleHub's API
task = hub.ImageClassifierTask(
data_reader=data_reader,
feed_list=feed_list,
feature=feature_map,
num_classes=dataset.num_labels,
config=config)
# Finetune by PaddleHub's API
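task.finetune_and_eval()  # assumption: PaddleHub 1.x Task API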