if startup_program is None:
    self._base_startup_program = clone_program(
        fluid.default_startup_program(), for_test=False)
else:
    self._base_startup_program = clone_program(
        startup_program, for_test=False)
self.is_checkpoint_loaded = False
self._base_compiled_program = None
# run config
self.config = config if config else RunConfig()
self.place = self.places[0]
self.device_count = len(self.places)
if self.config.use_data_parallel:
if not self.config.use_pyreader and self.config.batch_size < self.device_count:
logger.warning(
"Batch size({}) is less than the count of devices({}), which is not allowed in current Paddle versions"
.format(self.config.batch_size, self.device_count))
logger.warning("Batch size automatically adjusted to {}".format(
self.device_count))
self.config._batch_size = self.device_count
self.exe = fluid.Executor(place=self.place)
self.build_strategy = fluid.BuildStrategy()
# log item
if not os.path.exists(self.config.checkpoint_dir):
mkdir(self.config.checkpoint_dir)
tb_log_dir = os.path.join(self.config.checkpoint_dir, "visualization")
self.tb_writer = SummaryWriter(tb_log_dir)
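A quick note on the batch-size adjustment above: under data parallelism each device receives an equal slice of the batch, so a batch smaller than the device count would leave some devices with no samples. A tiny illustration (the values below are made up):
# Illustrative only: how a batch is divided across devices under data parallelism.
batch_size = 4
device_count = 8
samples_per_device = batch_size // device_count  # 0 -> some devices get nothing,
                                                 # hence batch_size is raised to device_count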
def check_module_valid(self, module_path):
try:
desc_pb_path = os.path.join(module_path, 'module_desc.pb')
if os.path.exists(desc_pb_path) and os.path.isfile(desc_pb_path):
info = {}
desc = module_desc_pb2.ModuleDesc()
with open(desc_pb_path, "rb") as fp:
desc.ParseFromString(fp.read())
info['version'] = desc.attr.map.data["module_info"].map.data[
"version"].s
return True, info
else:
logger.warning(
"%s does not exist, the module will be reinstalled" %
desc_pb_path)
except Exception:
    # Corrupted or unreadable module description; treat the module as invalid.
    pass
return False, None
def _check_paddle_version(self):
if version_compare(self.paddle_version, paddle.__version__):
logger.warning(
"This Module is generated by the PaddlePaddle with version %s, and the local PaddlePaddle version is %s, which may cause serious incompatible bug. Please upgrade PaddlePaddle to the latest version."
% (self.paddle_version, paddle.__version__))
return False
return True
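The check above depends on a version_compare helper that is not shown in these snippets. A minimal sketch, assuming it returns True when the first dotted version string is newer than the second (so the warning fires when the module was built with a newer PaddlePaddle than the one installed locally):
def version_compare(version_a, version_b):
    # Compare dotted version strings numerically, e.g. "1.8.0" vs "1.6.3".
    parts_a = [int(x) for x in version_a.split(".") if x.isdigit()]
    parts_b = [int(x) for x in version_b.split(".") if x.isdigit()]
    # Pad the shorter list with zeros so "1.6" compares like "1.6.0".
    length = max(len(parts_a), len(parts_b))
    parts_a += [0] * (length - len(parts_a))
    parts_b += [0] * (length - len(parts_b))
    return parts_a > parts_b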
""" Construct finetune Config """
self._log_interval = log_interval
self._eval_interval = eval_interval
self._save_ckpt_interval = save_ckpt_interval
self._use_cuda = use_cuda
self._checkpoint_dir = checkpoint_dir
self._num_epoch = num_epoch
self._batch_size = batch_size
self._use_pyreader = use_pyreader
self._use_data_parallel = use_data_parallel
if strategy is None:
self._strategy = DefaultStrategy()
else:
self._strategy = strategy
if enable_memory_optim:
logger.warning(
"The memory optimization feature has been dropped! PaddleHub now doesn't optimize the memory of the program."
)
self._enable_memory_optim = False
if checkpoint_dir is None:
now = int(time.time())
time_str = time.strftime("%Y%m%d%H%M%S", time.localtime(now))
self._checkpoint_dir = "ckpt_" + time_str
else:
self._checkpoint_dir = checkpoint_dir
logger.info("Checkpoint dir: {}".format(self._checkpoint_dir))
def scheduler_handler(self, max_train_steps):
scheduled_lr = fluid.layers.create_global_var(
shape=[1],
value=self.learning_rate,
dtype='float32',
persistable=True,
name="learning_rate")
if not self.scheduler["slanted_triangle"]["cut_fraction"]:
warmup_steps = int(max_train_steps * self.scheduler["warmup"])
linear_decay_start = int(
max_train_steps * self.scheduler["linear_decay"]["start_point"])
if linear_decay_start < warmup_steps:
logger.warning(
"linear decay can not start during warmup process,"
"it will start after warmup ends!")
linear_decay_start = warmup_steps
if self.scheduler["noam_decay"]:
if warmup_steps > 0:
scheduled_lr = fluid.layers.learning_rate_scheduler \
.noam_decay(1 / (warmup_steps * (self.learning_rate ** 2)),
warmup_steps)
else:
logger.warning(
"Noam decay learning rate scheduler should have positive \
warmup steps, using constant learning rate instead!")
if not self.scheduler["noam_decay"] and \
(warmup_steps > 0 or self.scheduler["linear_decay"]["start_point"]<1):
with self.main_program._lr_schedule_guard():
start_position = char_to_word_offset[answer_offset]
end_position = char_to_word_offset[
answer_offset + answer_length - 1]
# Only add answers where the text can be exactly recovered from the
# document. If this CAN'T happen it's likely due to weird Unicode
# stuff so we will just skip the example.
#
# Note that this means for training mode, every example is NOT
# guaranteed to be preserved.
actual_text = " ".join(
doc_tokens[start_position:(end_position + 1)])
cleaned_answer_text = " ".join(
tokenization.whitespace_tokenize(
orig_answer_text))
if actual_text.find(cleaned_answer_text) == -1:
logger.warning(
"Could not find answer: '%s' vs. '%s'" %
(actual_text, cleaned_answer_text))
continue
else:
start_position = -1
end_position = -1
orig_answer_text = ""
example = SquadExample(
qas_id=qas_id,
question_text=question_text,
doc_tokens=doc_tokens,
orig_answer_text=orig_answer_text,
start_position=start_position,
end_position=end_position,
is_impossible=is_impossible)
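To make the resulting schedule concrete, here is a minimal, framework-free sketch of the learning-rate curve the Switch block above builds, assuming the decay branch behaves like fluid's polynomial_decay with power=1 over max_train_steps; the function and parameter names are illustrative:
def scheduled_learning_rate(step, base_lr, max_train_steps,
                            warmup_steps, linear_decay_start, end_lr=0.0):
    # Linear warmup: ramp from 0 up to base_lr over the first warmup_steps steps.
    if warmup_steps > 0 and step < warmup_steps:
        return base_lr * step / warmup_steps
    # Linear decay (power=1 polynomial decay) towards end_lr.
    if step >= linear_decay_start:
        progress = min(step, max_train_steps) / max_train_steps
        return (base_lr - end_lr) * (1.0 - progress) + end_lr
    # Between warmup and the decay start point, hold the base learning rate.
    return base_lr
Since linear_decay_start is clamped to warmup_steps earlier in the snippet, the two phases never overlap.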
if not self.scheduler["slanted_triangle"]["cut_fraction"]:
warmup_steps = int(max_train_steps * self.scheduler["warmup"])
linear_decay_start = int(
max_train_steps * self.scheduler["linear_decay"]["start_point"])
if linear_decay_start < warmup_steps:
logger.warning(
"linear decay can not start during warmup process,"
"it will start after warmup ends!")
linear_decay_start = warmup_steps
if self.scheduler["noam_decay"]:
if warmup_steps > 0:
scheduled_lr = fluid.layers.learning_rate_scheduler \
.noam_decay(1 / (warmup_steps * (self.learning_rate ** 2)),
warmup_steps)
else:
logger.warning(
"Noam decay learning rate scheduler should have positive \
warmup steps, using constant learning rate instead!")
if not self.scheduler["noam_decay"] and \
(warmup_steps > 0 or self.scheduler["linear_decay"]["start_point"]<1):
with self.main_program._lr_schedule_guard():
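The snippet above indexes char_to_word_offset with a character offset to recover word-level answer positions. Those structures are built earlier in the reader (not shown here); the sketch below follows the standard whitespace-based SQuAD preprocessing:
def build_char_to_word_offset(paragraph_text):
    doc_tokens = []
    char_to_word_offset = []
    prev_is_whitespace = True
    for c in paragraph_text:
        if c in " \t\r\n" or ord(c) == 0x202F:
            prev_is_whitespace = True
        else:
            if prev_is_whitespace:
                doc_tokens.append(c)   # start a new word
            else:
                doc_tokens[-1] += c    # extend the current word
            prev_is_whitespace = False
        # Every character (including whitespace) maps to the current word index.
        char_to_word_offset.append(len(doc_tokens) - 1)
    return doc_tokens, char_to_word_offset
Each character position maps to the index of the word it falls in, which is why char_to_word_offset[answer_offset] yields start_position directly.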
# actual_text / cleaned_answer_text reconstruction assumed analogous to the
# SQuAD reader above; Chinese tokens are joined without spaces.
actual_text = "".join(doc_tokens[start_position:(end_position + 1)])
cleaned_answer_text = "".join(
    tokenization.whitespace_tokenize(orig_answer_text))
if actual_text.find(cleaned_answer_text) == -1:
    drop += 1
    # logger.warning((actual_text, " vs ",
    #                 cleaned_answer_text, " in ", qa))
    continue
example = CMRC2018Example(
qas_id=qas_id,
question_text=question_text,
doc_tokens=doc_tokens,
orig_answer_text=orig_answer_text,
start_position=start_position,
end_position=end_position)
examples.append(example)
logger.warning("%i bad examples has been dropped" % drop)
return examples
def _check_hub_version(self):
if version_compare(self.hub_version, hub_version):
logger.warning(
"This Module is generated by the PaddleHub with version %s, and the local PaddleHub version is %s, which may cause serious incompatible bug. Please upgrade PaddleHub to the latest version."
% (self.hub_version, hub_version))
return False
return True