def server_init(controller, options):
"""
Initialize params for server.
"""
# names and static/fixed info
log_dir_root = logger.get_logger_dir()
model_dir_root = options.model_dir
# Queues.
queue_names, _queues = controller.init_queues()
(q_parent, q_hallu, q_child) = (
controller.q_parent, controller.q_hallu, controller.q_child)
qname_to_pool = {
q_child.name : TRAIN_CRITIC_MODEL,
q_hallu.name : TRAIN_CRITIC_HALLU,
q_parent.name : TRAIN_CRITIC_PARENT}
mi_info = []
if is_debug(options):
prev_log_root = log_dir_root
prev_model_root = model_dir_root
else:
def _before_train(self):
stats = JSONWriter.load_existing_json()
self._fname = os.path.join(logger.get_logger_dir(), JSONWriter.FILENAME)
if stats is not None:
try:
epoch = stats[-1]['epoch_num'] + 1
except Exception:
epoch = None
starting_epoch = self.trainer.loop.starting_epoch
if epoch is None or epoch == starting_epoch:
logger.info("Found existing JSON inside {}, will append to it.".format(logger.get_logger_dir()))
self._stats = stats
else:
logger.warn(
"History epoch value {} from JSON is not the predecessor of the starting_epoch value {}".format(
epoch - 1, starting_epoch))
logger.warn("If you want to resume old training, either use `AutoResumeTrainConfig` "
"or correctly set the starting_epoch yourself to avoid inconsistency. "
"Epoch number will not be automatically loaded by JSONWriter.")
backup_fname = JSONWriter.FILENAME + '.' + datetime.now().strftime('%m%d-%H%M%S')
backup_fname = os.path.join(logger.get_logger_dir(), backup_fname)
logger.warn("Now, we will start training at epoch {} and backup old json to {}".format(
self.trainer.loop.starting_epoch, backup_fname))
shutil.move(self._fname, backup_fname)
self._stats = []
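# A minimal sketch of the consistency check above, under the assumption (visible in
# the code) that stats.json is a JSON list with one dict per epoch, each carrying an
# 'epoch_num' key. The helper name `can_append` is hypothetical.
def can_append(stats, starting_epoch):
    """True when an existing stats list lines up with the trainer's starting_epoch."""
    if not stats:
        return True
    return stats[-1]['epoch_num'] + 1 == starting_epoch

# e.g. stats = [{'epoch_num': 1, 'loss': 0.9}, {'epoch_num': 2, 'loss': 0.7}]
# can_append(stats, 3) -> True (append); can_append(stats, 1) -> False (back up the old json)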
nr_gpu (int) : the number of GPUs the crawler can use. A round-robin schedule is used.
launcher (str) : the name of the launcher, which is used for the logger.
n_parallel (int) : max number of parallel jobs. We count the number of running jobs
using the number of .sh files in the launcher's own log dir. Hence it is IMPERATIVE
that each launched job removes its own .sh after finishing, regardless of success/failure.
Bugged xx.sh files are copied to xx.sh.fixme to avoid a resource leak.
(A counting sketch follows below.)
num_init_use_all_gpu (int) : if mi < num_init_use_all_gpu, the job will use
all available GPUs, so that the initial jobs finish faster.
"""
device = -1
while True:
time.sleep(1)
if os.path.exists(auto_dir):
break
logger.info("Found the auto_dir {}".format(auto_dir))
launch_log = logger.get_logger_dir()
# Python 2 vs 3 compatibility: FileNotFoundError does not exist in Python 2.
check_errno = False
try:
FileNotFoundError
except NameError:
FileNotFoundError = OSError
check_errno = True
logger.info("Crawler check_errno = {}".format(check_errno))
def _newFileNotFound():
e = FileNotFoundError()
e.errno = errno.ENOENT
return e
def _isFileNotFound(e):
    # On Python 2, FileNotFoundError was aliased to OSError above, so fall back
    # to checking errno; on Python 3 the exception type alone is enough.
    if check_errno:
        return isinstance(e, OSError) and e.errno == errno.ENOENT
    return isinstance(e, FileNotFoundError)
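# A hedged sketch of the n_parallel bookkeeping described in the docstring above:
# each launched job drops an xx.sh into the launcher's log dir and deletes it on exit,
# so counting *.sh files tells how many jobs are still in flight. `launch_log` mirrors
# the variable above; the helper name `_count_running_jobs` is hypothetical.
import glob
import os

def _count_running_jobs(launch_log):
    return len(glob.glob(os.path.join(launch_log, '*.sh')))

# A new job would only be launched while _count_running_jobs(launch_log) < n_parallel.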
def __init__(self, dump_metadata=False, dump_tracing=True, dump_event=False):
"""
Args:
dump_metadata(bool): Dump :class:`tf.RunMetadata` to be used with tfprof.
dump_tracing(bool): Dump chrome tracing files.
dump_event(bool): Dump to an event file processed by FileWriter,
which will be shown in TensorBoard.
"""
self._dir = logger.get_logger_dir()
self._dump_meta = bool(dump_metadata)
self._dump_tracing = bool(dump_tracing)
self._dump_event = bool(dump_event)
assert os.path.isdir(self._dir), self._dir
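# A minimal sketch of wiring the profiler above into a training run, assuming
# tensorpack's logger / TrainConfig / GraphProfiler APIs; `MyModel` and `df` are
# placeholders for a user-defined ModelDesc and DataFlow, not part of the snippet above.
from tensorpack import TrainConfig
from tensorpack.callbacks import GraphProfiler
from tensorpack.utils import logger

logger.set_logger_dir('train_log/profile_run')  # must be set first: __init__ asserts the dir exists
config = TrainConfig(
    model=MyModel(),
    dataflow=df,
    callbacks=[GraphProfiler(dump_tracing=True, dump_event=True)],
    max_epoch=1,
)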
@staticmethod
def load_existing_json():
"""
Look for an existing json under :meth:`logger.get_logger_dir()` named "stats.json",
and return the loaded list of statistics if found. Returns None otherwise.
"""
dir = logger.get_logger_dir()
fname = os.path.join(dir, JSONWriter.FILENAME)
if tf.gfile.Exists(fname):
with open(fname) as f:
stats = json.load(f)
assert isinstance(stats, list), type(stats)
return stats
return None
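# A minimal sketch of exercising load_existing_json(), assuming tensorpack's
# logger.set_logger_dir() and JSONWriter; the directory name is arbitrary, and
# FILENAME is "stats.json" as documented above.
import json
import os
from tensorpack.callbacks import JSONWriter
from tensorpack.utils import logger

logger.set_logger_dir('train_log/json_demo')
with open(os.path.join(logger.get_logger_dir(), JSONWriter.FILENAME), 'w') as f:
    json.dump([{'epoch_num': 1, 'loss': 0.9}], f)   # a list of per-epoch dicts
stats = JSONWriter.load_existing_json()             # -> the list above, or None if the file is absent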
def _before_train(self):
stats = JSONWriter.load_existing_json()
self._fname = os.path.join(logger.get_logger_dir(), JSONWriter.FILENAME)
if stats is not None:
try:
epoch = stats[-1]['epoch_num'] + 1
except Exception:
epoch = None
# check against the current training settings
# therefore this logic needs to be in before_train stage
starting_epoch = self.trainer.loop.starting_epoch
if epoch is None or epoch == starting_epoch:
logger.info("Found existing JSON inside {}, will append to it.".format(logger.get_logger_dir()))
self._stats = stats
else:
logger.warn(
"History epoch={} from JSON is not the predecessor of the current starting_epoch={}".format(
epoch - 1, starting_epoch))
logger.warn("If you want to resume old training, either use `AutoResumeTrainConfig` "
"or correctly set the new starting_epoch yourself to avoid inconsistency. ")
backup_fname = JSONWriter.FILENAME + '.' + datetime.now().strftime('%m%d-%H%M%S')
backup_fname = os.path.join(logger.get_logger_dir(), backup_fname)
logger.warn("Now, we will train with starting_epoch={} and backup old json to {}".format(
self.trainer.loop.starting_epoch, backup_fname))
shutil.move(self._fname, backup_fname)
# in case we have something to log here.
self._trigger()
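# A hedged sketch of the resume path recommended in the warning above, assuming
# tensorpack's AutoResumeTrainConfig / SimpleTrainer / launch_train_with_config APIs.
# AutoResumeTrainConfig infers the checkpoint and starting_epoch from the logger
# directory, so the epoch check above passes and stats.json keeps being appended to.
# `MyModel` and `df` are placeholders for a user-defined ModelDesc and DataFlow.
from tensorpack import AutoResumeTrainConfig, SimpleTrainer, launch_train_with_config
from tensorpack.utils import logger

logger.set_logger_dir('train_log/run', action='k')  # 'k' keeps the existing dir from the previous run
config = AutoResumeTrainConfig(
    model=MyModel(),
    dataflow=df,
    callbacks=[],
    max_epoch=100,
)
launch_train_with_config(config, SimpleTrainer())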
def __new__(cls, logdir=None, max_queue=10, flush_secs=120, **kwargs):
if logdir is None:
logdir = logger.get_logger_dir()
if logdir is not None:
return super(TFEventWriter, cls).__new__(cls)
else:
logger.warn("logger directory was not set. Ignore TFEventWriter.")
return NoOpMonitor("TFEventWriter")
def __init__(self, logdir=None, max_queue=10, flush_secs=120, split_files=False):
"""
Args:
logdir: ``logger.get_logger_dir()`` by default.
max_queue, flush_secs: Same as in :class:`tf.summary.FileWriter`.
split_files: if True, split events to multiple files rather than
append to a single file. Useful on certain filesystems where append is expensive.
"""
if logdir is None:
logdir = logger.get_logger_dir()
assert tf.gfile.IsDirectory(logdir), logdir
self._logdir = fs.normpath(logdir)
self._max_queue = max_queue
self._flush_secs = flush_secs
self._split_files = split_files
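# A minimal sketch of attaching these monitors to a training config, assuming
# tensorpack's TrainConfig / monitor APIs; `MyModel` and `df` are placeholders
# for a user-defined ModelDesc and DataFlow.
from tensorpack import TrainConfig
from tensorpack.callbacks import JSONWriter, ScalarPrinter, TFEventWriter
from tensorpack.utils import logger

logger.set_logger_dir('train_log/run')
config = TrainConfig(
    model=MyModel(),
    dataflow=df,
    monitors=[TFEventWriter(split_files=False), JSONWriter(), ScalarPrinter()],
    max_epoch=10,
)
# Without set_logger_dir(), TFEventWriter() above would fall back to a NoOpMonitor.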