Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Build the coordinator that is handed to torchelastic.train() below. All
# arguments are forwarded unchanged from values prepared earlier in the script.
coordinator = CoordinatorP2P(
    c10d_backend=c10d_backend,
    init_method=rdzv_init_url,
    max_num_trainers=max_world_size,
    # NOTE(review): the unit of this timeout is not visible here --
    # presumably milliseconds; confirm against CoordinatorP2P.
    process_group_timeout=60000,
)

# Bundle the model, dataset and training parameters into the state object
# passed to torchelastic.train(); the epoch count is read from
# training_params itself.
state = ImagenetState(
    model=model,
    params=training_params,
    dataset=train_dataset,
    num_epochs=training_params.num_epochs,
)

# Plain string literal: the message has no placeholders, so no f-prefix.
log.info("Entering torchelastic train_loop")

# Hand the coordinator, the per-step function and the state to torchelastic.
torchelastic.train(coordinator, train_step, state)