Options loaded from default.py are overridden by options loaded from the cfg file.
Options passed in through the options argument override options loaded from the cfg file.
Args:
*options (str, int, optional): Options used to override what is loaded from the config.
To see what options are available, consult default.py.
cfg (str, optional): Location of the config file to load. Defaults to None.
"""
update_config(config, options=options, config_file=cfg)
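# Illustrative sketch only (hypothetical helper, not this project's update_config):
# the precedence described in the docstring above amounts to a layered merge --
# default.py values first, overridden by the cfg file, overridden in turn by
# explicitly passed options.
def _merge_config_layers(defaults, cfg_file_options, passed_options):
    merged = dict(defaults)            # lowest precedence: default.py
    merged.update(cfg_file_options)    # overridden by the cfg file
    merged.update(passed_options)      # highest precedence: passed options
    return merged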
hvd.init()
silence_other_ranks = True
logging.config.fileConfig(config.LOG_CONFIG)
logger = logging.getLogger(__name__)
torch.manual_seed(config.SEED)
torch.cuda.set_device(hvd.local_rank())
torch.cuda.manual_seed(config.SEED)
rank, world_size = hvd.rank(), hvd.size()
scheduler_step = config.TRAIN.END_EPOCH // config.TRAIN.SNAPSHOTS
torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK
torch.manual_seed(config.SEED)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(config.SEED)
np.random.seed(seed=config.SEED)
# Setup Augmentations
basic_aug = Compose(
[
Normalize(
mean=(config.TRAIN.MEAN,),
std=(config.TRAIN.STD,),
max_pixel_value=1,
if not _v2_api:
return
hvd.init()
rank = hvd.rank()
size = hvd.size()
# This test does not apply if there is only one worker.
if size == 1:
return
dims = [17] * 3
tensor = torch.FloatTensor(*dims)
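# In this Horovod join test, rank 0 calls hvd.join() immediately, while every
# other rank attempts a broadcast rooted at rank 1. Because rank 0 has already
# joined and will not participate, the broadcast is expected to fail, which the
# try/except below asserts before the remaining ranks also join.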
if rank == 0:
ret = hvd.join(hvd.local_rank())
else:
try:
broadcasted_tensor = hvd.broadcast(tensor, 1)
assert False, 'hvd.broadcast did not throw error'
except (torch.FatalError, RuntimeError):
pass
if torch.cuda.is_available():
ret = hvd.join(hvd.local_rank())
else:
ret = hvd.join()
def main():
logger = _get_logger()
if _DISTRIBUTED:
# Horovod: initialize Horovod.
hvd.init()
logger.info("Runnin Distributed")
torch.manual_seed(_SEED)
# Horovod: pin GPU to local rank.
torch.cuda.set_device(hvd.local_rank())
torch.cuda.manual_seed(_SEED)
logger.info("PyTorch version {}".format(torch.__version__))
if _FAKE:
logger.info("Setting up fake loaders")
train_dataset = FakeData(n_classes=1000, data_transform=torch.FloatTensor)
else:
normalize = transforms.Normalize(_RGB_MEAN, _RGB_SD)
train_X, train_y, valid_X, valid_y = _create_data_fn(os.getenv('AZ_BATCHAI_INPUT_TRAIN'), os.getenv('AZ_BATCHAI_INPUT_TEST'))
logger.info("Setting up loaders")
train_dataset = ImageNet(
train_X,
train_y,
overwrite_model (bool, optional): Whether to overwrite an existing model.
If `cache_dir` and `load_model_from_dir` are the same and
`overwrite_model` is `False`, the fitted model is saved to
"cache_dir/fine_tuned". Defaults to False.
"""
# dist
step_per_log = 100
is_master = False
if distributed:
hvd.init()
# run = Run.get_context()
rank = hvd.rank()
local_rank = hvd.local_rank()
world_size = hvd.size()
# torch.distributed.init_process_group(
# backend="nccl",
# init_method="tcp://127.0.0.1:6000",
# world_size=world_size,
# rank=local_rank,
# )
torch.cuda.set_device(local_rank)
device = torch.device("cuda", local_rank)
is_master = rank == 0
self.cache_dir = self.cache_dir + "/distributed_" + str(rank)
self.model = self.model.to(device)
# Experimental options
ap.add_argument("--opt_fp16", action="store_true")
# Paths
ap.add_argument("--model_path",
default="{}/lanmt.pt".format(DATA_ROOT))
ap.add_argument("--result_path",
default="{}/lanmt.result".format(DATA_ROOT))
OPTS.parse(ap)
# Determine the number of GPUs to use
horovod_installed = importlib.util.find_spec("horovod") is not None
if torch.cuda.is_available() and horovod_installed:
import horovod.torch as hvd
hvd.init()
torch.cuda.set_device(hvd.local_rank())
part_index = hvd.rank()
part_num = hvd.size()
gpu_num = hvd.size()
else:
part_index = 0
part_num = 1
gpu_num = 1
if is_root_node():
print("Running on {} GPUs".format(gpu_num))
# Get the path variables
(
train_src_corpus,
train_tgt_corpus,
distilled_tgt_corpus,
truncate_datapoints,
def train_and_eval(tag, dataroot, test_ratio=0.0, cv_fold=0, reporter=None, metric='last', save_path=None, only_eval=False, horovod=False):
if horovod:
import horovod.torch as hvd
hvd.init()
device = torch.device('cuda', hvd.local_rank())
torch.cuda.set_device(device)
if not reporter:
reporter = lambda **kwargs: 0
max_epoch = C.get()['epoch']
trainsampler, trainloader, validloader, testloader_ = get_dataloaders(C.get()['dataset'], C.get()['batch'], dataroot, test_ratio, split_idx=cv_fold, horovod=horovod)
# create a model & an optimizer
model = get_model(C.get()['model'], num_class(C.get()['dataset']), data_parallel=(not horovod))
criterion = nn.CrossEntropyLoss()
if C.get()['optimizer']['type'] == 'sgd':
optimizer = optim.SGD(
model.parameters(),
lr=C.get()['lr'],
import torch.optim as optim
from utils_nlp.gensen.multi_task_model import MultitaskModel
from utils_nlp.gensen.utils import (
BufferedDataIterator,
NLIIterator,
compute_validation_loss,
)
cudnn.benchmark = True
logger = logging.getLogger(__name__)
hvd.init()
if torch.cuda.is_available():
# Horovod: pin GPU to local rank.
torch.cuda.set_device(hvd.local_rank())
def metric_average(value, name):
"""
Average the validation loss across all Horovod workers.
:param value: the value computed on this worker
:param name: name to use for the allreduce operation
:return: the value averaged across all workers
"""
tensor = torch.tensor(value)
avg_tensor = hvd.allreduce(tensor, name=name)
return avg_tensor.item()
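# Usage sketch (names below are placeholders): hvd.allreduce averages across
# ranks by default, so after this call every worker logs the same value.
#     local_val_loss = 0.123  # this rank's validation loss
#     global_val_loss = metric_average(local_val_loss, "avg_val_loss")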
def setup_horovod(model, learning_rate):
""" Setup for Horovod usage.
config['data_path'] = args.dataPath
config["sweep_size"] = args.sweep_size
print("pytorch version:{}".format(th.__version__))
with open(args.data) as f:
data = yaml.safe_load(f)
config["source_paths"] = [j for i, j in data['clean_source'].items()]
print("Experiment starts with config {}".format(json.dumps(config, sort_keys=True, indent=4)))
# Initialize Horovod
hvd.init()
th.cuda.set_device(hvd.local_rank())
print("Run experiments with world size {}".format(hvd.size()))
dataset = SpeechDataset(config)
transform = None
if args.transform is not None and os.path.isfile(args.transform):
with open(args.transform, 'rb') as f:
transform = pickle.load(f)
dataset.transform = transform
train_dataloader = SeqDataloader(dataset,
batch_size=args.batch_size,
num_workers=args.data_loader_threads,
distributed=True,
test_only=False)
local_vars = locals()
loss_fns = [loss_constructor(**local_vars) for loss_constructor in loss_constructors]
# Horovod: initialize library.
hvd.init()
if not user_shuffle_buffer_size:
shuffle_buffer_size = \
calculate_shuffle_buffer_size(hvd, avg_row_size, train_rows / hvd.size())
else:
shuffle_buffer_size = user_shuffle_buffer_size
cuda_available = torch.cuda.is_available()
if cuda_available:
# Horovod: pin GPU to local rank.
torch.cuda.set_device(hvd.local_rank())
# Move model to GPU.
model.cuda()
# The optimizer object needs to be re-instantiated. Internally it uses the
# memory addresses of parameter objects as their identity, so it cannot simply
# be serialized and then deserialized: a deserialized optimizer would still
# refer to parameters by their old memory addresses, which no longer match the
# reconstructed model's parameters, and that causes problems.
# The learning rate is a required parameter of the SGD optimizer; it will be
# overridden by load_state_dict.
optimizer = optimizer_cls(model.parameters(), lr=1)
optimizer_state = model_opt_state['optimizer']
if last_checkpoint_state is not None:
model.load_state_dict(last_checkpoint_state['model'])
optimizer.load_state_dict(last_checkpoint_state['optimizer'])
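# Companion sketch (the checkpoint path is a placeholder): the state worth
# persisting is the state_dict()s, not the optimizer object itself, which is
# exactly why a fresh optimizer is constructed above before load_state_dict.
#     torch.save({'model': model.state_dict(),
#                 'optimizer': optimizer.state_dict()}, 'checkpoint.pt')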
help='random seed (default: 42)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
help='how many batches to wait before logging training status')
parser.add_argument('--fp16-allreduce', action='store_true', default=False,
help='use fp16 compression during allreduce')
parser.add_argument('--results_path', type=str, help="Path to store results")
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
# Horovod: initialize library.
hvd.init()
torch.manual_seed(args.seed)
if args.cuda:
# Horovod: pin GPU to local rank.
torch.cuda.set_device(hvd.local_rank())
torch.cuda.manual_seed(args.seed)
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_dataset = \
datasets.MNIST('data-%d' % hvd.rank(), train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
]))
# Horovod: use DistributedSampler to partition the training data.
train_sampler = torch.utils.data.distributed.DistributedSampler(
train_dataset, num_replicas=hvd.size(), rank=hvd.rank())
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=args.batch_size, sampler=train_sampler, **kwargs)
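# Continuation sketch (the epoch count and training step are placeholders):
# with a DistributedSampler, call set_epoch every epoch so the shuffled order
# changes between epochs while staying consistent across ranks.
for epoch in range(1, 3):          # placeholder epoch count
    train_sampler.set_epoch(epoch)
    for batch_idx, (data, target) in enumerate(train_loader):
        pass                       # training step elided in this sketch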