def __init__(self, accumulation_step=1):
    hvd.init()
    self.local_rank = hvd.local_rank()
    self.world_size = hvd.size()
    self.rank = hvd.rank()
    self.n_gpu = torch.cuda.device_count()
    self.node_count = self.world_size // self.n_gpu
    self.accumulation_step = accumulation_step
    self.count_down = accumulation_step - 1
    self._multi_node = self.node_count > 1
    if not self._multi_node:
        # use PyTorch's built-in NCCL backend for single-node training
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='tcp://127.0.0.1:6000',
                                             world_size=self.n_gpu,
                                             rank=self.local_rank)
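# A minimal, self-contained sketch of the same initialization pattern
# (assumption: launched with one process per GPU, e.g. via
# `horovodrun -np 8 python train.py`); it only prints the topology that the
# __init__ above derives.
import torch
import horovod.torch as hvd

hvd.init()
n_gpu = torch.cuda.device_count()
node_count = hvd.size() // n_gpu if n_gpu else 1
print(f"rank {hvd.rank()} local_rank {hvd.local_rank()} "
      f"nodes {node_count} multi_node {node_count > 1}")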
def test_horovod_allreduce_average(self):
    """Test that the allreduce correctly averages 1D, 2D, 3D tensors."""
    hvd.init()
    size = hvd.size()
    dtypes = self.filter_supported_types([torch.IntTensor, torch.LongTensor,
                                          torch.FloatTensor, torch.DoubleTensor])
    if torch.cuda.is_available():
        dtypes += [torch.cuda.IntTensor, torch.cuda.LongTensor,
                   torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
        if _fp16_supported:
            dtypes += [torch.cuda.HalfTensor]
    dims = [1, 2, 3]
    for dtype, dim in itertools.product(dtypes, dims):
        torch.manual_seed(1234)
        tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100)
        tensor = self.cast_and_place(tensor, dtype)
        averaged = hvd.allreduce(tensor, average=True)
        # every rank seeds identically, so the average should match the input
        max_difference = averaged.data.sub(tensor).max()
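# A minimal sketch of the operation under test (assumption: run under
# `horovodrun -np N`): averaging a rank-dependent tensor across workers.
import torch
import horovod.torch as hvd

hvd.init()
x = torch.ones(4) * hvd.rank()         # rank r holds [r, r, r, r]
avg = hvd.allreduce(x, average=True)   # element-wise mean over all ranks
# with N ranks, every element equals (0 + 1 + ... + N-1) / N
print(avg)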
def test_horovod_allgather(self):
    """Test that the allgather correctly gathers 1D, 2D, 3D tensors."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()
    dtypes = [torch.ByteTensor, torch.CharTensor, torch.ShortTensor,
              torch.IntTensor, torch.LongTensor,
              torch.FloatTensor, torch.DoubleTensor]
    if _fp16_supported:
        dtypes += [torch.HalfTensor]
    if torch.cuda.is_available():
        dtypes += [torch.cuda.ByteTensor, torch.cuda.CharTensor, torch.cuda.ShortTensor,
                   torch.cuda.IntTensor, torch.cuda.LongTensor,
                   torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
        if _fp16_supported:
            dtypes += [torch.cuda.HalfTensor]
    dims = [1, 2, 3]
    for dtype, dim in itertools.product(dtypes, dims):
        # each rank contributes a block filled with its own rank id
        tensor = torch.FloatTensor(*([17] * dim)).fill_(1).mul_(rank)
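# A minimal sketch of allgather (assumption: run under `horovodrun -np N`):
# tensors are concatenated along the first dimension, so the result here has
# N * 17 entries.
import torch
import horovod.torch as hvd

hvd.init()
x = torch.full((17,), float(hvd.rank()))
gathered = hvd.allgather(x)   # shape: (hvd.size() * 17,)
print(gathered.shape)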
def test_delta_optimizer(self):
    """Test the delta optimizer."""
    hvd.init()
    # TODO support non-MPI Adasum operation
    # Only run this test if MPI is enabled and GPUs are available.
    if not hvd.mpi_enabled() or not torch.cuda.is_available():
        return
    local_rank = hvd.local_rank()
    size = hvd.size()
    # This test does not apply if there is only one worker.
    if size == 1:
        return

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = torch.nn.Conv2d(1, 100, 1).cuda(local_rank)
            self.conv2 = torch.nn.Conv2d(100, 1, 1).cuda(local_rank)
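# A hedged sketch of what such a test typically exercises: wrapping a local
# optimizer with Horovod's Adasum reduction (hvd.DistributedOptimizer with
# op=hvd.Adasum); the tiny Linear model here is a stand-in, not the test's Net.
import torch
import horovod.torch as hvd

hvd.init()
model = torch.nn.Linear(10, 1)
opt = torch.optim.SGD(model.parameters(), lr=0.01)
opt = hvd.DistributedOptimizer(opt,
                               named_parameters=model.named_parameters(),
                               op=hvd.Adasum)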
print("pytorch version:{}".format(th.__version__))
with open(args.data) as f:
data = yaml.safe_load(f)
config["source_paths"] = [j for i, j in data['clean_source'].items()]
if 'dir_noise' in data:
config["dir_noise_paths"] = [j for i, j in data['dir_noise'].items()]
if 'rir' in data:
config["rir_paths"] = [j for i, j in data['rir'].items()]
config['data_path'] = args.dataPath
print("Experiment starts with config {}".format(json.dumps(config, sort_keys=True, indent=4)))
# Initialize Horovod
hvd.init()
th.cuda.set_device(hvd.local_rank())
print("Run experiments with world size {}".format(hvd.size()))
dataset = SpeechDataset(config)
transform=None
if args.transform is not None and os.path.isfile(args.transform):
with open(args.transform, 'rb') as f:
transform = pickle.load(f)
dataset.transform = transform
train_dataloader = SeqDataloader(dataset,
batch_size=args.batch_size,
num_workers = args.data_loader_threads,
distributed=True,
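# A hedged sketch of what `distributed=True` typically implies (assumption:
# SeqDataloader shards the dataset per rank the way torch's DistributedSampler
# does; the TensorDataset below is a stand-in):
import torch
import horovod.torch as hvd

hvd.init()
dataset = torch.utils.data.TensorDataset(torch.randn(128, 10))
sampler = torch.utils.data.distributed.DistributedSampler(
    dataset, num_replicas=hvd.size(), rank=hvd.rank())
loader = torch.utils.data.DataLoader(dataset, batch_size=32, sampler=sampler)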
print("pytorch version:{}".format(th.__version__))
with open(args.data) as f:
data = yaml.safe_load(f)
config["source_paths"] = [j for i, j in data['clean_source'].items()]
if 'dir_noise' in data:
config["dir_noise_paths"] = [j for i, j in data['dir_noise'].items()]
if 'rir' in data:
config["rir_paths"] = [j for i, j in data['rir'].items()]
config['data_path'] = args.dataPath
print("Experiment starts with config {}".format(json.dumps(config, sort_keys=True, indent=4)))
# Initialize Horovod
hvd.init()
th.cuda.set_device(hvd.local_rank())
print("Run experiments with world size {}".format(hvd.size()))
dataset = SpeechDataset(config)
transform=None
if args.transform is not None and os.path.isfile(args.transform):
with open(args.transform, 'rb') as f:
transform = pickle.load(f)
dataset.transform = transform
train_dataloader = SeqDataloader(dataset,
batch_size=args.batch_size,
num_workers = args.data_loader_threads,
distributed=True,
from utils_nlp.eval.question_answering import evaluate_qa
from utils_nlp.common.timer import Timer

parser = argparse.ArgumentParser()
parser.add_argument("--cache_dir", type=str, default="./")
parser.add_argument("--model_name", type=str, default="distilbert-base-uncased")
# note: argparse's type=bool treats any non-empty string (even "False") as
# True, so boolean flags are parsed explicitly here
parser.add_argument("--do_lower_case",
                    type=lambda s: s.lower() in ("true", "1"), default=True)
parser.add_argument("--quick_run",
                    type=lambda s: s.lower() in ("true", "1"), default=False)
parser.add_argument("--gradient_accumulation_steps", type=int, default=1)
args = parser.parse_args()

HOROVOD = True
hvd.init()
rank = hvd.rank()
local_rank = hvd.local_rank()
world_size = hvd.size()
print("rank: {}".format(rank))
print("local_rank: {}".format(local_rank))
print("world_size: {}".format(world_size))

MODEL_NAME = args.model_name
DO_LOWER_CASE = args.do_lower_case
TRAIN_DATA_USED_PERCENT = 1
DEV_DATA_USED_PERCENT = 1
NUM_EPOCHS = 2
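# A hedged sketch of the usual Horovod follow-up to this setup (not shown in
# the excerpt): broadcast initial state from rank 0 and scale the learning
# rate by world size. The Linear model and base_lr are stand-ins.
import torch
import horovod.torch as hvd

hvd.init()
model = torch.nn.Linear(10, 2)
base_lr = 5e-5
optimizer = torch.optim.Adam(model.parameters(), lr=base_lr * hvd.size())
hvd.broadcast_parameters(model.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(optimizer, root_rank=0)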
import pyt.model as base
import torch
import text_dataset
from pyt.dataset import get_dataset
from pyt.model import *
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader

import tensorflow as tf
import melt

flags = tf.app.flags
FLAGS = flags.FLAGS
logging = melt.logging

import horovod.torch as hvd
hvd.init()
# Horovod: pin GPU to local rank.
torch.cuda.set_device(hvd.local_rank())

def main(_):
    FLAGS.torch_only = True
    #FLAGS.valid_input = None
    melt.init()
    fit = melt.get_fit()
    FLAGS.eval_batch_size = 512 * FLAGS.valid_multiplier
    model_name = FLAGS.model
    model = getattr(base, model_name)()
    loss_fn = nn.BCEWithLogitsLoss()
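# A minimal sketch of the dynamic model lookup used above (the `Baseline`
# class is hypothetical; the real code resolves classes from pyt.model):
from torch import nn

class Baseline(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(8, 1)

    def forward(self, x):
        return self.fc(x)

model_name = "Baseline"
model = globals()[model_name]()  # same idea as getattr(base, model_name)()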
schedule = args.schedule == 1
decay = args.decay
assert nz > 0

# set seeds to keep the experiment reproducible
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
torch.backends.cudnn.deterministic = True

# Distributed: set up Horovod over multiple GPUs
if distributed:
    import horovod.torch as hvd

    # initialize Horovod
    hvd.init()
    # pin each process to its "local rank" GPU (see the Horovod documentation)
    torch.cuda.set_device(hvd.local_rank())
    print(f"My local rank is {hvd.local_rank()}")
    # split each mini-batch across the GPUs
    batch_size //= hvd.size()

# string tag for logging
tag = f'nz{nz}'

# define the "root process": only one process should log relevant values
root_process = True
if distributed and hvd.rank() != 0:
    root_process = False
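# A minimal sketch of how `root_process` is typically used (assumption:
# checkpoints and logs should be written exactly once per job, not once per
# worker; the Linear model is a stand-in):
import torch

model = torch.nn.Linear(4, 1)
if root_process:
    torch.save(model.state_dict(), "checkpoint.pt")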
parser.add_argument('--num-warmup-batches', type=int, default=10,
                    help='number of warm-up batches that don\'t count towards benchmark')
parser.add_argument('--num-batches-per-iter', type=int, default=10,
                    help='number of batches per benchmark iteration')
parser.add_argument('--num-iters', type=int, default=10,
                    help='number of benchmark iterations')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--use-adasum', action='store_true', default=False,
                    help='use adasum algorithm to do reduction')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

hvd.init()

if args.cuda:
    # Horovod: pin GPU to local rank.
    torch.cuda.set_device(hvd.local_rank())

cudnn.benchmark = True

# Set up standard model.
model = getattr(models, args.model)()

# By default, Adasum doesn't need the learning rate scaled up.
lr_scaler = hvd.size() if not args.use_adasum else 1

if args.cuda:
    # Move model to GPU.
    model.cuda()
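# A hedged sketch of the step that typically follows in this benchmark
# (assumption: with GPU Adasum over NCCL, the learning rate is scaled by
# local size instead of world size); continues from args/model/lr_scaler
# above.
import torch.optim as optim
import horovod.torch as hvd

if args.cuda and args.use_adasum and hvd.nccl_built():
    lr_scaler = hvd.local_size()
optimizer = optim.SGD(model.parameters(), lr=0.01 * lr_scaler)
optimizer = hvd.DistributedOptimizer(
    optimizer, named_parameters=model.named_parameters(),
    op=hvd.Adasum if args.use_adasum else hvd.Average)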