print()
print("CUDA DataParallel high batch")
print("------------------------------------------------")
print("Pytorch Symmetric(cuda {0}) DataParallel impl:".format(device_ids))
print("input size: {0}".format(HIGH_BATCH_INPUT_SIZE))
run_profile(
    nn.parallel.DataParallel(ReferenceQuantize(NBITS).cuda(), device_ids=device_ids),
    HIGH_BATCH_INPUT_SIZE,
    'cuda',
    GPU_RUNS_HIGH_BATCH)
print()
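# The "Custom" quantizers are presumably backed by compiled extension kernels,
# while ReferenceQuantize is the pure-PyTorch baseline they are benchmarked
# against; this assumption is not verified here.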
print("Custom Symmetric (cuda {0}) DataParallel impl:".format(device_ids))
print("input size: {0}".format(HIGH_BATCH_INPUT_SIZE))
run_profile(
    nn.parallel.DataParallel(SymmetricQuantizer(QuantizerConfig(QuantizationParams(bits=NBITS))).cuda(),
                             device_ids=device_ids),
    HIGH_BATCH_INPUT_SIZE,
    'cuda',
    GPU_RUNS_HIGH_BATCH)
print()
print("Custom Asymmetric (cuda {0}) DataParallel impl:".format(device_ids))
print("input size: {0}".format(HIGH_BATCH_INPUT_SIZE))
run_profile(
    nn.parallel.DataParallel(AsymmetricQuantizer(QuantizerConfig(QuantizationParams(bits=NBITS))).cuda(),
                             device_ids=device_ids),
    HIGH_BATCH_INPUT_SIZE,
    'cuda',
    GPU_RUNS_HIGH_BATCH)
# CUDA DistributedDataParallel high batch
print()
print("CUDA DistributedDataParallel high batch")
print("------------------------------------------------")
print("Pytorch Symmetric(cuda {0}) DistributedDataParallel impl:".format(device_ids))
print("input size: {0}".format(HIGH_BATCH_INPUT_SIZE))
mp.spawn(
    run_worker,
    nprocs=NGPUS_PER_NODE,
    args=(WORLD_SIZE, ReferenceQuantize(NBITS), TEST_PARAMS_STRUCT[1], GPU_RUNS_HIGH_BATCH))
print()
print("Custom Symmetric (cuda {0}) DistributedDataParallel impl:".format(device_ids))
print("input size: {0}".format(HIGH_BATCH_INPUT_SIZE))
mp.spawn(
    run_worker,
    nprocs=NGPUS_PER_NODE,
    args=(WORLD_SIZE, SymmetricQuantizer(QuantizerConfig(QuantizationParams(bits=NBITS))), TEST_PARAMS_STRUCT[1],
          GPU_RUNS_HIGH_BATCH))
print()
print("Custom Asymmetric (cuda {0}) DistributedDataParallel impl:".format(device_ids))
print("input size: {0}".format(HIGH_BATCH_INPUT_SIZE))
mp.spawn(
run_worker,
nprocs=NGPUS_PER_NODE,
args=(WORLD_SIZE, SymmetricQuantizer(QuantizerConfig(QuantizationParams(bits=NBITS))), TEST_PARAMS_STRUCT[1],
GPU_RUNS_HIGH_BATCH))
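# Per-channel quantization keeps a separate scale (and, for the asymmetric
# case, zero point) for each channel instead of one per tensor; the channel
# axis is assumed to be dim 0 for weights and dim 1 for activations.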
print("Pytorch Symmetric Per Weight Channel (cpu) impl:")
print("input size: {0}".format(LOW_BATCH_INPUT_SIZE))
run_profile(
    ReferenceQuantize(NBITS,
                      input_shape=LOW_BATCH_INPUT_SIZE,
                      per_channel=True,
                      is_weights=True),
    LOW_BATCH_INPUT_SIZE,
    'cpu',
    CPU_RUNS)
print()
print("Custom Symmetric Per Weight Channel (cpu) impl")
print("input size: {0}".format(LOW_BATCH_INPUT_SIZE))
run_profile(
    SymmetricQuantizer(QuantizerConfig(QuantizationParams(bits=NBITS),
                                       input_shape=LOW_BATCH_INPUT_SIZE,
                                       per_channel=True,
                                       is_weights=True)),
    LOW_BATCH_INPUT_SIZE,
    'cpu',
    CPU_RUNS)
print()
print("Pytorch Symmetric Per Activation Channel (cpu) impl:")
print("input size: {0}".format(LOW_BATCH_INPUT_SIZE))
run_profile(
    ReferenceQuantize(NBITS,
                      input_shape=LOW_BATCH_INPUT_SIZE,
                      per_channel=True,
                      is_weights=False),
    LOW_BATCH_INPUT_SIZE,