Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# ...
# for (j1 = ...) // second source of nested parallelism
# ...
mapper = {}
for tree in retrieve_iteration_tree(partree):
index = tree.index(partree)
outer = tree[index:index + partree.ncollapsed]
inner = tree[index + partree.ncollapsed:]
# Heuristic: nested parallelism is applied only if the top nested
# parallel Iteration iterates *within* the top outer parallel Iteration
# (i.e., the outer is a loop over blocks, while the nested is a loop
# within a block)
candidates = []
for i in inner:
if any(is_integer(j.step - i.symbolic_size) for j in outer):
candidates.append(i)
elif candidates:
# If there's at least one candidate but `i` doesn't honor the
# heuristic above, then we break, as the candidates must be
# perfectly nested
break
if not candidates:
continue
# Introduce nested parallelism
omp_pragma = lambda i: self.lang['par-for'](i, nhyperthreads())
subroot, subpartree, _ = self._make_partree(candidates, omp_pragma)
mapper[subroot] = subpartree
partree = Transformer(mapper).visit(partree)
"""
Check the shape of the Array used to store an aliasing expression.
The shape is impacted by loop blocking, which reduces the required
write-to space.
"""
grid = Grid(shape=(3, 3, 3))
x, y, z = grid.dimensions # noqa
t = grid.stepping_dim
f = Function(name='f', grid=grid)
f.data_with_halo[:] = 1.
u = TimeFunction(name='u', grid=grid, space_order=3)
u.data_with_halo[:] = 0.5
# Leads to 3D aliases
eqn = Eq(u.forward, ((u[t, x, y, z] + u[t, x+1, y+1, z+1])*3*f +
(u[t, x+2, y+2, z+2] + u[t, x+3, y+3, z+3])*3*f + 1))
op0 = Operator(eqn, opt=('noop', {'openmp': True}))
op1 = Operator(eqn, opt=('advanced', {'openmp': True, 'cire-mincost-sops': 1}))
x0_blk_size = op1.parameters[2]
y0_blk_size = op1.parameters[3]
z_size = op1.parameters[4]
# Check Array shape
arrays = [i for i in FindSymbols().visit(op1._func_table['bf0'].root)
if i.is_Array and i._mem_local]
assert len(arrays) == 1
a = arrays[0]
assert len(a.dimensions) == 3
assert a.halo == ((1, 1), (1, 1), (1, 1))
assert Add(*a.symbolic_shape[0].args) == x0_blk_size + 2
coordinates=wave.geometry.rec_positions)
gradient, _ = wave.jacobian_adjoint(residual, u0, vp=v0,
checkpointing=checkpointing)
G = np.dot(gradient.data.reshape(-1), dm.reshape(-1))
# FWI Gradient test
H = [0.5, 0.25, .125, 0.0625, 0.0312, 0.015625, 0.0078125]
error1 = np.zeros(7)
error2 = np.zeros(7)
for i in range(0, 7):
# Add the perturbation to the model
def initializer(data):
data[:] = np.sqrt(v0.data**2 * v**2 /
((1 - H[i]) * v**2 + H[i] * v0.data**2))
vloc = Function(name='vloc', grid=wave.model.grid, space_order=space_order,
initializer=initializer)
# Data for the new model
d = wave.forward(vp=vloc)[0]
# First order error Phi(m0+dm) - Phi(m0)
F_i = .5*linalg.norm((d.data - rec.data).reshape(-1))**2
error1[i] = np.absolute(F_i - F0)
# Second order term r Phi(m0+dm) - Phi(m0) -
error2[i] = np.absolute(F_i - F0 - H[i] * G)
# Test slope of the tests
p1 = np.polyfit(np.log10(H), np.log10(error1), 1)
p2 = np.polyfit(np.log10(H), np.log10(error2), 1)
info('1st order error, Phi(m0+dm)-Phi(m0): %s' % (p1))
info(r'2nd order error, Phi(m0+dm)-Phi(m0) - : %s' % (p2))
assert np.isclose(p1[0], 1.0, rtol=0.1)
assert np.isclose(p2[0], 2.0, rtol=0.1)
@pytest.mark.parametrize('eq,expected,blocking', [
('Eq(f, 2*f)', [2, 0, 0], False),
('Eq(u, 2*u)', [0, 2, 0, 0], False),
('Eq(u, 2*u)', [3, 0, 0, 0, 0, 0], True)
])
@patch("devito.passes.iet.openmp.Ompizer.COLLAPSE_NCORES", 1)
@patch("devito.passes.iet.openmp.Ompizer.COLLAPSE_WORK", 0)
def test_collapsing(self, eq, expected, blocking):
grid = Grid(shape=(3, 3, 3))
f = Function(name='f', grid=grid) # noqa
u = TimeFunction(name='u', grid=grid) # noqa
eq = eval(eq)
if blocking:
op = Operator(eq, dle=('blocking', 'simd', 'openmp', {'blockinner': True}))
iterations = FindNodes(Iteration).visit(op._func_table['bf0'])
else:
op = Operator(eq, dle=('simd', 'openmp'))
iterations = FindNodes(Iteration).visit(op)
assert len(iterations) == len(expected)
# Check for presence of pragma omp + collapse clause
for i, j in zip(iterations, expected):
if j > 0:
Examples
--------
Given the expression
sqrt(cos(a[x, y]))
We should get
t0 = cos(a[x,y])
t1 = sqrt(t0)
out = t1 # pseudocode
"""
grid = Grid(shape=(3, 3))
x, y = grid.dimensions # noqa
u = TimeFunction(name='u', grid=grid)
g = Function(name='g', grid=grid)
op = Operator(Eq(u.forward, u + sin(cos(g)) + sin(cos(g[x+1, y+1]))))
# We expect two temporary Arrays: `r1 = cos(g)` and `r2 = sqrt(r1)`
arrays = [i for i in FindSymbols().visit(op) if i.is_Array and i._mem_local]
assert len(arrays) == 2
assert all(i._mem_heap and not i._mem_external for i in arrays)
def test_scheduling(self):
"""
Affine iterations -> #pragma omp ... schedule(dynamic,1) ...
Non-affine iterations -> #pragma omp ... schedule(dynamic,chunk_size) ...
"""
grid = Grid(shape=(11, 11))
u = TimeFunction(name='u', grid=grid, time_order=2, save=5, space_order=0)
sf1 = SparseTimeFunction(name='s', grid=grid, npoint=1, nt=5)
eqns = [Eq(u.forward, u + 1)]
eqns += sf1.interpolate(u)
op = Operator(eqns, dle='openmp')
iterations = FindNodes(Iteration).visit(op)
assert len(iterations) == 4
assert iterations[1].is_Affine
assert 'schedule(dynamic,1)' in iterations[1].pragmas[0].value
assert not iterations[3].is_Affine
assert 'schedule(dynamic,chunk_size)' in iterations[3].pragmas[0].value
@pytest.mark.parametrize('dse', ['noop', 'basic', 'advanced', 'aggressive'])
@pytest.mark.parametrize('dle', ['noop', 'advanced', 'speculative'])
def test_time_dependent_split(dse, dle):
grid = Grid(shape=(10, 10))
u = TimeFunction(name='u', grid=grid, time_order=2, space_order=2, save=3)
v = TimeFunction(name='v', grid=grid, time_order=2, space_order=0, save=3)
# The second equation needs a full loop over x/y for u then
# a full one over x.y for v
eq = [Eq(u.forward, 2 + grid.time_dim),
Eq(v.forward, u.forward.dx + u.forward.dy + 1)]
op = Operator(eq, dse=dse, dle=dle)
trees = retrieve_iteration_tree(op)
assert len(trees) == 2
op()
assert np.allclose(u.data[2, :, :], 3.0)
assert np.allclose(v.data[1, 1:-1, 1:-1], 1.0)
])
@pytest.mark.parallel(mode=[(1, 'diag')])
def test_diag_comm_scheme(self, expr, expected):
"""
Check that the 'diag' mode does not generate more communications
than strictly necessary.
"""
grid = Grid(shape=(4, 4))
x, y = grid.dimensions # noqa
t = grid.stepping_dim # noqa
f = TimeFunction(name='f', grid=grid) # noqa
op = Operator(Eq(f.forward, eval(expr)), dle=('advanced', {'openmp': False}))
calls = FindNodes(Call).visit(op._func_table['haloupdate0'])
destinations = {i.arguments[-2].field for i in calls}
assert destinations == expected
def test_avoid_redundant_haloupdate(self):
grid = Grid(shape=(12,))
x = grid.dimensions[0]
t = grid.stepping_dim
i = Dimension(name='i')
j = Dimension(name='j')
f = TimeFunction(name='f', grid=grid)
g = Function(name='g', grid=grid)
op = Operator([Eq(f.forward, f[t, x-1] + f[t, x+1] + 1.),
Inc(f[t+1, i], 1.), # no halo update as it's an Inc
Eq(g, f[t, j] + 1)]) # access `f` at `t`, not `t+1`!
calls = FindNodes(Call).visit(op)
assert len(calls) == 1
provided to an Operator, the resulting loop nest is the same.
The array accesses in the equations may or may not use offsets;
these impact the loop bounds, but not the resulting tree
structure.
"""
eq1, eq2, eq3 = EVAL(exprs, ti0.base, ti1.base, ti3.base)
op1 = Operator([eq1, eq2, eq3], dse='noop', dle='noop')
op2 = Operator([eq2, eq1, eq3], dse='noop', dle='noop')
op3 = Operator([eq3, eq2, eq1], dse='noop', dle='noop')
trees = [retrieve_iteration_tree(i) for i in [op1, op2, op3]]
assert all(len(i) == 1 for i in trees)
trees = [i[0] for i in trees]
for tree in trees:
assert IsPerfectIteration().visit(tree[0])
exprs = FindNodes(Expression).visit(tree[-1])
assert len(exprs) == 3