Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
])
@pytest.mark.parallel(mode=[(1, 'diag')])
def test_diag_comm_scheme(self, expr, expected):
    """
    Ensure that, under the 'diag' halo-exchange mode, only the strictly
    necessary communications are emitted by the Operator.
    """
    grid = Grid(shape=(4, 4))
    x, y = grid.dimensions  # noqa -- may be referenced by `expr`
    t = grid.stepping_dim  # noqa -- may be referenced by `expr`
    f = TimeFunction(name='f', grid=grid)  # noqa -- referenced by `expr`

    op = Operator(Eq(f.forward, eval(expr)), dle=('advanced', {'openmp': False}))

    # Collect the destination field of each generated halo-exchange Call
    destinations = set()
    for call in FindNodes(Call).visit(op._func_table['haloupdate0']):
        destinations.add(call.arguments[-2].field)
    assert destinations == expected
def test_avoid_redundant_haloupdate(self):
    """
    Check that exactly one halo-exchange Call is generated: neither the
    Inc nor the read of `f` at time `t` should trigger extra updates.
    """
    grid = Grid(shape=(12,))
    x = grid.dimensions[0]
    t = grid.stepping_dim
    i = Dimension(name='i')
    j = Dimension(name='j')
    f = TimeFunction(name='f', grid=grid)
    g = Function(name='g', grid=grid)

    eqns = [Eq(f.forward, f[t, x-1] + f[t, x+1] + 1.),
            # An Inc does not require a halo update
            Inc(f[t+1, i], 1.),
            # Accesses `f` at `t`, not `t+1`, hence no halo update either
            Eq(g, f[t, j] + 1)]
    op = Operator(eqns)

    assert len(FindNodes(Call).visit(op)) == 1
provided to an Operator, the resulting loop nest is the same.
The array accesses in the equations may or may not use offsets;
these impact the loop bounds, but not the resulting tree
structure.
"""
eq1, eq2, eq3 = EVAL(exprs, ti0.base, ti1.base, ti3.base)
op1 = Operator([eq1, eq2, eq3], dse='noop', dle='noop')
op2 = Operator([eq2, eq1, eq3], dse='noop', dle='noop')
op3 = Operator([eq3, eq2, eq1], dse='noop', dle='noop')
trees = [retrieve_iteration_tree(i) for i in [op1, op2, op3]]
assert all(len(i) == 1 for i in trees)
trees = [i[0] for i in trees]
for tree in trees:
assert IsPerfectIteration().visit(tree[0])
exprs = FindNodes(Expression).visit(tree[-1])
assert len(exprs) == 3
u = TimeFunction(name='u', save=None, grid=grid, space_order=0, time_order=1)
xi = SubDimension.middle(name='xi', parent=x,
thickness_left=thickness, thickness_right=thickness)
yi = SubDimension.middle(name='yi', parent=y,
thickness_left=thickness, thickness_right=thickness)
# flow dependencies in x and y which should force serial execution
# in reverse direction
centre = Eq(u[t+1, xi, yi], u[t, xi, yi] + u[t+1, xi+1, yi+1])
u.data[0, 10, 10] = 1.0
op = Operator([centre])
iterations = FindNodes(Iteration).visit(op)
assert all(i.is_Affine and i.is_Sequential for i in iterations if i.dim == xi)
assert all(i.is_Affine and i.is_Parallel for i in iterations if i.dim == yi)
op.apply(time_m=0, time_M=0)
for i in range(4, 11):
assert u.data[1, i, i] == 1.0
u.data[1, i, i] = 0.0
assert np.all(u.data[1, :] == 0)
ti1 = Array(name='ti1', shape=grid.shape, dimensions=grid.dimensions) # noqa
ti3 = Array(name='ti3', shape=grid.shape, dimensions=grid.dimensions) # noqa
f = Function(name='f', grid=grid) # noqa
tu = TimeFunction(name='tu', grid=grid) # noqa
tv = TimeFunction(name='tv', grid=grid) # noqa
tw = TimeFunction(name='tw', grid=grid) # noqa
# List comprehension would need explicit locals/globals mappings to eval
eqns = []
for e in exprs:
eqns.append(eval(e))
op = Operator(eqns, dse='noop', dle=('noop', {'openmp': False}))
trees = retrieve_iteration_tree(op)
iters = FindNodes(Iteration).visit(op)
assert len(trees) == len(expected)
assert len(iters) == len(directions)
# mapper just makes it quicker to write out the test parametrization
mapper = {'time': 't'}
assert ["".join(mapper.get(i.dim.name, i.dim.name) for i in j)
for j in trees] == expected
assert "".join(mapper.get(i.dim.name, i.dim.name) for i in iters) == visit
# mapper just makes it quicker to write out the test parametrization
mapper = {'+': Forward, '-': Backward, '*': Any}
assert all(i.direction == mapper[j] for i, j in zip(iters, directions))
v = TimeFunction(name='v', grid=grid, time_order=4) # noqa
op = Operator(eval(expr), dle='noop')
iters = FindNodes(Iteration).visit(op)
time_iter = [i for i in iters if i.dim.is_Time]
assert len(time_iter) == 1
time_iter = time_iter[0]
# Check uindices in Iteration header
signatures = [(i._offset, i._modulo) for i in time_iter.uindices]
assert len(signatures) == len(exp_uindices)
assert all(i in signatures for i in exp_uindices)
# Check uindices within each TimeFunction
exprs = [i.expr for i in FindNodes(Expression).visit(op)]
assert(i.indices[i.function._time_position].modulo == exp_mods[i.function.name]
for i in flatten(retrieve_indexed(i) for i in exprs))
trees = handle
if not trees:
continue
# Check foldability
pairwise_folds = list(zip(*reversed(trees)))
if any(not is_foldable(j) for j in pairwise_folds):
continue
# Maybe heuristically exclude innermost Iteration
if blockinner is False:
pairwise_folds = pairwise_folds[:-1]
# Perhaps there's nothing to fold
if len(pairwise_folds) == 0:
continue
# TODO: we do not currently support blocking if any of the foldable
# iterations writes to user data (need min/max loop bounds?)
exprs = flatten(FindNodes(Expression).visit(j.root) for j in trees[:-1])
if any(j.write.is_Input for j in exprs):
continue
# Perform folding
for j in pairwise_folds:
r, remainder = j[0], j[1:]
folds = [(tuple(y-x for x, y in zip(i.offsets, r.offsets)), i.nodes)
for i in remainder]
mapper[r] = IterationFold(folds=folds, **r.args)
for k in remainder:
mapper[k] = None
# Insert the IterationFolds in the Iteration/Expression tree
iet = Transformer(mapper, nested=True).visit(iet)
return iet
* Remove all ``useless`` HaloSpots;
* Merge all ``hoistable`` HaloSpots with their root HaloSpot, thus
removing redundant communications and anticipating communications
that will be required by later Iterations.
"""
# Drop `useless` HaloSpots
mapper = {hs: hs._rebuild(halo_scheme=hs.halo_scheme.drop(hs.useless))
for hs in FindNodes(HaloSpot).visit(iet)}
iet = Transformer(mapper, nested=True).visit(iet)
# Handle `hoistable` HaloSpots
# First, we merge `hoistable` HaloSpots together, to anticipate communications
mapper = {}
for tree in retrieve_iteration_tree(iet):
halo_spots = FindNodes(HaloSpot).visit(tree.root)
if not halo_spots:
continue
root = halo_spots[0]
if root in mapper:
continue
hss = [root.halo_scheme]
hss.extend([hs.halo_scheme.project(hs.hoistable) for hs in halo_spots[1:]])
try:
mapper[root] = root._rebuild(halo_scheme=HaloScheme.union(hss))
except ValueError:
# HaloSpots have non-matching `loc_indices` and therefore can't be merged
perf_adv("Found hoistable HaloSpots with disjoint loc_indices, "
"skipping optimization")
continue
for hs in halo_spots[1:]:
halo_scheme = hs.halo_scheme.drop(hs.hoistable)
def instrument(self, iet):
"""
Enrich the Iteration/Expression tree ``iet`` adding nodes for C-level
performance profiling. In particular, turn all Sections within ``iet``
into TimedLists.
"""
sections = FindNodes(Section).visit(iet)
for section in sections:
bundles = FindNodes(ExpressionBundle).visit(section)
# Total operation count
ops = sum(i.ops for i in bundles)
# Operation count at each section iteration
sops = sum(estimate_cost(i.expr) for i in flatten(b.exprs for b in bundles))
# Total memory traffic
mapper = {}
for i in bundles:
for k, v in i.traffic.items():
mapper.setdefault(k, []).append(v)
traffic = 0
for i in mapper.values():
def _optimize_halo_updates(self, iet, state):
    """
    Remove needless halo exchanges from ``iet``.

    Every HaloSpot flagged as redundant is dropped from the tree; the
    rewritten tree is returned together with an empty state dict.
    """
    # Map each redundant HaloSpot to None so the Transformer prunes it
    drop = {}
    for hs in FindNodes(HaloSpot).visit(iet):
        if hs.is_Redundant:
            drop[hs] = None
    processed = Transformer(drop, nested=True).visit(iet)

    return processed, {}