Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Nodes: two data arrays, an outer and an inner map, a reduction, two
# transients and a write-out tasklet.
# A is a 128-element and B a 2-element float32 array.
A = state.add_array('A', (128, ), dace.float32)
B = state.add_array('B', (2, ), dace.float32)
# Outer map 'mymap' iterates parameter bi; inner map 'mymap2' iterates i.
me, mx = state.add_map('mymap', dict(bi='0:2'))
mei, mxi = state.add_map('mymap2', dict(i='0:32'))
# Sum reduction (a+b) using the GPU_ThreadBlock schedule.
# NOTE(review): the positional None and 0 are presumably the reduction axes
# and the identity value -- confirm against add_reduce's signature.
red = state.add_reduce('lambda a,b: a+b', None, 0,
dace.ScheduleType.GPU_ThreadBlock)
# tA (2 elements) is the reduction input, tB (1 element) its output.
tA = state.add_transient('tA', (2, ), dace.float32)
tB = state.add_transient('tB', (1, ), dace.float32)
# The tasklet forwards its input to its output only when i == 0.
write_tasklet = state.add_tasklet('writeout', {'inp'}, {'out'},
'if i == 0: out = inp')
# Edges, in dataflow order:
# A -> me -> mei -> tA -> red -> tB -> write_tasklet -> mxi -> mx -> B
# All of A enters the outer map.
state.add_edge(A, None, me, None, Memlet.simple(A, '0:128'))
# Each bi passes the slice (64*bi):(64*bi+64) of A to the inner map.
state.add_edge(me, None, mei, None, Memlet.simple(A, '(64*bi):(64*bi+64)'))
# Each (bi, i) copies the slice (64*bi+2*i):(64*bi+2*i+2) of A into tA.
state.add_edge(mei, None, tA, None,
Memlet.simple('A', '(64*bi+2*i):(64*bi+2*i+2)'))
# tA is reduced into the single element of tB.
state.add_edge(tA, None, red, None, Memlet.simple(tA, '0:2'))
state.add_edge(red, None, tB, None, Memlet.simple(tB, '0'))
state.add_edge(tB, None, write_tasklet, 'inp', Memlet.simple(tB, '0'))
# This memlet is built with the Memlet constructor rather than Memlet.simple.
# NOTE(review): the -1 presumably marks a dynamic number of accesses, since
# the tasklet writes conditionally -- confirm against the Memlet constructor.
state.add_edge(write_tasklet, 'out', mxi, None,
Memlet('B', -1, dace.subsets.Indices(['bi']), 1))
# The result leaves both map scopes and lands in B.
state.add_edge(mxi, None, mx, None, Memlet.simple(B, 'bi'))
state.add_edge(mx, None, B, None, Memlet.simple(B, '0:2'))
# The map-node edges above were added with None connectors.
# NOTE(review): this call presumably creates the matching scope connectors
# -- confirm.
sdfg.fill_scope_connectors()
# Script entry point: print a banner and draw the SDFG to a file.
if __name__ == '__main__':
print('Block reduction test')
sdfg.draw_to_file()
state = sdfg.add_state("copy_to_device")
A_host = sdfg.add_array("A", [SIZE], dtype=DTYPE)
A_device = sdfg.add_array("A_device", [SIZE],
dtype=DTYPE,
transient=True,
storage=dace.dtypes.StorageType.FPGA_Global)
read = state.add_read("A")
write = state.add_write("A_device")
state.add_memlet_path(read,
write,
memlet=dace.memlet.Memlet.simple(
"A_device",
"0:N",
veclen=VECTOR_LENGTH.get(),
num_accesses=SIZE))
return state
",".join(inputShape))
dtype = _tensortype(node)
inputNode = state.add_array(
name=label + "_Inp", shape=shape, dtype=dace.typeclass(dtype))
# Create and add the map
mapDict = dict(zip(inputParams, inputDims))
inMemletDict = dict(
j0=Memlet.simple(inputNode, ",".join(inputParams)))
outMemletDict = dict(
out=Memlet.simple(outputNode, ",".join(outputParams)))
code = "out = j0"
tasklet, map_entry, map_exit = state.add_mapped_tasklet(
label, mapDict, inMemletDict, code, outMemletDict)
state.add_edge(inputNode, None, map_entry, None,
Memlet.simple(inputNode, ",".join(inputDims)))
state.add_edge(map_exit, None, outputNode, None,
Memlet.simple(outputNode, ",".join(outputDims)))
# If training example node is not already in inputDict, add a
# zero array. This prevents DaCe from raising a key error when
# trying to call the dace function if we only execute a subgraph
# where it does not appear. This might not be necessary any longer.
if (label + "_Inp" not in self.inpDict.keys()):
self.inpDict[label + "_Inp"] = np.zeros(
tuple(map(int, (inputShape))), dtype=dtype)
# If we are not training, set the output non transient and add to
# input dict
else:
outputNode.desc(self.graph).transient = False
self.inpDict[label] = np.zeros(
tasklet = body.add_tasklet(
"read_row", {"row_in"},
{"to_val_out", "to_col_out", "to_compute_out", "to_x_out"},
"to_val_out = row_in\n"
"to_col_out = row_in\n"
"to_compute_out = row_in\n"
"to_x_out = row_in")
body.add_memlet_path(a_row_mem,
tasklet,
dst_conn="row_in",
memlet=Memlet.simple(a_row_mem, "h"))
body.add_memlet_path(tasklet,
to_val_pipe,
src_conn="to_val_out",
memlet=Memlet.simple(to_val_pipe, "0"))
body.add_memlet_path(tasklet,
to_col_pipe,
src_conn="to_col_out",
memlet=Memlet.simple(to_col_pipe, "0"))
body.add_memlet_path(tasklet,
to_compute_pipe,
src_conn="to_compute_out",
memlet=Memlet.simple(to_compute_pipe, "0"))
body.add_memlet_path(tasklet,
to_x_pipe,
src_conn="to_x_out",
memlet=Memlet.simple(to_x_pipe, "0"))
return sdfg
def _unop(sdfg: SDFG, state: SDFGState, op1: str, opcode: str, opname: str):
    """Apply a unary operator to every element of an array.

    A new temporary transient with the same shape, dtype and storage as
    the operand is registered in ``sdfg``, and a mapped tasklet is added to
    ``state`` that iterates one map parameter per operand dimension and
    executes ``__out = <opcode> __in1`` for each element.

    :param sdfg: SDFG holding the operand descriptor; receives the new
                 transient.
    :param state: State to which the mapped tasklet is added.
    :param op1: Name of the operand array in ``sdfg.arrays``.
    :param opcode: Operator text placed in front of ``__in1`` in the
                   tasklet code.
    :param opname: Operator name, used for the tasklet label
                   ``_<opname>_``.
    :return: Name of the transient that holds the result.
    """
    operand = sdfg.arrays[op1]
    result_name, _ = sdfg.add_temp_transient(operand.shape, operand.dtype,
                                             operand.storage)

    # One map parameter (__i0, __i1, ...) per operand dimension; the same
    # index tuple addresses both the input and the output element.
    index_names = ['__i%d' % dim for dim in range(len(operand.shape))]
    map_ranges = {
        index: '0:%s' % extent
        for index, extent in zip(index_names, operand.shape)
    }
    element = ','.join(index_names)

    # NOTE(review): external_edges=True presumably also connects the map to
    # access nodes for the operand and the result -- confirm.
    state.add_mapped_tasklet(
        "_%s_" % opname,
        map_ranges,
        {'__in1': Memlet.simple(op1, element)},
        '__out = %s __in1' % opcode,
        {'__out': Memlet.simple(result_name, element)},
        external_edges=True)
    return result_name
k_entry, k_end,
dace.sdfg.InterstateEdge(
condition=dace.properties.CodeProperty.from_string(
"k >= K", language=dace.dtypes.Language.Python)))
mem = loop_body.add_array("mem", [N, K],
dtype=dace.float32,
storage=dace.dtypes.StorageType.FPGA_Global)
pipe = loop_body.add_stream("pipe",
dace.float32,
storage=dace.dtypes.StorageType.FPGA_Local)
loop_body.add_memlet_path(mem,
pipe,
memlet=dace.memlet.Memlet.simple(
pipe, '0', other_subset_str="n, k"))
return sdfg
outer_indices.append(n)
elif n not in inner_indices:
inner_indices.add(n)
irng.pop(outer_indices)
orng.pop(outer_indices)
irng.offset(orng, True)
if (memlet.data, scope_memlet.subset,
'w') in self.accesses:
vname = self.accesses[(memlet.data,
scope_memlet.subset, 'w')][0]
memlet = Memlet.simple(vname, str(irng))
elif (memlet.data, scope_memlet.subset,
'r') in self.accesses:
vname = self.accesses[(memlet.data,
scope_memlet.subset, 'r')][0]
memlet = Memlet.simple(vname, str(irng))
else:
name = memlet.data
vname = "{c}_in_from_{s}{n}".format(
c=conn,
s=self.sdfg.nodes().index(state),
n=('_%s' % state.node_id(entry_node)
if entry_node else ''))
self.accesses[(name, scope_memlet.subset,
'r')] = (vname, orng)
orig_shape = orng.size()
shape = [d for d in orig_shape if d != 1]
strides = [
i for j, i in enumerate(arr.strides)
if j not in outer_indices
]
strides = [
row_to_val_in = state.add_stream("row_to_val",
itype,
transient=True,
storage=StorageType.FPGA_Local)
val_to_compute_out = state.add_stream("val_to_compute",
dtype,
transient=True,
storage=StorageType.FPGA_Local)
read_val_sdfg = make_read_val()
read_val_tasklet = state.add_nested_sdfg(read_val_sdfg, sdfg,
{"A_val_mem", "row_pipe"},
{"compute_pipe"})
state.add_memlet_path(a_val,
read_val_tasklet,
dst_conn="A_val_mem",
memlet=dace.memlet.Memlet.simple(a_val, "0:nnz"))
state.add_memlet_path(row_to_val_in,
read_val_tasklet,
dst_conn="row_pipe",
memlet=dace.memlet.Memlet.simple(row_to_val_in,
"0",
num_accesses=-1))
state.add_memlet_path(read_val_tasklet,
val_to_compute_out,
src_conn="compute_pipe",
memlet=dace.memlet.Memlet.simple(val_to_compute_out,
"0",
num_accesses=-1))
# Read values of x using column pointers and send to compute
x = state.add_array("x_device", (W, ),
dtype,
dtype=dace.float32,
storage=dace.dtypes.StorageType.FPGA_Local)
else_C_in = else_state.add_scalar(
"C_in", dtype=dace.float32, storage=dace.dtypes.StorageType.FPGA_Local)
else_C_out = else_state.add_scalar(
"C_out",
dtype=dace.float32,
storage=dace.dtypes.StorageType.FPGA_Local)
# Memlets
then_a_val_memlet = dace.memlet.Memlet.simple(then_A_val, "0")
then_b_val_memlet = dace.memlet.Memlet.simple(then_B_val, "0")
then_c_out_memlet = dace.memlet.Memlet.simple(then_C_out, "0")
else_a_val_memlet = dace.memlet.Memlet.simple(else_A_val, "0")
else_b_val_memlet = dace.memlet.Memlet.simple(else_B_val, "0")
else_c_in_memlet = dace.memlet.Memlet.simple(else_C_in, "0")
else_c_out_memlet = dace.memlet.Memlet.simple(else_C_out, "0")
# Draw paths within each state
then_state.add_memlet_path(then_A_val,
then_tasklet,
memlet=then_a_val_memlet,
dst_conn="a")
then_state.add_memlet_path(then_B_val,
then_tasklet,
memlet=then_b_val_memlet,
dst_conn="b")
then_state.add_memlet_path(then_tasklet,
then_C_out,
memlet=then_c_out_memlet,
src_conn="c_out")
code = """\
if y >= 3 and x >= 3 and y < H - 1 and x < W - 1:
result = 0.2 * (window[0, 1] + window[1, 0] + window[1, 1] + window[1, 2] + window[2, 1])"""
tasklet = loop_body.add_tasklet("compute", {"window"}, {"result"}, code)
# Input window
loop_body.add_memlet_path(window_compute_in,
tasklet,
dst_conn="window",
memlet=dace.memlet.Memlet.simple(
window_compute_in, "0:3, 0:3"))
# Output result (conditional write)
out_memlet = dace.memlet.Memlet.simple(stream_out, "0", num_accesses=-1)
loop_body.add_memlet_path(tasklet,
stream_out,
src_conn="result",
memlet=out_memlet)
# Read row buffer
read_row_memlet = dace.memlet.Memlet.simple(rows_in,
'0:2, x',
other_subset_str="0:2, 2")
pre_shift.add_memlet_path(rows_in,
window_buffer_out,
memlet=read_row_memlet)
# Read from memory
read_memory_memlet = dace.memlet.Memlet.simple(stream_in,
'0',