Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def make_copy_to_fpga_state(sdfg):
    """Build the "copy_to_device" state that copies "A" into "A_device".

    Registers two arrays of shape ``[SIZE]`` and element type ``DTYPE`` on
    ``sdfg``: the non-transient "A" and the transient "A_device" (storage
    type ``FPGA_Global``). A read node for "A" is then connected to a write
    node for "A_device" inside the new state.

    :param sdfg: SDFG that receives the new state and both arrays.
    :return: The newly added state.
    """
    copy_state = sdfg.add_state("copy_to_device")

    # Register the data descriptors; only the names are needed afterwards.
    sdfg.add_array("A", [SIZE], dtype=DTYPE)
    sdfg.add_array("A_device", [SIZE],
                   dtype=DTYPE,
                   transient=True,
                   storage=dace.dtypes.StorageType.FPGA_Global)

    host_read = copy_state.add_read("A")
    device_write = copy_state.add_write("A_device")

    # One memlet over the range "0:N" of the device array.
    copy_memlet = dace.memlet.Memlet.simple("A_device",
                                            "0:N",
                                            veclen=VECTOR_LENGTH.get(),
                                            num_accesses=SIZE)
    copy_state.add_memlet_path(host_read, device_write, memlet=copy_memlet)

    return copy_state
{"pipe"})
# Build the compute SDFG and nest it into `state` (second argument: `sdfg`).
# The two sets name its connectors: three "*_stream_in" and three
# "*_stream_out".
compute_sdfg = make_compute_sdfg()
compute_sdfg_node = state.add_nested_sdfg(
compute_sdfg, sdfg, {"A_stream_in", "B_stream_in", "C_stream_in"},
{"A_stream_out", "B_stream_out", "C_stream_out"})
# Same for the SDFG returned by make_write_C_sdfg(), with connector sets
# {"pipe"} and {"mem"}.
write_C_sdfg = make_write_C_sdfg()
write_C_sdfg_node = state.add_nested_sdfg(write_C_sdfg, sdfg, {"pipe"},
{"mem"})
# Transient float32 arrays with storage type FPGA_Global:
# "A_device" has shape [N, K], "B_device" [K, M] and "C_device" [N, M].
A = state.add_array(
"A_device", [N, K],
dtype=dace.float32,
transient=True,
storage=dace.dtypes.StorageType.FPGA_Global)
B = state.add_array(
"B_device", [K, M],
dtype=dace.float32,
transient=True,
storage=dace.dtypes.StorageType.FPGA_Global)
C = state.add_array(
"C_device", [N, M],
dtype=dace.float32,
transient=True,
storage=dace.dtypes.StorageType.FPGA_Global)
A_pipe_read = state.add_stream(
"A_pipe",
dace.float32,
transient=True,
shape=(P + 1, ),
nested_axpy_1_node,
memlet=dace.Memlet.simple(in_data_B.data, '0:N'),
dst_conn="mem_B")
# Connect the "stream_out" connector of nested_axpy_1_node to stream_wr,
# using a memlet over the range '0:N' of stream_wr's data.
parent_nested_axpy.add_memlet_path(nested_axpy_1_node,
stream_wr,
memlet=dace.Memlet.simple(stream_wr.data, '0:N'),
src_conn="stream_out")
#### STORE PART#############
# Nest store_sdfg into the parent_nested_axpy state; its connectors are
# "stream_in" (used as a destination below) and "mem" (used as a source below).
store_node = parent_nested_axpy.add_nested_sdfg(store_sdfg, parent_sdfg, {"stream_in"}, {"mem"})
out_data = parent_nested_axpy.add_write("out_device")
# Register the transient float32 stream "stream_rcv" with storage type
# FPGA_Remote on parent_sdfg; only the second returned value is kept.
_, stream_node = parent_sdfg.add_stream("stream_rcv", dtype=dace.float32, transient=True,
storage=dace.dtypes.StorageType.FPGA_Remote)
#####################################################
# set SMI properties
# NOTE(review): both values are stored as strings; presumably "snd_rank" is
# the sending rank and "port" the SMI port -- confirm against the SMI backend.
stream_node.location["snd_rank"] = "1"
stream_node.location["port"] = "1"
stream_rd = parent_nested_axpy.add_read("stream_rcv")
# "stream_rcv" read node -> store_node connector "stream_in".
parent_nested_axpy.add_memlet_path(stream_rd,
store_node,
memlet=dace.Memlet.simple(stream_rd.data, '0:N'),
dst_conn="stream_in")
# store_node connector "mem" -> "out_device" write node.
parent_nested_axpy.add_memlet_path(store_node,
out_data,
memlet=dace.Memlet.simple(out_data.data, '0:N'),
src_conn="mem")
# Link copy_in_state to parent_nested_axpy with a default-constructed
# InterstateEdge (no arguments are passed to it).
parent_sdfg.add_edge(copy_in_state, parent_nested_axpy, dace.sdfg.edges.InterstateEdge())
dtype=dace.float32,
transient=True,
storage=dace.dtypes.StorageType.FPGA_Global)
# Transient float32 array "C_device" of shape [N, M], storage type FPGA_Global.
C = state.add_array("C_device", [N, M],
dtype=dace.float32,
transient=True,
storage=dace.dtypes.StorageType.FPGA_Global)
# "C_buffer" (shape [M], storage type FPGA_Local) is added twice under the
# same name, yielding two separate handles: C_buffer_in and C_buffer_out.
# NOTE(review): presumably one is used as the read side and the other as the
# write side of the same buffer -- confirm against the code that follows.
C_buffer_in = state.add_array("C_buffer", [M],
dtype=dace.float32,
transient=True,
storage=dace.dtypes.StorageType.FPGA_Local)
C_buffer_out = state.add_array("C_buffer", [M],
dtype=dace.float32,
transient=True,
storage=dace.dtypes.StorageType.FPGA_Local)
# Three maps with schedule FPGA_Device: n over "0:N", k over "0:K" and
# m over "0:M". Each add_map call returns an (entry, exit) pair.
n_entry, n_exit = state.add_map(
"Map_N", {"n": "0:N"}, schedule=dace.dtypes.ScheduleType.FPGA_Device)
k_entry, k_exit = state.add_map(
"Map_K", {"k": "0:K"}, schedule=dace.dtypes.ScheduleType.FPGA_Device)
m_entry, m_exit = state.add_map(
"Map_M", {"m": "0:M"}, schedule=dace.dtypes.ScheduleType.FPGA_Device)
# Edge from the n-map entry to C_buffer_in carrying a default-constructed
# Memlet. NOTE(review): presumably an empty memlet used only to place the
# buffer inside the n-map scope -- confirm.
state.add_nedge(n_entry, C_buffer_in, dace.memlet.Memlet())
###########################################################################
# Nested SDFG
# A fresh SDFG named "zero_or_wcr" whose first added state is "if_state".
nested_sdfg = dace.SDFG("zero_or_wcr")
if_state = nested_sdfg.add_state("if_state")
def make_write_buffer_state(sdfg):
    """Build the "write_buffer" state that copies "hist_buffer" to "hist_out".

    Both arrays have shape ``(num_bins, )`` and element type ``dace.uint32``.
    "hist_buffer" is transient with storage type ``FPGA_Local``; "hist_out"
    uses storage type ``FPGA_Global``. The two are connected by one memlet
    over the range "0:num_bins".

    :param sdfg: SDFG that receives the new state.
    :return: The newly added state.
    """
    out_state = sdfg.add_state("write_buffer")
    storage_kinds = dace.dtypes.StorageType

    local_hist = out_state.add_array("hist_buffer", (num_bins, ),
                                     dace.uint32,
                                     transient=True,
                                     storage=storage_kinds.FPGA_Local)
    global_hist = out_state.add_array("hist_out", (num_bins, ),
                                      dace.uint32,
                                      storage=storage_kinds.FPGA_Global)

    # The memlet is built from the destination handle itself, as before.
    full_range = dace.memlet.Memlet.simple(global_hist, "0:num_bins")
    out_state.add_memlet_path(local_hist, global_hist, memlet=full_range)

    return out_state
def add_tmp(state):
    """Add the transient "tmp" array to ``state`` and return the result.

    The array has shape ``(2, H, W)``, uses the ``dtype`` found in the
    enclosing scope, and has storage type ``FPGA_Global``.

    :param state: Object providing ``add_array``.
    :return: Whatever ``state.add_array`` returns.
    """
    tmp_shape = (2, H, W)
    tmp_options = dict(transient=True,
                       storage=dace.dtypes.StorageType.FPGA_Global)
    return state.add_array("tmp", tmp_shape, dtype, **tmp_options)
def signature(self, with_types=True, for_call=False, name=None):
    """Return the text used for this stream in a generated signature.

    :param with_types: If false, only ``name`` is returned.
    :param for_call: If true, only ``name`` is returned.
    :param name: Identifier to place in the signature.
    :return: ``name`` itself when no type is requested. Otherwise a
             ``dace::GPUStream<ctype, flag> name`` string for the GPU_Global
             and GPU_Shared storage types, where ``flag`` is 'true' exactly
             when log2 of ``self.buffer_size`` is an integer, or a
             ``dace::Stream<ctype> name`` string for every other storage
             type.
    """
    wants_declaration = with_types and not for_call
    if not wants_declaration:
        return name

    gpu_storages = (dtypes.StorageType.GPU_Global,
                    dtypes.StorageType.GPU_Shared)
    if self.storage not in gpu_storages:
        return 'dace::Stream<%s> %s' % (str(self.dtype.ctype), name)

    element_type = str(self.dtype.ctype)
    # Second template argument: whether the buffer size is a power of two.
    if sp.log(self.buffer_size, 2).is_Integer:
        pow2_flag = 'true'
    else:
        pow2_flag = 'false'
    return 'dace::GPUStream<%s, %s> %s' % (element_type, pow2_flag, name)
def add_array(self,
              name,
              shape,
              dtype,
              storage=dtypes.StorageType.Default,
              transient=False,
              strides=None,
              offset=None,
              lifetime=dtypes.AllocationLifetime.Scope,
              debuginfo=None,
              total_size=None,
              find_new_name=False,
              alignment=0):
    """ @attention: This function is deprecated.

        Emits a DeprecationWarning pointing users to ``SDFG.add_array`` and
        ``SDFGState.add_access``, then removes any array already stored
        under ``name`` in ``self.parent._arrays`` so that the legacy call
        does not clash with an earlier definition of the same name.

        NOTE(review): the body visible here stops after that removal step;
        the remaining parameters are not used in the visible part.

        :param name: Name of the array; an existing entry with this name is
                     dropped from the parent's array dictionary.
    """
    # stacklevel=2 attributes the warning to the caller of this deprecated
    # API rather than to this function, so the report names the line that
    # needs migrating and Python's default filters can show it when the
    # caller runs as __main__.
    warnings.warn(
        'The "SDFGState.add_array" API is deprecated, please '
        'use "SDFG.add_array" and "SDFGState.add_access"',
        DeprecationWarning,
        stacklevel=2)
    # Workaround to allow this legacy API
    if name in self.parent._arrays:
        del self.parent._arrays[name]
def dispatch_copy(self, src_node, dst_node, edge, sdfg, dfg, state_id,
                  function_stream, output_stream):
    """ Dispatches a code generator for a memory copy operation.

        The visible part determines three things for the copy:
        the storage type of the source, the storage type of the
        destination, and the schedule of the scope the copy belongs to.
    """
    # Code nodes count as Register storage; every other node reports the
    # storage of its data descriptor.
    src_storage = (dtypes.StorageType.Register
                   if isinstance(src_node, nodes.CodeNode) else
                   src_node.desc(sdfg).storage)
    dst_storage = (dtypes.StorageType.Register
                   if isinstance(dst_node, nodes.CodeNode) else
                   dst_node.desc(sdfg).storage)

    # Special case: Copying from a tasklet to an array, schedule of
    # the copy is in the copying tasklet
    tasklet_to_data = (isinstance(src_node, nodes.Tasklet)
                       and not isinstance(dst_node, nodes.Tasklet))
    scope_key = src_node if tasklet_to_data else dst_node
    dst_schedule_node = dfg.scope_dict()[scope_key]

    # A node outside any scope has no enclosing map and thus no schedule.
    dst_schedule = (None if dst_schedule_node is None else
                    dst_schedule_node.map.schedule)
def add_tmp(state):
    """Register the transient array "tmp" on ``state``.

    Shape is ``(2, H, W)``, the element type is the ``dtype`` visible in
    the enclosing scope, and the storage type is ``FPGA_Global``.

    :param state: Object providing ``add_array``.
    :return: The value returned by ``state.add_array``.
    """
    fpga_global = dace.dtypes.StorageType.FPGA_Global
    dims = (2, H, W)
    return state.add_array("tmp",
                           dims,
                           dtype,
                           transient=True,
                           storage=fpga_global)