Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Read side of the nested AXPY state: two inputs ("mem_A", "mem_B") are summed
# element by element inside a map over i in 0:N and written to "stream_out".
# NOTE(review): "mem_A" is not registered in the visible lines; presumably an
# add_array call for it precedes this fragment -- confirm.
in_read_A = nested_axpy_1_state.add_read("mem_A")
# "mem_B": float32 array of shape [N] with FPGA_Global storage.
nested_axpy_1_sdfg.add_array("mem_B", shape=[N], dtype=dace.float32, storage=dace.dtypes.StorageType.FPGA_Global)
in_read_B = nested_axpy_1_state.add_read("mem_B")
# "stream_out": float32 stream with FPGA_Remote storage; target of the tasklet.
nested_axpy_1_sdfg.add_stream('stream_out', dtype=dace.float32, storage=dace.dtypes.StorageType.FPGA_Remote)
stream_write = nested_axpy_1_state.add_write("stream_out")
# add_mapped_tasklet hands back the tasklet plus the map entry/exit nodes; the
# entry/exit nodes are wired to the access nodes by hand further down.
# Note that the "# code" marker below sits inside the triple-quoted string, so
# it is part of the tasklet source text (where it reads as a comment).
tasklet, map_entry, map_exit = nested_axpy_1_state.add_mapped_tasklet(
'read', # name
dict(i='0:N'), # map range
dict(inp_A=dace.Memlet.simple(in_read_A.data, 'i'), # input memlets
inp_B=dace.Memlet.simple(in_read_B.data, 'i')),
''' # code
out = inp_A + inp_B
''',
dict(out=dace.Memlet.simple(stream_write.data, 'i')), # output memlets,
schedule=dace.dtypes.ScheduleType.FPGA_Device
)
# Add edges to map: each input access node feeds the map entry with a memlet
# covering the whole 0:N range; both connector arguments are left as None.
nested_axpy_1_state.add_edge(
in_read_A, None,
map_entry, None,
memlet=dace.Memlet.simple(in_read_A.data, '0:N'))
nested_axpy_1_state.add_edge(
in_read_B, None,
map_entry, None,
memlet=dace.Memlet.simple(in_read_B.data, '0:N'))
# Add output path (exit->dst)
nested_axpy_1_state.add_edge(
map_exit, None,
def _set_default_schedule_in_scope(parent_node: nodes.Node,
parent_schedule: dtypes.ScheduleType,
reverse_scope_dict: Dict[nodes.Node,
List[nodes.Node]]):
for node in reverse_scope_dict[parent_node]:
child_schedule = dtypes.SCOPEDEFAULT_SCHEDULE[parent_schedule]
# Set default schedule type
if isinstance(node, nodes.MapEntry):
if node.map.schedule == dtypes.ScheduleType.Default:
node.map.schedule = child_schedule
# Also traverse children (recursively)
_set_default_schedule_in_scope(node, node.map.schedule,
reverse_scope_dict)
elif isinstance(node, nodes.ConsumeEntry):
if node.consume.schedule == dtypes.ScheduleType.Default:
node.consume.schedule = child_schedule
# Also traverse children (recursively)
_set_default_schedule_in_scope(node, node.consume.schedule,
reverse_scope_dict)
elif isinstance(node, nodes.NestedSDFG):
# Nested SDFGs retain same schedule as their parent scope
def __init__(self,
             data,
             access=dtypes.AccessType.ReadWrite,
             debuginfo=None):
    """Initialize the access node.

    :param data: Identifier of the data this node accesses; must be a
                 ``str``.
    :param access: Access type stored on the node (defaults to
                   ``dtypes.AccessType.ReadWrite``).
    :param debuginfo: Debug information, stored in ``debuginfo2``.
    :raises TypeError: If ``data`` is not a string. The check runs after
                       ``debuginfo2`` and ``access`` have been assigned.
    """
    super(AccessNode, self).__init__()

    # Properties
    self.debuginfo2 = debuginfo
    self.access = access

    # Only string identifiers are accepted for the data property.
    if isinstance(data, str):
        self.data = data
    else:
        raise TypeError('Data for AccessNode must be a string')
if isinstance(n, dace.graph.nodes.EntryNode)
]
# Count top-level scopes that are emitted as unrolled processing elements.
unrolled_loops = 0
if len(top_scopes) == 1:
    scope = top_scopes[0]
    if scope.unroll:
        # Unrolled processing elements
        self._unrolled_pes.add(scope.map)
        # PE id will be a macro defined constant
        pe_params_opencl = ["const int " + p for p in scope.params]
        kernel_args_opencl += pe_params_opencl
        kernel_args_call += list(scope.params)
        unrolled_loops += 1

# Ensure no duplicate parameters are used
kernel_args_opencl = dtypes.deduplicate(kernel_args_opencl)
kernel_args_call = dtypes.deduplicate(kernel_args_call)
# Add kernel call host function
if unrolled_loops == 0:
host_body_stream.write(
"kernels.emplace_back(program.MakeKernel(\"{}\"{}));".format(
module_function_name, ", ".join([""] + kernel_args_call)
if len(kernel_args_call) > 0 else ""), sdfg, state_id)
else:
# We will generate a separate kernel for each PE. Adds host call
for ul in self._unrolled_pes:
start, stop, skip = ul.range.ranges[0]
start_idx = evaluate(start, sdfg.constants)
stop_idx = evaluate(stop, sdfg.constants)
skip_idx = evaluate(skip, sdfg.constants)
# Due to restrictions on channel indexing, PE IDs must start from zero
dtype=vtype,
buffer_size=buffer_size,
transient=True,
storage=StorageType.FPGA_Local)
# Create the FPGA_Local transient streams that connect the stages, then embed
# the compute stage as a nested SDFG.
# Each stream name is passed to add_stream twice with identical arguments so
# that there are two separate handles (*_in and *_out) for the same name.
# NOTE(review): presumably each call returns its own access node for the same
# underlying stream -- confirm against the add_stream implementation in use.
B_pipe_in = state.add_stream("B_pipe",
dtype=vtype,
buffer_size=buffer_size,
transient=True,
storage=StorageType.FPGA_Local)
B_pipe_out = state.add_stream("B_pipe",
dtype=vtype,
buffer_size=buffer_size,
transient=True,
storage=StorageType.FPGA_Local)
# "valid_pipe" carries booleans rather than vtype elements.
valid_pipe_in = state.add_stream("valid_pipe",
dtype=dace.dtypes.bool,
buffer_size=buffer_size,
transient=True,
storage=StorageType.FPGA_Local)
valid_pipe_out = state.add_stream("valid_pipe",
dtype=dace.dtypes.bool,
buffer_size=buffer_size,
transient=True,
storage=StorageType.FPGA_Local)
# Nested compute SDFG. The two sets are connector names; presumably the first
# set lists inputs and the second outputs -- confirm against add_nested_sdfg.
compute_sdfg = make_compute_sdfg()
compute_tasklet = state.add_nested_sdfg(compute_sdfg, sdfg,
{"_A_pipe", "ratio_nested"},
{"_B_pipe", "_valid_pipe", "count"})
write_sdfg = make_write_sdfg()
write_tasklet = state.add_nested_sdfg(write_sdfg, sdfg,
"A_val_in",
dtype=dace.float32,
storage=dace.dtypes.StorageType.FPGA_Registers)
# Declare the remaining data containers used in compute_state (all float32
# with FPGA_Registers storage) and wire the first two inputs to the tasklet.
# B_in / B_out are streams; C_val and C_out are scalars.
B_in = compute_state.add_stream(
"B_in",
dtype=dace.float32,
storage=dace.dtypes.StorageType.FPGA_Registers)
B_out = compute_state.add_stream(
"B_out",
dtype=dace.float32,
storage=dace.dtypes.StorageType.FPGA_Registers)
# "C_val" is the only container here marked transient.
C_val_in = compute_state.add_scalar(
"C_val",
dtype=dace.float32,
transient=True,
storage=dace.dtypes.StorageType.FPGA_Registers)
C_out = compute_state.add_scalar(
"C_out",
dtype=dace.float32,
storage=dace.dtypes.StorageType.FPGA_Registers)
# Connect A_val_in -> tasklet connector "a_in" with a memlet on index "0".
# NOTE(review): A_val_in and compute_tasklet are defined outside the visible
# lines.
compute_state.add_memlet_path(A_val_in,
compute_tasklet,
memlet=dace.memlet.Memlet.simple(
A_val_in, "0"),
dst_conn="a_in")
# Connect B_in -> tasklet connector "b_in" with a memlet on index "0".
compute_state.add_memlet_path(B_in,
compute_tasklet,
memlet=dace.memlet.Memlet.simple(B_in, "0"),
dst_conn="b_in")
compute_state.add_memlet_path(
compute_tasklet,
B_out,
return nsdfg
@dace.library.expansion
class ExpandReduceOpenMP(pm.ExpandTransformation):
"""
OpenMP-based implementation of the reduce node
"""
environments = []
_REDUCTION_TYPE_TO_OPENMP = {
dtypes.ReductionType.Max: ('max', '{o} = max({o}, {i});'),
dtypes.ReductionType.Min: ('min', '{o} = min({o}, {i});'),
dtypes.ReductionType.Sum: ('+', '{o} += {i};'),
dtypes.ReductionType.Product: ('*', '{o} *= {i};'),
dtypes.ReductionType.Bitwise_And: ('&', '{o} &= {i};'),
dtypes.ReductionType.Logical_And: ('&&', '{o} = {o} && {i};'),
dtypes.ReductionType.Bitwise_Or: ('|', '{o} |= {i};'),
dtypes.ReductionType.Logical_Or: ('||', '{o} = {o} || {i};'),
dtypes.ReductionType.Bitwise_Xor: ('^', '{o} ^= {i};'),
dtypes.ReductionType.Sub: ('-', '{o} -= {i};'),
dtypes.ReductionType.Div: ('/', '{o} /= {i};'),
}
@staticmethod
def expansion(node: 'Reduce', state: SDFGState, sdfg: SDFG):
node.validate(sdfg, state)
inedge: graph.MultiConnectorEdge = state.in_edges(node)[0]
outedge: graph.MultiConnectorEdge = state.out_edges(node)[0]
input_dims = len(inedge.data.subset)
output_dims = len(outedge.data.subset)
# Skip sequential maps to determine storage
while parent_schedule == dtypes.ScheduleType.Sequential:
parent_node = scope_dict[parent_node]
if parent_node is None:
parent_schedule = toplevel_schedule
break
parent_schedule = parent_node.map.schedule
# Determine default GPU schedule based on existence of
# thread-block maps
if parent_schedule == dtypes.ScheduleType.GPU_Device:
if parent_node not in scopes_with_tbmaps:
parent_schedule = dtypes.ScheduleType.GPU_ThreadBlock
# End of special cases
# Set default storage type
desc.storage = dtypes.SCOPEDEFAULT_STORAGE[parent_schedule]
# Take care of remaining arrays/scalars, e.g., code->code edges:
# any descriptor still carrying the Default storage type becomes Register.
for desc in sdfg.arrays.values():
    if desc.storage != dtypes.StorageType.Default:
        continue
    desc.storage = dtypes.StorageType.Register
for state in sdfg.nodes():
# Loop again after all default storages have been set to set nested
# SDFGs
for node in state.nodes():
if not isinstance(node, nodes.NestedSDFG):
continue
for name, desc in node.sdfg.arrays.items():
if (not desc.transient
and desc.storage == dtypes.StorageType.Default):
# Find connector and ensure storage types match
# Build the state machine of the nested "gemm_nested" SDFG: two consecutive
# if/else diamonds (one branching on k, one on m) plus a compute state.
nested_sdfg = dace.SDFG("gemm_nested")
# First diamond: branch on k.
if_state_c = nested_sdfg.add_state("if_state_c")
then_state_c = nested_sdfg.add_state("then_state_c")
else_state_c = nested_sdfg.add_state("else_state_c")
# Second diamond: branch on m.
if_state_a = nested_sdfg.add_state("if_state_a")
then_state_a = nested_sdfg.add_state("then_state_a")
# No else state is necessary, but control flow detection seems to be broken
# for ifs with no else branch
else_state_a = nested_sdfg.add_state("else_state_a")
compute_state = nested_sdfg.add_state("compute_state")
# if_state_c -> then_state_c when k == 0 ...
nested_sdfg.add_edge(
if_state_c, then_state_c,
dace.sdfg.InterstateEdge(
condition=dace.properties.CodeProperty.from_string(
"k == 0", language=dace.dtypes.Language.Python)))
# ... otherwise if_state_c -> else_state_c.
nested_sdfg.add_edge(
if_state_c, else_state_c,
dace.sdfg.InterstateEdge(
condition=dace.properties.CodeProperty.from_string(
"k != 0", language=dace.dtypes.Language.Python)))
# Both branches rejoin unconditionally at if_state_a.
nested_sdfg.add_edge(then_state_c, if_state_a, dace.sdfg.InterstateEdge())
nested_sdfg.add_edge(else_state_c, if_state_a, dace.sdfg.InterstateEdge())
# if_state_a -> then_state_a when m == 0.
nested_sdfg.add_edge(
if_state_a, then_state_a,
dace.sdfg.InterstateEdge(
condition=dace.properties.CodeProperty.from_string(
"m == 0", language=dace.dtypes.Language.Python)))
nested_sdfg.add_edge(
if_state_a, else_state_a,
dace.sdfg.InterstateEdge(
condition=dace.properties.CodeProperty.from_string(
# If another target has already been registered as CPU, use it instead
if v['name'] == 'cpu':
default_target = k
# The CPU target is always present and is created first.
targets = {'cpu': default_target(frame, sdfg)}

# Instantiate the rest of the targets (names already in ``targets`` at this
# point, i.e. 'cpu', are skipped).
other_targets = {
    info['name']: generator_cls(frame, sdfg)
    for generator_cls, info in
    target.TargetCodeGenerator.extensions().items()
    if info['name'] not in targets
}
targets.update(other_targets)

# Instantiate all instrumentation providers in SDFG
provider_mapping = InstrumentationProvider.get_provider_mapping()
frame._dispatcher.instrumentation[
    dtypes.InstrumentationType.No_Instrumentation] = None

# First pass: record the provider class for every instrumentation type that
# appears on a node, a consume scope or a map.
for node, _ in sdfg.all_nodes_recursive():
    if hasattr(node, 'instrument'):
        instrumentation_type = node.instrument
    elif hasattr(node, 'consume'):
        instrumentation_type = node.consume.instrument
    elif hasattr(node, 'map'):
        instrumentation_type = node.map.instrument
    else:
        continue
    frame._dispatcher.instrumentation[instrumentation_type] = \
        provider_mapping[instrumentation_type]

# Second pass: replace each recorded provider class with one instance of it
# (None entries stay None).
frame._dispatcher.instrumentation = {
    key: (provider_cls() if provider_cls is not None else None)
    for key, provider_cls in frame._dispatcher.instrumentation.items()
}
# Generate frame code (and the rest of the code)