From d8ddc756820c5fd42ea10da219edc11af1552c0a Mon Sep 17 00:00:00 2001 From: Philipp Schaad Date: Tue, 29 Oct 2024 16:56:41 +0100 Subject: [PATCH 01/33] Warn on potential data races (#1712) Re-implement changes made by @luca-patrignani in #1541, see that PR for more information. Author: @luca-patrignani --- dace/config_schema.yml | 7 + dace/sdfg/validation.py | 51 ++- .../sdfg/warn_on_potential_data_race_test.py | 316 ++++++++++++++++++ 3 files changed, 367 insertions(+), 7 deletions(-) create mode 100644 tests/sdfg/warn_on_potential_data_race_test.py diff --git a/dace/config_schema.yml b/dace/config_schema.yml index da35e61997..7afb06a50a 100644 --- a/dace/config_schema.yml +++ b/dace/config_schema.yml @@ -919,6 +919,13 @@ required: description: > Check for undefined symbols in memlets during SDFG validation. + check_race_conditions: + type: bool + default: false + title: Check race conditions + description: > + Check for potential race conditions during validation. + ############################################# # Features for unit testing diff --git a/dace/sdfg/validation.py b/dace/sdfg/validation.py index e75099276f..f02a5003e9 100644 --- a/dace/sdfg/validation.py +++ b/dace/sdfg/validation.py @@ -1,17 +1,22 @@ -# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. """ Exception classes and methods for validation of SDFGs. """ + import copy -from dace.dtypes import DebugInfo import os -from typing import TYPE_CHECKING, Dict, List, Set import warnings +from collections import defaultdict +from typing import TYPE_CHECKING, Dict, List, Set + +import networkx as nx + from dace import dtypes, subsets, symbolic +from dace.dtypes import DebugInfo if TYPE_CHECKING: import dace + from dace.memlet import Memlet from dace.sdfg import SDFG from dace.sdfg import graph as gr - from dace.memlet import Memlet from dace.sdfg.state import ControlFlowRegion ########################################### @@ -34,8 +39,8 @@ def validate_control_flow_region(sdfg: 'SDFG', symbols: dict, references: Set[int] = None, **context: bool): - from dace.sdfg.state import SDFGState, ControlFlowRegion, ConditionalBlock from dace.sdfg.scope import is_in_scope + from dace.sdfg.state import ConditionalBlock, ControlFlowRegion, SDFGState if len(region.source_nodes()) > 1 and region.start_block is None: raise InvalidSDFGError("Starting block undefined", sdfg, None) @@ -200,7 +205,7 @@ def validate_sdfg(sdfg: 'dace.sdfg.SDFG', references: Set[int] = None, **context # Avoid import loop from dace import data as dt from dace.codegen.targets import fpga - from dace.sdfg.scope import is_devicelevel_gpu, is_devicelevel_fpga + from dace.sdfg.scope import is_devicelevel_fpga, is_devicelevel_gpu references = references or set() @@ -383,7 +388,8 @@ def validate_state(state: 'dace.sdfg.SDFGState', from dace.sdfg import SDFG from dace.sdfg import nodes as nd from dace.sdfg import utils as sdutil - from dace.sdfg.scope import scope_contains_scope, is_devicelevel_gpu, is_devicelevel_fpga + from dace.sdfg.scope import (is_devicelevel_fpga, is_devicelevel_gpu, + scope_contains_scope) sdfg = sdfg or state.parent state_id = state_id if state_id is not None else state.parent_graph.node_id(state) @@ -839,6 +845,37 @@ def validate_state(state: 'dace.sdfg.SDFGState', continue raise error + if Config.get_bool('experimental.check_race_conditions'): + node_labels = [] + write_accesses = defaultdict(list) + read_accesses = defaultdict(list) + for node in 
state.data_nodes():
+            node_labels.append(node.label)
+            write_accesses[node.label].extend(
+                [{'subset': e.data.dst_subset, 'node': node, 'wcr': e.data.wcr} for e in state.in_edges(node)])
+            read_accesses[node.label].extend(
+                [{'subset': e.data.src_subset, 'node': node} for e in state.out_edges(node)])
+
+        for node_label in node_labels:
+            writes = write_accesses[node_label]
+            reads = read_accesses[node_label]
+            # Check write-write data races.
+            for i in range(len(writes)):
+                for j in range(i + 1, len(writes)):
+                    same_or_unreachable_nodes = (writes[i]['node'] == writes[j]['node'] or
+                                                 not nx.has_path(state.nx, writes[i]['node'], writes[j]['node']))
+                    no_wcr = writes[i]['wcr'] is None and writes[j]['wcr'] is None
+                    if same_or_unreachable_nodes and no_wcr:
+                        subsets_intersect = subsets.intersects(writes[i]['subset'], writes[j]['subset'])
+                        if subsets_intersect:
+                            warnings.warn(f'Memlet range overlap while writing to "{node_label}" in state "{state.label}"')
+            # Check read-write data races.
+            for write in writes:
+                for read in reads:
+                    if (not nx.has_path(state.nx, read['node'], write['node']) and
+                            subsets.intersects(write['subset'], read['subset'])):
+                        warnings.warn(f'Memlet range overlap while writing to "{node_label}" in state "{state.label}"')
+
 ########################################

diff --git a/tests/sdfg/warn_on_potential_data_race_test.py b/tests/sdfg/warn_on_potential_data_race_test.py
new file mode 100644
index 0000000000..8f17409a2f
--- /dev/null
+++ b/tests/sdfg/warn_on_potential_data_race_test.py
@@ -0,0 +1,316 @@
+# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved.
+
+import warnings
+import dace
+import pytest
+
+def test_memlet_range_not_overlap_ranges():
+    sdfg = dace.SDFG('memlet_range_not_overlap_ranges')
+    state = sdfg.add_state()
+    N = dace.symbol("N", dtype=dace.int32)
+    sdfg.add_array("A", (N//2,), dace.int32)
+    A = state.add_access("A")
+    sdfg.add_array("B", (N,), dace.int32)
+    B = state.add_access("B")
+    state.add_mapped_tasklet(
+        name="first_tasklet",
+        code="b = a + 10",
+        inputs={"a": dace.Memlet(data="A", subset="k")},
+        outputs={"b": dace.Memlet(data="B", subset="k")},
+        map_ranges={"k": "0:N//2"},
+        external_edges=True,
+        input_nodes={"A": A},
+        output_nodes={"B": B}
+    )
+    state.add_mapped_tasklet(
+        name="second_tasklet",
+        code="b = a - 20",
+        inputs={"a": dace.Memlet(data="A", subset="k")},
+        outputs={"b": dace.Memlet(data="B", subset="k+N//2")},
+        map_ranges={"k": "0:N//2"},
+        external_edges=True,
+        input_nodes={"A": A},
+        output_nodes={"B": B}
+    )
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("error", UserWarning)
+        with dace.config.set_temporary("experimental.check_race_conditions", value=True):
+            sdfg.validate()
+
+
+def test_memlet_range_write_write_overlap_ranges():
+    sdfg = dace.SDFG('memlet_range_overlap_ranges')
+    state = sdfg.add_state()
+    N = dace.symbol("N", dtype=dace.int32)
+    sdfg.add_array("A", (N,), dace.int32)
+    A = state.add_access("A")
+    sdfg.add_array("B", (N,), dace.int32)
+    B = state.add_access("B")
+    state.add_mapped_tasklet(
+        name="first_tasklet",
+        code="b = a + 10",
+        inputs={"a": dace.Memlet(data="A", subset="k")},
+        outputs={"b": dace.Memlet(data="B", subset="k")},
+        map_ranges={"k": "0:N"},
+        external_edges=True,
+        input_nodes={"A": A},
+        output_nodes={"B": B}
+    )
+    state.add_mapped_tasklet(
+        name="second_tasklet",
+        code="b = a - 20",
+        inputs={"a": dace.Memlet(data="A", subset="k")},
+        outputs={"b": dace.Memlet(data="B", subset="k")},
+        map_ranges={"k": "0:N"},
+        external_edges=True,
input_nodes={"A": A}, + output_nodes={"B": B} + ) + + with pytest.warns(UserWarning): + with dace.config.set_temporary("experimental.check_race_conditions", value=True): + sdfg.validate() + +def test_memlet_range_write_read_overlap_ranges(): + sdfg = dace.SDFG('memlet_range_write_read_overlap_ranges') + state = sdfg.add_state() + N = dace.symbol("N", dtype=dace.int32) + sdfg.add_array("A", (N,), dace.int32) + A_read = state.add_read("A") + A_write = state.add_write("A") + sdfg.add_array("B", (N,), dace.int32) + B = state.add_access("B") + sdfg.add_array("C", (N,), dace.int32) + C = state.add_access("C") + state.add_mapped_tasklet( + name="first_tasklet", + code="b = a + 10", + inputs={"a": dace.Memlet(data="A", subset="k")}, + outputs={"b": dace.Memlet(data="B", subset="k")}, + map_ranges={"k": "0:N"}, + external_edges=True, + input_nodes={"A": A_read}, + output_nodes={"B": B} + ) + state.add_mapped_tasklet( + name="second_tasklet", + code="a = c - 20", + inputs={"c": dace.Memlet(data="C", subset="k")}, + outputs={"a": dace.Memlet(data="A", subset="k")}, + map_ranges={"k": "0:N"}, + external_edges=True, + input_nodes={"C": C}, + output_nodes={"A": A_write} + ) + + with pytest.warns(UserWarning): + with dace.config.set_temporary('experimental', 'check_race_conditions', value=True): + sdfg.validate() + +def test_memlet_overlap_ranges_two_access_nodes(): + sdfg = dace.SDFG('memlet_range_write_read_overlap_ranges') + state = sdfg.add_state() + N = dace.symbol("N", dtype=dace.int32) + sdfg.add_array("A", (N,), dace.int32) + A1 = state.add_access("A") + A2 = state.add_access("A") + sdfg.add_array("B", (N,), dace.int32) + B1 = state.add_access("B") + B2 = state.add_access("B") + + state.add_mapped_tasklet( + name="first_tasklet", + code="b = a + 10", + inputs={"a": dace.Memlet(data="A", subset="k")}, + outputs={"b": dace.Memlet(data="B", subset="k")}, + map_ranges={"k": "0:N"}, + external_edges=True, + input_nodes={"A": A1}, + output_nodes={"B": B1} + ) + state.add_mapped_tasklet( + name="second_tasklet", + code="b = a - 20", + inputs={"a": dace.Memlet(data="A", subset="k")}, + outputs={"b": dace.Memlet(data="B", subset="k")}, + map_ranges={"k": "0:N"}, + external_edges=True, + input_nodes={"A": A2}, + output_nodes={"B": B2} + ) + + with pytest.warns(UserWarning): + with dace.config.set_temporary('experimental', 'check_race_conditions', value=True): + sdfg.validate() + +def test_memlet_overlap_symbolic_ranges(): + sdfg = dace.SDFG('memlet_overlap_symbolic_ranges') + state = sdfg.add_state() + N = dace.symbol("N", dtype=dace.int32) + sdfg.add_array("A", (2*N,), dace.int32) + A = state.add_access("A") + sdfg.add_array("B", (2*N,), dace.int32) + B = state.add_access("B") + + state.add_mapped_tasklet( + name="first_tasklet", + code="b = a + 10", + inputs={"a": dace.Memlet(data="A", subset="k")}, + outputs={"b": dace.Memlet(data="B", subset="k")}, + map_ranges={"k": "0:N"}, + external_edges=True, + input_nodes={"A": A}, + output_nodes={"B": B} + ) + state.add_mapped_tasklet( + name="second_tasklet", + code="b = a - 20", + inputs={"a": dace.Memlet(data="A", subset="k")}, + outputs={"b": dace.Memlet(data="B", subset="k")}, + map_ranges={"k": "0:2*N"}, + external_edges=True, + input_nodes={"A": A}, + output_nodes={"B": B} + ) + + with pytest.warns(UserWarning): + with dace.config.set_temporary('experimental', 'check_race_conditions', value=True): + sdfg.validate() + +def test_constant_memlet_overlap(): + sdfg = dace.SDFG('constant_memlet_overlap') + state = sdfg.add_state() + sdfg.add_array("A", (12,), 
dace.int32) + A = state.add_access("A") + sdfg.add_array("B", (12,), dace.int32) + B = state.add_access("B") + + state.add_mapped_tasklet( + name="first_tasklet", + code="b = a + 10", + inputs={"a": dace.Memlet(data="A", subset="k")}, + outputs={"b": dace.Memlet(data="B", subset="k")}, + map_ranges={"k": "3:10"}, + external_edges=True, + input_nodes={"A": A}, + output_nodes={"B": B} + ) + state.add_mapped_tasklet( + name="second_tasklet", + code="b = a - 20", + inputs={"a": dace.Memlet(data="A", subset="k")}, + outputs={"b": dace.Memlet(data="B", subset="k")}, + map_ranges={"k": "6:12"}, + external_edges=True, + input_nodes={"A": A}, + output_nodes={"B": B} + ) + + with pytest.warns(UserWarning): + with dace.config.set_temporary('experimental', 'check_race_conditions', value=True): + sdfg.validate() + +def test_constant_memlet_almost_overlap(): + sdfg = dace.SDFG('constant_memlet_almost_overlap') + state = sdfg.add_state() + sdfg.add_array("A", (20,), dace.int32) + A = state.add_access("A") + sdfg.add_array("B", (20,), dace.int32) + B = state.add_access("B") + + state.add_mapped_tasklet( + name="first_tasklet", + code="b = a + 10", + inputs={"a": dace.Memlet(data="A", subset="k")}, + outputs={"b": dace.Memlet(data="B", subset="k")}, + map_ranges={"k": "3:10"}, + external_edges=True, + input_nodes={"A": A}, + output_nodes={"B": B} + ) + state.add_mapped_tasklet( + name="second_tasklet", + code="b = a - 20", + inputs={"a": dace.Memlet(data="A", subset="k")}, + outputs={"b": dace.Memlet(data="B", subset="k")}, + map_ranges={"k": "10:20"}, + external_edges=True, + input_nodes={"A": A}, + output_nodes={"B": B} + ) + + with warnings.catch_warnings(): + warnings.simplefilter("error", UserWarning) + with dace.config.set_temporary('experimental', 'check_race_conditions', value=True): + sdfg.validate() + +def test_elementwise_map(): + sdfg = dace.SDFG('elementwise_map') + state = sdfg.add_state() + sdfg.add_array("A", (20,), dace.int32) + A_read = state.add_read("A") + A_write = state.add_write("A") + + state.add_mapped_tasklet( + name="first_tasklet", + code="aa = a + 10", + inputs={"a": dace.Memlet(data="A", subset="k")}, + outputs={"aa": dace.Memlet(data="A", subset="k")}, + map_ranges={"k": "0:20"}, + external_edges=True, + input_nodes={"A": A_read}, + output_nodes={"A": A_write} + ) + + with warnings.catch_warnings(): + warnings.simplefilter("error", UserWarning) + with dace.config.set_temporary('experimental', 'check_race_conditions', value=True): + sdfg.validate() + +def test_memlet_overlap_with_wcr(): + sdfg = dace.SDFG('memlet_overlap_with_wcr') + state = sdfg.add_state() + sdfg.add_array("A", (20,), dace.int32) + sdfg.add_array("B", (1,), dace.int32) + A = state.add_read("A") + B = state.add_write("B") + + state.add_mapped_tasklet( + name="first_reduction", + code="b = a", + inputs={"a": dace.Memlet(data="A", subset="k")}, + outputs={"b": dace.Memlet(data="B", subset="0", wcr="lambda old, new: old + new")}, + map_ranges={"k": "0:20"}, + external_edges=True, + input_nodes={"A": A}, + output_nodes={"B": B} + ) + + state.add_mapped_tasklet( + name="second_reduction", + code="b = a", + inputs={"a": dace.Memlet(data="A", subset="k")}, + outputs={"b": dace.Memlet(data="B", subset="0", wcr="lambda old, new: old + new")}, + map_ranges={"k": "0:20"}, + external_edges=True, + input_nodes={"A": A}, + output_nodes={"B": B} + ) + + with warnings.catch_warnings(): + warnings.simplefilter("error", UserWarning) + with dace.config.set_temporary('experimental', 'check_race_conditions', value=True): + 
sdfg.validate() + + +if __name__ == '__main__': + test_memlet_range_not_overlap_ranges() + test_memlet_range_write_write_overlap_ranges() + test_memlet_range_write_read_overlap_ranges() + test_memlet_overlap_ranges_two_access_nodes() + test_memlet_overlap_symbolic_ranges() + test_constant_memlet_overlap() + test_constant_memlet_almost_overlap() + test_elementwise_map() + test_memlet_overlap_with_wcr() From 7cb93f29820c8ad8caba5f75122d912192050f0f Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Tue, 29 Oct 2024 16:40:23 -0700 Subject: [PATCH 02/33] Python frontend stability and inline storage specification (#1711) The PR adds a new syntax to support inline storage specification with the `@` operator, supporting the following statements: `a = np.ones(M) @ dace.StorageType.CPU_ThreadLocal`. This PR also fixes multiple minor issues in the Python frontend: * `WarpTiling` did not respect sequential map schedules * Non-sequence inputs for `numpy.fill` variants (e.g., `numpy.zeros(N)`) * NumPy replacement syntax errors would sometimes not have source information * Fix type inference for nested scopes in Python frontend * Dynamic thread block scheduling does not support multi-dimensional maps * Default schedule inference should use dynamic thread blocks if they exist * Type hints with storage type not being adhered to by the Python frontend * Validation issue #1562 The following changes were added as skipped tests and deferred to future PRs: * Dynamic map range related issues: Fix deferred to #1696 * Dynamic thread block scheduling would not pass object to nested functions: Fix deferred to future PR, see #1189 for more information --- dace/codegen/targets/cuda.py | 74 ++++++------ dace/codegen/tools/type_inference.py | 4 +- dace/dtypes.py | 2 - dace/frontend/python/newast.py | 31 +++-- dace/frontend/python/replacements.py | 114 ++++++++---------- dace/sdfg/infer_types.py | 10 +- dace/sdfg/sdfg.py | 8 +- dace/sdfg/validation.py | 3 +- dace/transformation/dataflow/warp_tiling.py | 4 + dace/transformation/helpers.py | 17 ++- tests/dynamic_tb_map_cudatest.py | 70 ++++++++++- tests/numpy/array_creation_test.py | 49 +++++++- tests/numpy/map_syntax_test.py | 52 ++++++++ .../device_annotations_test.py | 49 ++++++-- tests/sdfg/cycles_test.py | 19 ++- 15 files changed, 370 insertions(+), 136 deletions(-) diff --git a/dace/codegen/targets/cuda.py b/dace/codegen/targets/cuda.py index f080f2cc62..1cf8919d74 100644 --- a/dace/codegen/targets/cuda.py +++ b/dace/codegen/targets/cuda.py @@ -23,8 +23,8 @@ from dace.codegen.targets.target import IllegalCopy, TargetCodeGenerator, make_absolute from dace.config import Config from dace.frontend import operations -from dace.sdfg import (SDFG, ScopeSubgraphView, SDFGState, has_dynamic_map_inputs, - is_array_stream_view, is_devicelevel_gpu, nodes, scope_contains_scope) +from dace.sdfg import (SDFG, ScopeSubgraphView, SDFGState, has_dynamic_map_inputs, is_array_stream_view, + is_devicelevel_gpu, nodes, scope_contains_scope) from dace.sdfg import utils as sdutil from dace.sdfg.graph import MultiConnectorEdge from dace.sdfg.state import ControlFlowRegion, StateSubgraphView @@ -68,6 +68,7 @@ def __init__(self, frame_codegen: 'DaCeCodeGenerator', sdfg: SDFG): dispatcher = self._dispatcher self.create_grid_barrier = False + self.dynamic_tbmap_type = None self.extra_nsdfg_args = [] CUDACodeGen._in_device_code = False self._cpu_codegen: Optional['CPUCodeGen'] = None @@ -892,8 +893,8 @@ def increment(streams): return max_streams, max_events - def _emit_copy(self, state_id: int, 
src_node: nodes.Node, src_storage: dtypes.StorageType, - dst_node: nodes.Node, dst_storage: dtypes.StorageType, dst_schedule: dtypes.ScheduleType, + def _emit_copy(self, state_id: int, src_node: nodes.Node, src_storage: dtypes.StorageType, dst_node: nodes.Node, + dst_storage: dtypes.StorageType, dst_schedule: dtypes.ScheduleType, edge: Tuple[nodes.Node, str, nodes.Node, str, Memlet], sdfg: SDFG, cfg: ControlFlowRegion, dfg: StateSubgraphView, callsite_stream: CodeIOStream) -> None: u, uconn, v, vconn, memlet = edge @@ -1163,11 +1164,8 @@ def _emit_copy(self, state_id: int, src_node: nodes.Node, src_storage: dtypes.St copysize=', '.join(_topy(copy_shape)), is_async='true' if state_dfg.out_degree(dst_node) == 0 else 'false', accum=accum or '::Copy', - args=', '.join( - [src_expr] + _topy(src_strides) + [dst_expr] + _topy(dst_strides) + custom_reduction - ) - ), - cfg, state_id, [src_node, dst_node]) + args=', '.join([src_expr] + _topy(src_strides) + [dst_expr] + _topy(dst_strides) + + custom_reduction)), cfg, state_id, [src_node, dst_node]) else: callsite_stream.write( (' {func}<{type}, {bdims}, {copysize}, ' + @@ -1236,8 +1234,12 @@ def _begin_streams(self, sdfg, state): result.add(e.dst._cuda_stream) return result - def generate_state(self, sdfg: SDFG, cfg: ControlFlowRegion, state: SDFGState, - function_stream: CodeIOStream, callsite_stream: CodeIOStream, + def generate_state(self, + sdfg: SDFG, + cfg: ControlFlowRegion, + state: SDFGState, + function_stream: CodeIOStream, + callsite_stream: CodeIOStream, generate_state_footer: bool = False) -> None: # Two modes: device-level state and if this state has active streams if CUDACodeGen._in_device_code: @@ -1361,8 +1363,7 @@ def generate_devicelevel_state(self, sdfg: SDFG, cfg: ControlFlowRegion, state: "&& threadIdx.x == 0) " "{ // sub-graph begin", cfg, state.block_id) elif write_scope == 'block': - callsite_stream.write("if (threadIdx.x == 0) " - "{ // sub-graph begin", cfg, state.block_id) + callsite_stream.write("if (threadIdx.x == 0) " "{ // sub-graph begin", cfg, state.block_id) else: callsite_stream.write("{ // subgraph begin", cfg, state.block_id) else: @@ -1985,16 +1986,13 @@ def generate_kernel_scope(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg_scope: S # allocating shared memory for dynamic threadblock maps if has_dtbmap: - kernel_stream.write( - '__shared__ dace::' - 'DynamicMap<{fine_grained}, {block_size}>' - '::shared_type dace_dyn_map_shared;'.format( - fine_grained=('true' - if Config.get_bool('compiler', 'cuda', 'dynamic_map_fine_grained') else 'false'), - block_size=functools.reduce( - (lambda x, y: x * y), - [int(x) for x in Config.get('compiler', 'cuda', 'dynamic_map_block_size').split(',')])), cfg, - state_id, node) + self.dynamic_tbmap_type = ( + f'dace::DynamicMap<{"true" if Config.get_bool("compiler", "cuda", "dynamic_map_fine_grained") else "false"}, ' + f'{functools.reduce((lambda x, y: x * y), [int(x) for x in Config.get("compiler", "cuda", "dynamic_map_block_size").split(",")])}>' + '::shared_type') + kernel_stream.write(f'__shared__ {self.dynamic_tbmap_type} dace_dyn_map_shared;', cfg, state_id, node) + else: + self.dynamic_tbmap_type = None # Add extra opening brace (dynamic map ranges, closed in MapExit # generator) @@ -2072,8 +2070,8 @@ def generate_kernel_scope(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg_scope: S # Generate conditions for this block's execution using min and max # element, e.g., skipping out-of-bounds threads in trailing block - # unless thsi is handled by another map down the line - if 
(not has_tbmap and not has_dtbmap and node.map.schedule != dtypes.ScheduleType.GPU_Persistent): + # unless this is handled by another map down the line + if ((not has_tbmap or has_dtbmap) and node.map.schedule != dtypes.ScheduleType.GPU_Persistent): dsym_end = [d + bs - 1 for d, bs in zip(dsym, self._block_dims)] minels = krange.min_element() maxels = krange.max_element() @@ -2090,10 +2088,12 @@ def generate_kernel_scope(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg_scope: S condition += '%s < %s' % (v, _topy(maxel + 1)) if len(condition) > 0: self._kernel_grid_conditions.append(f'if ({condition}) {{') - kernel_stream.write('if (%s) {' % condition, cfg, state_id, scope_entry) + if not has_dtbmap: + kernel_stream.write('if (%s) {' % condition, cfg, state_id, scope_entry) else: self._kernel_grid_conditions.append('{') - kernel_stream.write('{', cfg, state_id, scope_entry) + if not has_dtbmap: + kernel_stream.write('{', cfg, state_id, scope_entry) self._dispatcher.dispatch_subgraph(sdfg, cfg, @@ -2112,6 +2112,7 @@ def generate_kernel_scope(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg_scope: S self._kernel_state = None CUDACodeGen._in_device_code = False self._grid_dims = None + self.dynamic_tbmap_type = None def get_next_scope_entries(self, dfg, scope_entry): parent_scope_entry = dfg.entry_node(scope_entry) @@ -2179,10 +2180,8 @@ def generate_devicelevel_scope(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg_sco current_sdfg = current_state.parent if not outer_scope: raise ValueError(f'Failed to find the outer scope of {scope_entry}') - callsite_stream.write( - 'if ({} < {}) {{'.format(outer_scope.map.params[0], - _topy(subsets.Range(outer_scope.map.range[::-1]).max_element()[0] + 1)), cfg, - state_id, scope_entry) + for cond in self._kernel_grid_conditions: + callsite_stream.write(cond, cfg, state_id, scope_entry) # NOTE: Dynamic map inputs must be defined both outside and inside the dynamic Map schedule. # They define inside the schedule the bounds of the any nested Maps. 
@@ -2205,8 +2204,9 @@ def generate_devicelevel_scope(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg_sco '__dace_dynmap_begin = {begin};\n' '__dace_dynmap_end = {end};'.format(begin=dynmap_begin, end=dynmap_end), cfg, state_id, scope_entry) - # close if - callsite_stream.write('}', cfg, state_id, scope_entry) + # Close kernel grid conditions + for _ in self._kernel_grid_conditions: + callsite_stream.write('}', cfg, state_id, scope_entry) callsite_stream.write( 'dace::DynamicMap<{fine_grained}, {bsize}>::' @@ -2215,7 +2215,7 @@ def generate_devicelevel_scope(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg_sco 'auto {param}) {{'.format(fine_grained=('true' if Config.get_bool( 'compiler', 'cuda', 'dynamic_map_fine_grained') else 'false'), bsize=total_block_size, - kmapIdx=outer_scope.map.params[0], + kmapIdx=outer_scope.map.params[-1], param=dynmap_var), cfg, state_id, scope_entry) for e in dace.sdfg.dynamic_map_inputs(dfg, scope_entry): @@ -2556,8 +2556,8 @@ def generate_devicelevel_scope(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg_sco for cond in self._kernel_grid_conditions: callsite_stream.write(cond, cfg, state_id, scope_entry) - def generate_node(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg: StateSubgraphView, state_id: int, - node: nodes.Node, function_stream: CodeIOStream, callsite_stream: CodeIOStream) -> None: + def generate_node(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg: StateSubgraphView, state_id: int, node: nodes.Node, + function_stream: CodeIOStream, callsite_stream: CodeIOStream) -> None: if self.node_dispatch_predicate(sdfg, dfg, node): # Dynamically obtain node generator according to class name gen = getattr(self, '_generate_' + type(node).__name__, False) @@ -2594,6 +2594,8 @@ def generate_nsdfg_arguments(self, sdfg, cfg, dfg, state, node): result = self._cpu_codegen.generate_nsdfg_arguments(sdfg, cfg, dfg, state, node) if self.create_grid_barrier: result.append(('cub::GridBarrier&', '__gbar', '__gbar')) + if self.dynamic_tbmap_type: + result.append((f'{self.dynamic_tbmap_type}&', 'dace_dyn_map_shared', 'dace_dyn_map_shared')) # Add data from nested SDFGs to kernel arguments result.extend([(atype, aname, aname) for atype, aname, _ in self.extra_nsdfg_args]) diff --git a/dace/codegen/tools/type_inference.py b/dace/codegen/tools/type_inference.py index 893866522f..8f8dd84151 100644 --- a/dace/codegen/tools/type_inference.py +++ b/dace/codegen/tools/type_inference.py @@ -9,7 +9,7 @@ import numpy as np import ast -from dace import dtypes +from dace import data, dtypes from dace import symbolic from dace.codegen import cppunparse from dace.symbolic import symbol, SymExpr, symstr @@ -286,6 +286,8 @@ def _Name(t, symbols, inferred_symbols): inferred_type = dtypes.typeclass(inferred_type.type) elif isinstance(inferred_type, symbolic.symbol): inferred_type = inferred_type.dtype + elif isinstance(inferred_type, data.Data): + inferred_type = inferred_type.dtype elif t_id in inferred_symbols: inferred_type = inferred_symbols[t_id] return inferred_type diff --git a/dace/dtypes.py b/dace/dtypes.py index c5f9bb4732..a016ac60e2 100644 --- a/dace/dtypes.py +++ b/dace/dtypes.py @@ -1,10 +1,8 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. """ A module that contains various DaCe type definitions. 
""" -from __future__ import print_function import ctypes import aenum import inspect -import itertools import numpy import re from collections import OrderedDict diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py index cacf15d785..78890c9cdd 100644 --- a/dace/frontend/python/newast.py +++ b/dace/frontend/python/newast.py @@ -1489,19 +1489,19 @@ def _symbols_from_params(self, params: List[Tuple[str, Union[str, dtypes.typecla else: values = str(val).split(':') if len(values) == 1: - result[name] = symbolic.symbol(name, infer_expr_type(values[0], {**self.globals, **dyn_inputs})) + result[name] = symbolic.symbol(name, infer_expr_type(values[0], {**self.defined, **dyn_inputs})) elif len(values) == 2: result[name] = symbolic.symbol( name, dtypes.result_type_of(infer_expr_type(values[0], { - **self.globals, + **self.defined, **dyn_inputs }), infer_expr_type(values[1], { - **self.globals, + **self.defined, **dyn_inputs }))) elif len(values) == 3: - result[name] = symbolic.symbol(name, infer_expr_type(values[0], {**self.globals, **dyn_inputs})) + result[name] = symbolic.symbol(name, infer_expr_type(values[0], {**self.defined, **dyn_inputs})) else: raise DaceSyntaxError( self, None, "Invalid number of arguments in a range iterator. " @@ -3258,18 +3258,23 @@ def visit_AnnAssign(self, node: ast.AnnAssign): dtype = astutils.evalnode(node.annotation, {**self.globals, **self.defined}) if isinstance(dtype, data.Data): simple_type = dtype.dtype + storage = dtype.storage else: simple_type = dtype + storage = dtypes.StorageType.Default if not isinstance(simple_type, dtypes.typeclass): raise TypeError except: dtype = None + storage = dtypes.StorageType.Default type_name = rname(node.annotation) warnings.warn('typeclass {} is not supported'.format(type_name)) if node.value is None and dtype is not None: # Annotating type without assignment self.annotated_types[rname(node.target)] = dtype return - self._visit_assign(node, node.target, None, dtype=dtype) + results = self._visit_assign(node, node.target, None, dtype=dtype) + if storage != dtypes.StorageType.Default: + self.sdfg.arrays[results[0][0]].storage = storage def _visit_assign(self, node, node_target, op, dtype=None, is_return=False): # Get targets (elts) and results @@ -3563,6 +3568,8 @@ def _visit_assign(self, node, node_target, op, dtype=None, is_return=False): self.cfg_target.add_edge(self.last_block, output_indirection, dace.sdfg.InterstateEdge()) self.last_block = output_indirection + return results + def visit_AugAssign(self, node: ast.AugAssign): self._visit_assign(node, node.target, augassign_ops[type(node.op).__name__]) @@ -4623,10 +4630,16 @@ def visit_Call(self, node: ast.Call, create_callbacks=False): self._add_state('call_%d' % node.lineno) self.last_block.set_default_lineinfo(self.current_lineinfo) - if found_ufunc: - result = func(self, node, self.sdfg, self.last_block, ufunc_name, args, keywords) - else: - result = func(self, self.sdfg, self.last_block, *args, **keywords) + try: + if found_ufunc: + result = func(self, node, self.sdfg, self.last_block, ufunc_name, args, keywords) + else: + result = func(self, self.sdfg, self.last_block, *args, **keywords) + except DaceSyntaxError as ex: + # Attach source information to exception + if ex.node is None: + ex.node = node + raise self.last_block.set_default_lineinfo(None) diff --git a/dace/frontend/python/replacements.py b/dace/frontend/python/replacements.py index 5e6118a34b..537fef97bf 100644 --- a/dace/frontend/python/replacements.py +++ 
b/dace/frontend/python/replacements.py @@ -322,24 +322,30 @@ def _numpy_full(pv: ProgramVisitor, is_data = True vtype = sdfg.arrays[fill_value].dtype dtype = dtype or vtype + + # Handle one-dimensional inputs + if isinstance(shape, (Number, str)) or symbolic.issymbolic(shape): + shape = [shape] + + if any(isinstance(s, str) for s in shape): + raise DaceSyntaxError( + pv, None, f'Data-dependent shape {shape} is currently not allowed. Only constants ' + 'and symbolic values can be used.') + name, _ = sdfg.add_temp_transient(shape, dtype) if is_data: state.add_mapped_tasklet( - '_numpy_full_', { - "__i{}".format(i): "0: {}".format(s) - for i, s in enumerate(shape) - }, + '_numpy_full_', {"__i{}".format(i): "0: {}".format(s) + for i, s in enumerate(shape)}, dict(__inp=dace.Memlet(data=fill_value, subset='0')), "__out = __inp", dict(__out=dace.Memlet.simple(name, ",".join(["__i{}".format(i) for i in range(len(shape))]))), external_edges=True) else: state.add_mapped_tasklet( - '_numpy_full_', { - "__i{}".format(i): "0: {}".format(s) - for i, s in enumerate(shape) - }, {}, + '_numpy_full_', {"__i{}".format(i): "0: {}".format(s) + for i, s in enumerate(shape)}, {}, "__out = {}".format(fill_value), dict(__out=dace.Memlet.simple(name, ",".join(["__i{}".format(i) for i in range(len(shape))]))), external_edges=True) @@ -459,10 +465,8 @@ def _numpy_flip(pv: ProgramVisitor, sdfg: SDFG, state: SDFGState, arr: str, axis inpidx = ','.join([f'__i{i}' for i in range(ndim)]) outidx = ','.join([f'{s} - __i{i} - 1' if a else f'__i{i}' for i, (a, s) in enumerate(zip(axis, desc.shape))]) state.add_mapped_tasklet(name="_numpy_flip_", - map_ranges={ - f'__i{i}': f'0:{s}:1' - for i, s in enumerate(desc.shape) - }, + map_ranges={f'__i{i}': f'0:{s}:1' + for i, s in enumerate(desc.shape)}, inputs={'__inp': Memlet(f'{arr}[{inpidx}]')}, code='__out = __inp', outputs={'__out': Memlet(f'{arr_copy}[{outidx}]')}, @@ -532,10 +536,8 @@ def _numpy_rot90(pv: ProgramVisitor, sdfg: SDFG, state: SDFGState, arr: str, k=1 outidx = ','.join(out_indices) state.add_mapped_tasklet(name="_rot90_", - map_ranges={ - f'__i{i}': f'0:{s}:1' - for i, s in enumerate(desc.shape) - }, + map_ranges={f'__i{i}': f'0:{s}:1' + for i, s in enumerate(desc.shape)}, inputs={'__inp': Memlet(f'{arr}[{inpidx}]')}, code='__out = __inp', outputs={'__out': Memlet(f'{arr_copy}[{outidx}]')}, @@ -644,7 +646,8 @@ def _elementwise(pv: 'ProgramVisitor', else: state.add_mapped_tasklet( name="_elementwise_", - map_ranges={f'__i{dim}': f'0:{N}' for dim, N in enumerate(inparr.shape)}, + map_ranges={f'__i{dim}': f'0:{N}' + for dim, N in enumerate(inparr.shape)}, inputs={'__inp': Memlet.simple(in_array, ','.join([f'__i{dim}' for dim in range(len(inparr.shape))]))}, code=code, outputs={'__out': Memlet.simple(out_array, ','.join([f'__i{dim}' for dim in range(len(inparr.shape))]))}, @@ -694,10 +697,8 @@ def _simple_call(sdfg: SDFG, state: SDFGState, inpname: str, func: str, restype: else: state.add_mapped_tasklet( name=func, - map_ranges={ - '__i%d' % i: '0:%s' % n - for i, n in enumerate(inparr.shape) - }, + map_ranges={'__i%d' % i: '0:%s' % n + for i, n in enumerate(inparr.shape)}, inputs={'__inp': Memlet.simple(inpname, ','.join(['__i%d' % i for i in range(len(inparr.shape))]))}, code='__out = {f}(__inp)'.format(f=func), outputs={'__out': Memlet.simple(outname, ','.join(['__i%d' % i for i in range(len(inparr.shape))]))}, @@ -1046,27 +1047,22 @@ def _argminmax(pv: ProgramVisitor, code = "__init = _val_and_idx(val={}, idx=-1)".format( dtypes.min_value(a_arr.dtype) if func == 
'max' else dtypes.max_value(a_arr.dtype)) - nest.add_state().add_mapped_tasklet(name="_arg{}_convert_".format(func), - map_ranges={ - '__i%d' % i: '0:%s' % n - for i, n in enumerate(a_arr.shape) if i != axis - }, - inputs={}, - code=code, - outputs={ - '__init': - Memlet.simple( - reduced_structs, - ','.join('__i%d' % i for i in range(len(a_arr.shape)) if i != axis)) - }, - external_edges=True) + nest.add_state().add_mapped_tasklet( + name="_arg{}_convert_".format(func), + map_ranges={'__i%d' % i: '0:%s' % n + for i, n in enumerate(a_arr.shape) if i != axis}, + inputs={}, + code=code, + outputs={ + '__init': Memlet.simple(reduced_structs, + ','.join('__i%d' % i for i in range(len(a_arr.shape)) if i != axis)) + }, + external_edges=True) nest.add_state().add_mapped_tasklet( name="_arg{}_reduce_".format(func), - map_ranges={ - '__i%d' % i: '0:%s' % n - for i, n in enumerate(a_arr.shape) - }, + map_ranges={'__i%d' % i: '0:%s' % n + for i, n in enumerate(a_arr.shape)}, inputs={'__in': Memlet.simple(a, ','.join('__i%d' % i for i in range(len(a_arr.shape))))}, code="__out = _val_and_idx(idx={}, val=__in)".format("__i%d" % axis), outputs={ @@ -1086,10 +1082,8 @@ def _argminmax(pv: ProgramVisitor, nest.add_state().add_mapped_tasklet( name="_arg{}_extract_".format(func), - map_ranges={ - '__i%d' % i: '0:%s' % n - for i, n in enumerate(a_arr.shape) if i != axis - }, + map_ranges={'__i%d' % i: '0:%s' % n + for i, n in enumerate(a_arr.shape) if i != axis}, inputs={ '__in': Memlet.simple(reduced_structs, ','.join('__i%d' % i for i in range(len(a_arr.shape)) if i != axis)) @@ -1212,10 +1206,9 @@ def _unop(sdfg: SDFG, state: SDFGState, op1: str, opcode: str, opname: str): opcode = 'not' name, _ = sdfg.add_temp_transient(arr1.shape, restype, arr1.storage) - state.add_mapped_tasklet("_%s_" % opname, { - '__i%d' % i: '0:%s' % s - for i, s in enumerate(arr1.shape) - }, {'__in1': Memlet.simple(op1, ','.join(['__i%d' % i for i in range(len(arr1.shape))]))}, + state.add_mapped_tasklet("_%s_" % opname, {'__i%d' % i: '0:%s' % s + for i, s in enumerate(arr1.shape)}, + {'__in1': Memlet.simple(op1, ','.join(['__i%d' % i for i in range(len(arr1.shape))]))}, '__out = %s __in1' % opcode, {'__out': Memlet.simple(name, ','.join(['__i%d' % i for i in range(len(arr1.shape))]))}, external_edges=True) @@ -4316,10 +4309,8 @@ def _ndarray_fill(pv: ProgramVisitor, sdfg: SDFG, state: SDFGState, arr: str, va shape = sdfg.arrays[arr].shape state.add_mapped_tasklet( '_numpy_fill_', - map_ranges={ - f"__i{dim}": f"0:{s}" - for dim, s in enumerate(shape) - }, + map_ranges={f"__i{dim}": f"0:{s}" + for dim, s in enumerate(shape)}, inputs=inputs, code=f"__out = {body}", outputs={'__out': dace.Memlet.simple(arr, ",".join([f"__i{dim}" for dim in range(len(shape))]))}, @@ -4544,6 +4535,13 @@ def _ndarray_astype(pv: ProgramVisitor, sdfg: SDFG, state: SDFGState, arr: str, return _datatype_converter(sdfg, state, arr, dtype)[0] +@oprepo.replaces_operator('Array', 'MatMult', otherclass='StorageType') +def _cast_storage(visitor: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, arr: str, stype: dace.StorageType) -> str: + desc = sdfg.arrays[arr] + desc.storage = stype + return arr + + # Replacements that need ufuncs ############################################### # TODO: Fix by separating to different modules and importing @@ -4747,13 +4745,7 @@ def _tensordot(pv: 'ProgramVisitor', @oprepo.replaces("cupy._core.core.ndarray") @oprepo.replaces("cupy.ndarray") -def _define_cupy_local( - pv: "ProgramVisitor", - sdfg: SDFG, - state: SDFGState, - shape: 
Shape, - dtype: typeclass -): +def _define_cupy_local(pv: "ProgramVisitor", sdfg: SDFG, state: SDFGState, shape: Shape, dtype: typeclass): """Defines a local array in a DaCe program.""" if not isinstance(shape, (list, tuple)): shape = [shape] @@ -4781,10 +4773,8 @@ def _cupy_full(pv: ProgramVisitor, name, _ = sdfg.add_temp_transient(shape, dtype, storage=dtypes.StorageType.GPU_Global) state.add_mapped_tasklet( - '_cupy_full_', { - "__i{}".format(i): "0: {}".format(s) - for i, s in enumerate(shape) - }, {}, + '_cupy_full_', {"__i{}".format(i): "0: {}".format(s) + for i, s in enumerate(shape)}, {}, "__out = {}".format(fill_value), dict(__out=dace.Memlet.simple(name, ",".join(["__i{}".format(i) for i in range(len(shape))]))), external_edges=True) diff --git a/dace/sdfg/infer_types.py b/dace/sdfg/infer_types.py index cf58cf76cc..97010e95a7 100644 --- a/dace/sdfg/infer_types.py +++ b/dace/sdfg/infer_types.py @@ -116,8 +116,7 @@ def infer_connector_types(sdfg: SDFG): for e in state.out_edges(node): cname = e.src_conn if cname and node.out_connectors[cname] is None: - raise TypeError('Ambiguous or uninferable type in' - ' connector "%s" of node "%s"' % (cname, node)) + raise TypeError('Ambiguous or uninferable type in' ' connector "%s" of node "%s"' % (cname, node)) ############################################################################# @@ -301,6 +300,12 @@ def _set_default_schedule_in_scope(state: SDFGState, else: child_schedule = _determine_child_schedule(parent_schedules) + # Special case for dynamic thread-block neighboring schedules + if child_schedule == dtypes.ScheduleType.GPU_ThreadBlock: + from dace.transformation.helpers import gpu_map_has_explicit_dyn_threadblocks # Avoid import loops + if gpu_map_has_explicit_dyn_threadblocks(state, parent_node): + child_schedule = dtypes.ScheduleType.GPU_ThreadBlock_Dynamic + # Set child schedule type in scope for node in child_nodes[parent_node]: # Set default schedule types @@ -393,6 +398,7 @@ def _get_storage_from_parent(data_name: str, sdfg: SDFG) -> dtypes.StorageType: raise ValueError(f'Could not find data descriptor {data_name} in parent SDFG') + def infer_aliasing(node: nodes.NestedSDFG, sdfg: SDFG, state: SDFGState) -> None: """ Infers aliasing information on nested SDFG arrays based on external edges and connectors. 
diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 38a41236a6..f25a6e24d5 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -761,13 +761,13 @@ def add_symbol(self, name, stype, find_new_name: bool = False): if name in self.symbols: raise FileExistsError(f'Symbol "{name}" already exists in SDFG') if name in self.arrays: - raise FileExistsError(f'Can not create symbol "{name}", the name is used by a data descriptor.') + raise FileExistsError(f'Cannot create symbol "{name}", the name is used by a data descriptor.') if name in self._subarrays: - raise FileExistsError(f'Can not create symbol "{name}", the name is used by a subarray.') + raise FileExistsError(f'Cannot create symbol "{name}", the name is used by a subarray.') if name in self._rdistrarrays: - raise FileExistsError(f'Can not create symbol "{name}", the name is used by a RedistrArray.') + raise FileExistsError(f'Cannot create symbol "{name}", the name is used by a RedistrArray.') if name in self._pgrids: - raise FileExistsError(f'Can not create symbol "{name}", the name is used by a ProcessGrid.') + raise FileExistsError(f'Cannot create symbol "{name}", the name is used by a ProcessGrid.') if not isinstance(stype, dtypes.typeclass): stype = dtypes.dtype_to_typeclass(stype) self.symbols[name] = stype diff --git a/dace/sdfg/validation.py b/dace/sdfg/validation.py index f02a5003e9..2df9e17445 100644 --- a/dace/sdfg/validation.py +++ b/dace/sdfg/validation.py @@ -396,7 +396,6 @@ def validate_state(state: 'dace.sdfg.SDFGState', symbols = symbols or {} initialized_transients = (initialized_transients if initialized_transients is not None else {'__pystate'}) references = references or set() - scope = state.scope_dict() # Obtain whether we are already in an accelerator context if not hasattr(context, 'in_gpu'): @@ -426,6 +425,8 @@ def validate_state(state: 'dace.sdfg.SDFGState', if state.has_cycles(): raise InvalidSDFGError('State should be acyclic but contains cycles', sdfg, state_id) + scope = state.scope_dict() + for nid, node in enumerate(state.nodes()): # Reference check if id(node) in references: diff --git a/dace/transformation/dataflow/warp_tiling.py b/dace/transformation/dataflow/warp_tiling.py index 362b51d9ac..f9091950e3 100644 --- a/dace/transformation/dataflow/warp_tiling.py +++ b/dace/transformation/dataflow/warp_tiling.py @@ -55,6 +55,10 @@ def apply(self, graph: SDFGState, sdfg: SDFG) -> nodes.MapEntry: # Stride and offset all internal maps maps_to_stride = xfh.get_internal_scopes(graph, new_me, immediate=True) for nstate, nmap in maps_to_stride: + # Skip sequential maps + if nmap.schedule == dtypes.ScheduleType.Sequential: + continue + nsdfg = nstate.parent nsdfg_node = nsdfg.parent_nsdfg_node diff --git a/dace/transformation/helpers.py b/dace/transformation/helpers.py index 6ca4602079..b7bf49e62b 100644 --- a/dace/transformation/helpers.py +++ b/dace/transformation/helpers.py @@ -934,11 +934,7 @@ def replicate_scope(sdfg: SDFG, state: SDFGState, scope: ScopeSubgraphView) -> S return ScopeSubgraphView(state, new_nodes, new_entry) -def offset_map(state: SDFGState, - entry: nodes.MapEntry, - dim: int, - offset: symbolic.SymbolicType, - negative: bool = True): +def offset_map(state: SDFGState, entry: nodes.MapEntry, dim: int, offset: symbolic.SymbolicType, negative: bool = True): """ Offsets a map parameter and its contents by a value. 
@@ -1270,6 +1266,17 @@ def gpu_map_has_explicit_threadblocks(state: SDFGState, entry: nodes.EntryNode) return False +def gpu_map_has_explicit_dyn_threadblocks(state: SDFGState, entry: nodes.EntryNode) -> bool: + """ + Returns True if GPU_Device map has explicit thread-block maps nested within. + """ + internal_maps = get_internal_scopes(state, entry) + if any(m.schedule == dtypes.ScheduleType.GPU_ThreadBlock_Dynamic for _, m in internal_maps): + return True + + return False + + def reconnect_edge_through_map( state: SDFGState, edge: graph.MultiConnectorEdge[Memlet], new_node: Union[nodes.EntryNode, nodes.ExitNode], keep_src: bool) -> Tuple[graph.MultiConnectorEdge[Memlet], graph.MultiConnectorEdge[Memlet]]: diff --git a/tests/dynamic_tb_map_cudatest.py b/tests/dynamic_tb_map_cudatest.py index b24e5f2ea6..edc1eac9f2 100644 --- a/tests/dynamic_tb_map_cudatest.py +++ b/tests/dynamic_tb_map_cudatest.py @@ -12,10 +12,8 @@ @dace.program(dace.uint32[H + 1], dace.uint32[nnz], dace.float32[nnz], dace.float32[W], dace.float32[H]) def spmv(A_row, A_col, A_val, x, b): - @dace.mapscope(_[0:H]) def compute_row(i): - @dace.map(_[A_row[i]:A_row[i + 1]]) def compute(j): a << A_val[j] @@ -292,8 +290,76 @@ def sddvm(D_vals: dace.float32[nnz_D], A2_crd: dace.int32[nnz_A], A2_pos: dace.i assert np.allclose(val, ref.data) +@pytest.mark.gpu +def test_dynamic_multidim_map(): + @dace.program + def tester(a: dace.float32[H, W, nnz]): + A = dace.ndarray([H, W, nnz], dtype=dace.float32, storage=dace.StorageType.GPU_Global) + A[:] = a + for i, j in dace.map[0:H, 0:W] @ dace.ScheduleType.GPU_Device: + for k in dace.map[0:nnz] @ dace.ScheduleType.GPU_ThreadBlock_Dynamic: + A[i, j, k] = i * 110 + j * 11 + k + a[:] = A + + a = np.zeros((10, 11, 65), dtype=np.float32) + tester(a) + assert np.allclose(a, np.fromfunction(lambda i, j, k: i * 110 + j * 11 + k, (10, 11, 65), dtype=np.float32)) + + +@pytest.mark.skip('Nested maps with work-stealing thread-block schedule are currently unsupported') +def test_dynamic_nested_map(): + @dace.program + def nested2(A: dace.float32[W], i: dace.int32, j: dace.int32): + A[j] = i * 10 + j + + @dace.program + def nested1(A: dace.float32[W], i: dace.int32): + for j in dace.map[0:W] @ dace.ScheduleType.GPU_ThreadBlock_Dynamic: + nested2(A, i, j) + + @dace.program + def dynamic_nested_map(a: dace.float32[H, W]): + A = dace.ndarray([H, W], dtype=dace.float32, storage=dace.StorageType.GPU_Global) + A[:] = a + for i in dace.map[0:H] @ dace.ScheduleType.GPU_Device: + nested1(A[i], i) + + a[:] = A + + a = np.zeros((10, 11), dtype=np.float32) + sdfg = dynamic_nested_map.to_sdfg(simplify=False) + for _, _, arr in sdfg.arrays_recursive(): + if arr.storage in (dace.StorageType.GPU_Shared, dace.StorageType.Default): + arr.storage = dace.StorageType.Register + sdfg(a, H=10, W=11) + assert np.allclose(a, np.fromfunction(lambda i, j: i * 10 + j, (10, 11), dtype=np.float32)) + + +@pytest.mark.gpu +def test_dynamic_default_schedule(): + N = dace.symbol('N') + + @dace.program + def tester(a: dace.float32[N, 10]): + A = dace.ndarray([N, 10], dtype=dace.float32, storage=dace.StorageType.GPU_Global) + A[:] = a + for i in dace.map[0:N] @ dace.ScheduleType.GPU_Device: + smem = np.empty((10, ), dtype=np.float32) @ dace.StorageType.GPU_Shared + smem[:] = 1 + for j in dace.map[0:10] @ dace.ScheduleType.GPU_ThreadBlock_Dynamic: + A[i, j] = i * 65 + smem[j] + a[:] = A + + a = np.zeros((65, 10), dtype=np.float32) + tester(a) + assert np.allclose(a, np.fromfunction(lambda i, j: i * 65 + 1, (65, 10), dtype=np.float32)) + 
+ if __name__ == '__main__': test_dynamic_map() test_dynamic_maps() test_nested_dynamic_map() test_dynamic_map_with_step() + test_dynamic_multidim_map() + # test_dynamic_nested_map() + test_dynamic_default_schedule() diff --git a/tests/numpy/array_creation_test.py b/tests/numpy/array_creation_test.py index 85908c7a1f..7329b48b3f 100644 --- a/tests/numpy/array_creation_test.py +++ b/tests/numpy/array_creation_test.py @@ -1,7 +1,9 @@ # Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. import dace +from dace.frontend.python.common import DaceSyntaxError import numpy as np from common import compare_numpy_output +import pytest # M = dace.symbol('M') # N = dace.symbol('N') @@ -154,7 +156,7 @@ def test_arange_6(): def program_strides_0(): A = dace.ndarray((2, 2), dtype=dace.int32, strides=(2, 1)) for i, j in dace.map[0:2, 0:2]: - A[i, j] = i * 2 + j + A[i, j] = i * 2 + j return A @@ -168,7 +170,7 @@ def test_strides_0(): def program_strides_1(): A = dace.ndarray((2, 2), dtype=dace.int32, strides=(4, 2)) for i, j in dace.map[0:2, 0:2]: - A[i, j] = i * 2 + j + A[i, j] = i * 2 + j return A @@ -182,7 +184,7 @@ def test_strides_1(): def program_strides_2(): A = dace.ndarray((2, 2), dtype=dace.int32, strides=(1, 2)) for i, j in dace.map[0:2, 0:2]: - A[i, j] = i * 2 + j + A[i, j] = i * 2 + j return A @@ -196,7 +198,7 @@ def test_strides_2(): def program_strides_3(): A = dace.ndarray((2, 2), dtype=dace.int32, strides=(2, 4)) for i, j in dace.map[0:2, 0:2]: - A[i, j] = i * 2 + j + A[i, j] = i * 2 + j return A @@ -206,6 +208,42 @@ def test_strides_3(): assert np.allclose(A, [[0, 1], [2, 3]]) +def test_zeros_symbolic_size_scalar(): + K = dace.symbol('K') + + @dace.program + def zeros_symbolic_size(): + return np.zeros((K), dtype=np.uint32) + + out = zeros_symbolic_size(K=10) + assert (list(out.shape) == [10]) + assert (out.dtype == np.uint32) + + +def test_ones_scalar_size_scalar(): + + @dace.program + def ones_scalar_size(k: dace.int32): + a = np.ones(k, dtype=np.uint32) + return np.sum(a) + + with pytest.raises(DaceSyntaxError): + out = ones_scalar_size(20) + assert out == 20 + + +def test_ones_scalar_size(): + + @dace.program + def ones_scalar_size(k: dace.int32): + a = np.ones((k, k), dtype=np.uint32) + return np.sum(a) + + with pytest.raises(DaceSyntaxError): + out = ones_scalar_size(20) + assert out == 20 * 20 + + if __name__ == "__main__": test_empty() test_empty_like1() @@ -233,3 +271,6 @@ def test_strides_3(): test_strides_1() test_strides_2() test_strides_3() + test_zeros_symbolic_size_scalar() + test_ones_scalar_size_scalar() + test_ones_scalar_size() diff --git a/tests/numpy/map_syntax_test.py b/tests/numpy/map_syntax_test.py index fe7af1d644..27a0cfe018 100644 --- a/tests/numpy/map_syntax_test.py +++ b/tests/numpy/map_syntax_test.py @@ -1,6 +1,7 @@ # Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. 
import numpy as np import dace +import pytest M, N, K = (dace.symbol(name) for name in ['M', 'N', 'K']) @@ -35,6 +36,57 @@ def test_map_python(): assert np.allclose(A[:, 1:], B[:, 1:]) +@pytest.mark.skip('Fails due to bug in Python frontend') +def test_nested_map_with_indirection(): + N = dace.symbol('N') + + @dace.program + def indirect_to_indirect(arr1: dace.float64[N], ind: dace.int32[10], arr2: dace.float64[N]): + for i in dace.map[0:9]: + begin, end, stride = ind[i], ind[i + 1], 1 + for _ in dace.map[0:1]: + for j in dace.map[begin:end:stride]: + arr2[j] = arr1[j] + i + + a = np.random.rand(50) + b = np.zeros(50) + ind = np.array([0, 5, 10, 15, 20, 25, 30, 35, 40, 45], dtype=np.int32) + sdfg = indirect_to_indirect.to_sdfg(simplify=False) + sdfg(a, ind, b) + + ref = np.zeros(50) + for i in range(9): + begin, end = ind[i], ind[i + 1] + ref[begin:end] = a[begin:end] + i + + assert np.allclose(b, ref) + + +@pytest.mark.skip('Fails due to bug in Python frontend') +def test_dynamic_map_range_scalar(): + """ + From issue #650. + """ + + @dace.program + def test(A: dace.float64[20], B: dace.float64[20]): + N = dace.define_local_scalar(dace.int32) + N = 5 + for i in dace.map[0:N]: + for j in dace.map[0:N]: + with dace.tasklet: + a << A[i] + b >> B[j] + b = a + 1 + + A = np.random.rand(20) + B = np.zeros(20) + test(A, B) + assert np.allclose(B[:5], A[:5] + 1) + + if __name__ == '__main__': test_copy3d() test_map_python() + # test_nested_map_with_indirection() + # test_dynamic_map_range_scalar() diff --git a/tests/python_frontend/device_annotations_test.py b/tests/python_frontend/device_annotations_test.py index 65c8501b23..d6b512f00b 100644 --- a/tests/python_frontend/device_annotations_test.py +++ b/tests/python_frontend/device_annotations_test.py @@ -1,16 +1,19 @@ -# Copyright 2019-2022 ETH Zurich and the DaCe authors. All rights reserved. +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. 
import dace import pytest +import numpy as np from dace.dtypes import StorageType, DeviceType, ScheduleType from dace import dtypes -cupy = pytest.importorskip("cupy") +try: + import cupy +except (ImportError, ModuleNotFoundError): + cupy = None @pytest.mark.gpu def test_storage(): - @dace.program def add(X: dace.float32[32, 32] @ StorageType.GPU_Global): return X + 1 @@ -46,7 +49,6 @@ def add2(X: dace.float32[32, 32] @ StorageType.GPU_Global): @pytest.mark.gpu def test_pythonmode(): - def runs_on_gpu(a: dace.float64[20] @ StorageType.GPU_Global, b: dace.float64[20] @ StorageType.GPU_Global): # This map will become a GPU kernel for i in dace.map[0:20] @ ScheduleType.GPU_Device: @@ -58,7 +60,40 @@ def runs_on_gpu(a: dace.float64[20] @ StorageType.GPU_Global, b: dace.float64[20 assert cupy.allclose(gpu_b, gpu_a + 1) +def test_inline_storage_hint(): + N = dace.symbol('N') + + @dace.program + def tester(): + b = np.ones(N, dtype=np.float32) @ dace.StorageType.CPU_ThreadLocal + return b + 1 + + sdfg = tester.to_sdfg(simplify=False) + assert sdfg.arrays['b'].storage == StorageType.CPU_ThreadLocal + + b = tester(N=10) + assert np.allclose(b, 2) + + +def test_annotated_storage_hint(): + N = dace.symbol('N') + + @dace.program + def tester(): + b: dace.float32[N] @ dace.StorageType.CPU_ThreadLocal = np.ones(N, dtype=np.float32) + return b + 1 + + sdfg = tester.to_sdfg(simplify=False) + assert sdfg.arrays['b'].storage == StorageType.CPU_ThreadLocal + + b = tester(N=10) + assert np.allclose(b, 2) + + if __name__ == "__main__": - test_storage() - test_schedule() - test_pythonmode() + if cupy is not None: + test_storage() + test_schedule() + test_pythonmode() + test_inline_storage_hint() + test_annotated_storage_hint() diff --git a/tests/sdfg/cycles_test.py b/tests/sdfg/cycles_test.py index 480392ab2d..b01aec55fd 100644 --- a/tests/sdfg/cycles_test.py +++ b/tests/sdfg/cycles_test.py @@ -2,7 +2,7 @@ import pytest import dace - +from dace.sdfg.validation import InvalidSDFGError def test_cycles(): with pytest.raises(ValueError, match="Found cycles.*"): @@ -29,6 +29,23 @@ def test_cycles_memlet_path(): sdfg.validate() +def test_cycles_1562(): + """ + Test for issue #1562. 
+ """ + with pytest.raises(InvalidSDFGError, match="cycles"): + sdfg = dace.SDFG("foo") + state = sdfg.add_state() + mentry_2, mexit_2 = state.add_map("map_2", dict(i="0:9")) + mentry_6, mexit_6 = state.add_map("map_6", dict(i="0:9")) + mentry_8, mexit_8 = state.add_map("map_8", dict(i="0:9")) + state.add_edge(mentry_8, "OUT_0", mentry_6, "IN_0", dace.Memlet(data="bla", subset='0:9')) + state.add_edge(mentry_6, "OUT_0", mentry_2, "IN_0", dace.Memlet(data="bla", subset='0:9')) + state.add_edge(mentry_2, "OUT_0", mentry_6, "IN_0", dace.Memlet(data="bla", subset='0:9')) + sdfg.validate() + + if __name__ == '__main__': test_cycles() test_cycles_memlet_path() + test_cycles_1562() From 1d8a693ef9a76af21b53e3a97c18cbf66b7ad92b Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Wed, 30 Oct 2024 06:31:02 +0100 Subject: [PATCH 03/33] infer_symbols_from_datadescriptor : modification to infer offset (#1525) Small modification to infer offset on top of shape and strides from the data descriptor (needed in gt4py) --- dace/frontend/python/parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dace/frontend/python/parser.py b/dace/frontend/python/parser.py index d99be1265d..d03759fa8e 100644 --- a/dace/frontend/python/parser.py +++ b/dace/frontend/python/parser.py @@ -92,14 +92,15 @@ def infer_symbols_from_datadescriptor(sdfg: SDFG, desc = sdfg.arrays[arg_name] if not hasattr(desc, 'shape') or not hasattr(arg_val, 'shape'): continue - symbolic_values = list(desc.shape) + list(getattr(desc, 'strides', [])) + symbolic_values = list(desc.shape) + list(getattr(desc, 'strides', [])) + list(getattr(desc, 'offset', [])) given_values = list(arg_val.shape) given_strides = [] if hasattr(arg_val, 'strides'): # NumPy arrays use bytes in strides factor = getattr(arg_val, 'itemsize', 1) given_strides = [s // factor for s in arg_val.strides] - given_values += given_strides + given_offset = [o for o in arg_val.offset] if hasattr(arg_val, 'offset') else [] + given_values += given_strides + given_offset for sym_dim, real_dim in zip(symbolic_values, given_values): repldict = {} From 3c164c44900a4b89685ff77105b6e63f2ed9759b Mon Sep 17 00:00:00 2001 From: Yakup Koray Budanaz Date: Wed, 30 Oct 2024 06:31:49 +0100 Subject: [PATCH 04/33] Add CFG to generate_scope in tutorials (#1706) The DFG is missing from codegen tutorial, resulting with an error in codegen. Adding cfg as a param fixes it. 
---
 tutorials/codegen.ipynb | 44 +++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/tutorials/codegen.ipynb b/tutorials/codegen.ipynb
index a6effd7996..2c79f1a2e0 100644
--- a/tutorials/codegen.ipynb
+++ b/tutorials/codegen.ipynb
@@ -480,48 +480,50 @@
 " self.frame = frame_codegen\n",
 " # Can be used to dispatch other code generators for allocation/nodes\n",
 " self.dispatcher = frame_codegen.dispatcher\n",
-" \n",
+"\n",
 " ################################################################\n",
-" # Register handlers/hooks through dispatcher: Can be used for \n",
+" # Register handlers/hooks through dispatcher: Can be used for\n",
 " # nodes, memory copy/allocation, scopes, states, and more.\n",
-" \n",
+"\n",
 " # In this case, register scopes\n",
 " self.dispatcher.register_map_dispatcher(dace.ScheduleType.LoopyLoop, self)\n",
-" \n",
+"\n",
 " # You can similarly use register_{array,copy,node,state}_dispatcher\n",
-" \n",
-" # A scope dispatcher will trigger a method called generate_scope whenever \n",
+"\n",
+" # A scope dispatcher will trigger a method called generate_scope whenever\n",
 " # an SDFG has a scope with that schedule\n",
-" def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView,\n",
-" state_id: int, function_stream: CodeIOStream,\n",
-" callsite_stream: CodeIOStream):\n",
+" def generate_scope(self, sdfg: dace.SDFG, cfg: dace.ControlFlowRegion,\n",
+" scope: ScopeSubgraphView, state_id: int,\n",
+" function_stream: CodeIOStream, callsite_stream: CodeIOStream):\n",
 " # The parameters here are:\n",
 " # sdfg: The SDFG we are currently generating.\n",
+" # cfg: The control flow graph (CFG) we are currently generating. For example,\n",
+" # it can be the SDFG itself or a loop region.\n",
 " # scope: The subgraph of the state containing only the scope (map contents)\n",
 " # we want to generate the code for.\n",
-" # state_id: The state in the SDFG the subgraph is taken from (i.e., \n",
+" # state_id: The state in the SDFG the subgraph is taken from (i.e.,\n",
 " # `sdfg.node(state_id)` is the same as `scope.graph`)\n",
 " # function_stream: A cursor to the global code (which can be used to define\n",
 " # functions, hence the name).\n",
 " # callsite_stream: A cursor to the current location in the code, most of\n",
 " # the code is generated here.\n",
-" \n",
+"\n",
 " # We can get the map entry node from the scope graph\n",
 " entry_node = scope.source_nodes()[0]\n",
-" \n",
+"\n",
 " # First, generate an opening brace (for instrumentation and dynamic map ranges)\n",
 " callsite_stream.write('{', sdfg, state_id, entry_node)\n",
-" \n",
+"\n",
 " ################################################################\n",
-" # Generate specific code: We will generate a reversed loop with a \n",
+" # Generate specific code: We will generate a reversed loop with a\n",
 " # comment for each dimension of the map. For the sake of simplicity,\n",
 " # dynamic map ranges are not supported.\n",
-" \n",
+"\n",
 " for param, rng in zip(entry_node.map.params, entry_node.map.range):\n",
 " # We use the sym2cpp function from the cpp support functions\n",
 " # to convert symbolic expressions to proper C++\n",
 " begin, end, stride = (sym2cpp(r) for r in rng)\n",
-" \n",
+"\n",
 " # Every write is optionally (but recommended to be) tagged with\n",
 " # 1-3 extra arguments, serving as line information to match\n",
 " # SDFG, state, and graph nodes/edges to written code.\n",
 " callsite_stream.write(f'''// Loopy-loop {param}\n",
 " for (auto {param} = {end}; {param} >= {begin}; {param} -= {stride}) {{''',\n",
 " sdfg, state_id, entry_node\n",
 " )\n",
-" \n",
+"\n",
 " # NOTE: CodeIOStream will automatically take care of indentation for us.\n",
-" \n",
-" \n",
+"\n",
+"\n",
 " # Now that the loops have been defined, use the dispatcher to invoke any\n",
 " # code generator (including this one) that is registered to deal with\n",
 " # the internal nodes in the subgraph. We skip the MapEntry node.\n",
-" self.dispatcher.dispatch_subgraph(sdfg, scope, state_id,\n",
+" self.dispatcher.dispatch_subgraph(sdfg, cfg, scope, state_id,\n",
 " function_stream, callsite_stream,\n",
 " skip_entry_node=True)\n",
-" \n",
+"\n",
 " # NOTE: Since skip_exit_node above is set to False, closing braces will\n",
 " # be automatically generated"

From 1343a6e6440d808644ffccd5937ae1fcb136b92e Mon Sep 17 00:00:00 2001
From: Philip Müller <147368808+philip-paul-mueller@users.noreply.github.com>
Date: Wed, 30 Oct 2024 16:51:40 +0100
Subject: [PATCH 05/33] Better `CopyToMap` (#1675)

By default, the transformation uses a linearization followed by a
delinearization approach. While this is needed to copy certain shapes, it is
unnecessarily complicated for memlets such as `a[0:10, 20:30] -> 40:50, 60:70`.
This PR adds special cases where the source and destination subsets have the
same size, turning such copies into simple copies. It also supports the case
where some dimensions are one, i.e., memlets such as
`a[0:10, 0:10] -> 0:10, 1, 0:20`. Tests were added for all cases.

Most importantly, the transformation now applies if the strides are the same;
before, this case was blocked.

This PR helps to _avoid_ errors that are related to
[Issue #1674](https://github.com/spcl/dace/issues/1674), but it is not a fix
or a solution to it.
---
 dace/transformation/dataflow/copy_to_map.py |  90 ++++++++---
 tests/transformations/copy_to_map_test.py   | 164 +++++++++++++++++++-
 2 files changed, 229 insertions(+), 25 deletions(-)

diff --git a/dace/transformation/dataflow/copy_to_map.py b/dace/transformation/dataflow/copy_to_map.py
index 5b4260ad55..9c4dbce627 100644
--- a/dace/transformation/dataflow/copy_to_map.py
+++ b/dace/transformation/dataflow/copy_to_map.py
@@ -1,12 +1,13 @@
 # Copyright 2019-2022 ETH Zurich and the DaCe authors. All rights reserved.
-from dace import dtypes, symbolic, data, subsets, Memlet
+from dace import dtypes, symbolic, data, subsets, Memlet, properties
 from dace.sdfg.scope import is_devicelevel_gpu
 from dace.transformation import transformation as xf
 from dace.sdfg import SDFGState, SDFG, nodes, utils as sdutil
 from typing import Tuple
+import itertools
 
 
+@properties.make_properties
 class CopyToMap(xf.SingleStateTransformation):
     """
     Converts an access node -> access node copy into a map.
Useful for generating manual code and @@ -14,6 +15,10 @@ class CopyToMap(xf.SingleStateTransformation): """ a = xf.PatternNode(nodes.AccessNode) b = xf.PatternNode(nodes.AccessNode) + ignore_strides = properties.Property( + default=False, + desc='Ignore the stride of the data container; Defaults to `False`.', + ) @classmethod def expressions(cls): @@ -31,7 +36,10 @@ def can_be_applied(self, graph: SDFGState, expr_index: int, sdfg: SDFG, permissi if isinstance(self.b.desc(sdfg), data.View): if sdutil.get_view_node(graph, self.b) == self.a: return False - if self.a.desc(sdfg).strides == self.b.desc(sdfg).strides: + if (not self.ignore_strides) and self.a.desc(sdfg).strides == self.b.desc(sdfg).strides: + return False + # Ensures that the edge goes from `a` -> `b`. + if not any(edge.dst is self.b for edge in graph.out_edges(self.a)): return False return True @@ -62,31 +70,69 @@ def delinearize_linearize(self, desc: data.Array, copy_shape: Tuple[symbolic.Sym return subsets.Range([(ind, ind, 1) for ind in cur_index]) def apply(self, state: SDFGState, sdfg: SDFG): - adesc = self.a.desc(sdfg) - bdesc = self.b.desc(sdfg) - edge = state.edges_between(self.a, self.b)[0] + avnode = self.a + av = avnode.data + adesc = avnode.desc(sdfg) + bvnode = self.b + bv = bvnode.data + bdesc = bvnode.desc(sdfg) + + edge = state.edges_between(avnode, bvnode)[0] + src_subset = edge.data.get_src_subset(edge, state) + if src_subset is None: + src_subset = subsets.Range.from_array(adesc) + src_subset_size = src_subset.size() + red_src_subset_size = tuple(s for s in src_subset_size if s != 1) + + dst_subset = edge.data.get_dst_subset(edge, state) + if dst_subset is None: + dst_subset = subsets.Range.from_array(bdesc) + dst_subset_size = dst_subset.size() + red_dst_subset_size = tuple(s for s in dst_subset_size if s != 1) if len(adesc.shape) >= len(bdesc.shape): - copy_shape = edge.data.get_src_subset(edge, state).size() + copy_shape = src_subset_size copy_a = True else: - copy_shape = edge.data.get_dst_subset(edge, state).size() + copy_shape = dst_subset_size copy_a = False - maprange = {f'__i{i}': (0, s - 1, 1) for i, s in enumerate(copy_shape)} - - av = self.a.data - bv = self.b.data - avnode = self.a - bvnode = self.b - - # Linearize and delinearize to get index expression for other side - if copy_a: - a_index = [symbolic.pystr_to_symbolic(f'__i{i}') for i in range(len(copy_shape))] - b_index = self.delinearize_linearize(bdesc, copy_shape, edge.data.get_dst_subset(edge, state)) + if tuple(src_subset_size) == tuple(dst_subset_size): + # The two subsets have exactly the same shape, so we can just copying with an offset. + # We use another index variables for the tests only. + maprange = {f'__j{i}': (0, s - 1, 1) for i, s in enumerate(copy_shape)} + a_index = [symbolic.pystr_to_symbolic(f'__j{i} + ({src_subset[i][0]})') for i in range(len(copy_shape))] + b_index = [symbolic.pystr_to_symbolic(f'__j{i} + ({dst_subset[i][0]})') for i in range(len(copy_shape))] + elif red_src_subset_size == red_dst_subset_size and (len(red_dst_subset_size) > 0): + # If we remove all size 1 dimensions that the two subsets have the same size. + # This is essentially the memlet `a[0:10, 2, 0:10] -> 0:10, 10:20` + # We use another index variable only for the tests but we would have to + # recreate the index anyways. 
+            maprange = {f'__j{i}': (0, s - 1, 1) for i, s in enumerate(red_src_subset_size)}
+            cnt = itertools.count(0)
+            a_index = [
+                symbolic.pystr_to_symbolic(f'{src_subset[i][0]}')
+                if s == 1
+                else symbolic.pystr_to_symbolic(f'__j{next(cnt)} + ({src_subset[i][0]})')
+                for i, s in enumerate(src_subset_size)
+            ]
+            cnt = itertools.count(0)
+            b_index = [
+                symbolic.pystr_to_symbolic(f'{dst_subset[i][0]}')
+                if s == 1
+                else symbolic.pystr_to_symbolic(f'__j{next(cnt)} + ({dst_subset[i][0]})')
+                for i, s in enumerate(dst_subset_size)
+            ]
         else:
-            a_index = self.delinearize_linearize(adesc, copy_shape, edge.data.get_src_subset(edge, state))
-            b_index = [symbolic.pystr_to_symbolic(f'__i{i}') for i in range(len(copy_shape))]
+            # We have to delinearize and linearize.
+            # We use a different index variable for the tests.
+            maprange = {f'__i{i}': (0, s - 1, 1) for i, s in enumerate(copy_shape)}
+            if copy_a:
+                a_index = [symbolic.pystr_to_symbolic(f'__i{i}') for i in range(len(copy_shape))]
+                b_index = self.delinearize_linearize(bdesc, copy_shape, edge.data.get_dst_subset(edge, state))
+            else:
+                a_index = self.delinearize_linearize(adesc, copy_shape, edge.data.get_src_subset(edge, state))
+                b_index = [symbolic.pystr_to_symbolic(f'__i{i}') for i in range(len(copy_shape))]
 
         a_subset = subsets.Range([(ind, ind, 1) for ind in a_index])
         b_subset = subsets.Range([(ind, ind, 1) for ind in b_index])
@@ -101,7 +147,7 @@ def apply(self, state: SDFGState, sdfg: SDFG):
             schedule = dtypes.ScheduleType.GPU_Device
 
         # Add copy map
-        t, _, _ = state.add_mapped_tasklet('copy',
+        t, _, _ = state.add_mapped_tasklet(f'copy_{av}_{bv}',
                                            maprange,
                                            dict(__inp=Memlet(data=av, subset=a_subset)),
                                            '__out = __inp',
diff --git a/tests/transformations/copy_to_map_test.py b/tests/transformations/copy_to_map_test.py
index 2b237d84d5..a0931fa1b8 100644
--- a/tests/transformations/copy_to_map_test.py
+++ b/tests/transformations/copy_to_map_test.py
@@ -4,6 +4,8 @@
 import copy
 import pytest
 import numpy as np
+import re
+from typing import Tuple, Optional
 
 
 def _copy_to_map(storage: dace.StorageType):
@@ -102,9 +104,165 @@ def test_preprocess():
     assert np.allclose(out, inp)
 
 
+def _perform_non_lin_delin_test(
+    sdfg: dace.SDFG,
+) -> bool:
+    """Performs a test for the special case of CopyToMap that bypasses linearizing and delinearizing.
+    """
+    assert sdfg.number_of_nodes() == 1
+    state: dace.SDFGState = sdfg.states()[0]
+    assert state.number_of_nodes() == 2
+    assert state.number_of_edges() == 1
+    assert all(isinstance(node, dace.nodes.AccessNode) for node in state.nodes())
+    sdfg.validate()
+
+    a = np.random.rand(*sdfg.arrays["a"].shape)
+    b_unopt = np.random.rand(*sdfg.arrays["b"].shape)
+    b_opt = b_unopt.copy()
+    sdfg(a=a, b=b_unopt)
+
+    nb_runs = sdfg.apply_transformations_repeated(CopyToMap, validate=True, options={"ignore_strides": True})
+    assert nb_runs == 1, f"Expected 1 application, but {nb_runs} were performed."
+
+    # Now look for the tasklet and check if the memlets follow the expected
+    # simple pattern.
+ tasklet: dace.nodes.Tasklet = next(iter([node for node in state.nodes() if isinstance(node, dace.nodes.Tasklet)])) + pattern: re.Pattern = re.compile(r"(__j[0-9])|(__j[0-9]+\s*\+\s*[0-9]+)|([0-9]+)") + + assert state.in_degree(tasklet) == 1 + assert state.out_degree(tasklet) == 1 + in_edge = next(iter(state.in_edges(tasklet))) + out_edge = next(iter(state.out_edges(tasklet))) + + assert all(pattern.fullmatch(str(idxs[0]).strip()) for idxs in in_edge.data.src_subset), f"IN: {in_edge.data.src_subset}" + assert all(pattern.fullmatch(str(idxs[0]).strip()) for idxs in out_edge.data.dst_subset), f"OUT: {out_edge.data.dst_subset}" + + # Now call it again after the optimization. + sdfg(a=a, b=b_opt) + assert np.allclose(b_unopt, b_opt) + + return True + +def _make_non_lin_delin_sdfg( + shape_a: Tuple[int, ...], + shape_b: Optional[Tuple[int, ...]] = None +) -> Tuple[dace.SDFG, dace.SDFGState, dace.nodes.AccessNode, dace.nodes.AccessNode]: + + if shape_b is None: + shape_b = shape_a + + sdfg = dace.SDFG("bypass1") + state = sdfg.add_state(is_start_block=True) + + ac = [] + for name, shape in [('a', shape_a), ('b', shape_b)]: + sdfg.add_array( + name=name, + shape=shape, + dtype=dace.float64, + transient=False, + ) + ac.append(state.add_access(name)) + + return sdfg, state, ac[0], ac[1] + + +def test_non_lin_delin_1(): + sdfg, state, a, b = _make_non_lin_delin_sdfg((10, 10)) + state.add_nedge( + a, + b, + dace.Memlet("a[0:10, 0:10] -> [0:10, 0:10]"), + ) + _perform_non_lin_delin_test(sdfg) + +def test_non_lin_delin_2(): + sdfg, state, a, b = _make_non_lin_delin_sdfg((10, 10), (100, 100)) + state.add_nedge( + a, + b, + dace.Memlet("a[0:10, 0:10] -> [50:60, 40:50]"), + ) + _perform_non_lin_delin_test(sdfg) + + +def test_non_lin_delin_3(): + sdfg, state, a, b = _make_non_lin_delin_sdfg((100, 100), (100, 100)) + state.add_nedge( + a, + b, + dace.Memlet("a[1:11, 20:30] -> [50:60, 40:50]"), + ) + _perform_non_lin_delin_test(sdfg) + + +def test_non_lin_delin_4(): + sdfg, state, a, b = _make_non_lin_delin_sdfg((100, 4, 100), (100, 100)) + state.add_nedge( + a, + b, + dace.Memlet("a[1:11, 2, 20:30] -> [50:60, 40:50]"), + ) + _perform_non_lin_delin_test(sdfg) + + +def test_non_lin_delin_5(): + sdfg, state, a, b = _make_non_lin_delin_sdfg((100, 4, 100), (100, 10, 100)) + state.add_nedge( + a, + b, + dace.Memlet("a[1:11, 2, 20:30] -> [50:60, 4, 40:50]"), + ) + _perform_non_lin_delin_test(sdfg) + + +def test_non_lin_delin_6(): + sdfg, state, a, b = _make_non_lin_delin_sdfg((100, 100), (100, 10, 100)) + state.add_nedge( + a, + b, + dace.Memlet("a[1:11, 20:30] -> [50:60, 4, 40:50]"), + ) + _perform_non_lin_delin_test(sdfg) + + +def test_non_lin_delin_7(): + sdfg, state, a, b = _make_non_lin_delin_sdfg((10, 10), (20, 20)) + state.add_nedge( + a, + b, + dace.Memlet("b[5:15, 6:16]"), + ) + _perform_non_lin_delin_test(sdfg) + + +def test_non_lin_delin_8(): + sdfg, state, a, b = _make_non_lin_delin_sdfg((20, 20), (10, 10)) + state.add_nedge( + a, + b, + dace.Memlet("a[5:15, 6:16]"), + ) + _perform_non_lin_delin_test(sdfg) + + if __name__ == '__main__': + test_non_lin_delin_1() + test_non_lin_delin_2() + test_non_lin_delin_3() + test_non_lin_delin_4() + test_non_lin_delin_5() + test_non_lin_delin_6() + test_non_lin_delin_7() + test_non_lin_delin_8() + test_copy_to_map() - test_copy_to_map_gpu() test_flatten_to_map() - test_flatten_to_map_gpu() - test_preprocess() + try: + import cupy + test_copy_to_map_gpu() + test_flatten_to_map_gpu() + test_preprocess() + except ModuleNotFoundError as E: + if "'cupy'" not in str(E): 
+ raise From 2811e40486f8ed6c21f348abfa93747b8edd6215 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Wed, 30 Oct 2024 12:12:38 -0700 Subject: [PATCH 06/33] More NumPy operation implementations (#1498) * Concatenation and stacking (numpy.concatenate, numpy.stack, and their variants) * numpy.linspace * Fix nested attribute parsing (Fixes #1295) * numpy.clip * numpy.split and its variants * numpy.full variants (zeros, ones, etc.) with a single value for shape (`np.zeros(N)`) * NumPy-compatible numpy.arange dtype inference * `numpy.fft.{fft, ifft}` --- dace/codegen/cppunparse.py | 8 +- dace/distr_types.py | 4 + dace/frontend/common/distr.py | 32 +- dace/frontend/python/newast.py | 57 +- dace/frontend/python/preprocessing.py | 2 + dace/frontend/python/replacements.py | 641 ++++++++++++++++++-- dace/libraries/blas/nodes/gemv.py | 16 +- dace/libraries/fft/__init__.py | 6 + dace/libraries/fft/algorithms/__init__.py | 0 dace/libraries/fft/algorithms/dft.py | 45 ++ dace/libraries/fft/environments/__init__.py | 2 + dace/libraries/fft/environments/cufft.py | 21 + dace/libraries/fft/nodes/__init__.py | 2 + dace/libraries/fft/nodes/fft.py | 204 +++++++ dace/libraries/standard/nodes/transpose.py | 31 +- tests/library/fft_test.py | 101 +++ tests/numpy/array_creation_test.py | 42 ++ tests/numpy/attention_simple_test.py | 2 +- tests/numpy/attribute_test.py | 43 ++ tests/numpy/concat_test.py | 133 ++++ tests/numpy/nested_call_subarray_test.py | 4 +- tests/numpy/split_test.py | 142 +++++ tests/numpy/ufunc_test.py | 6 + 23 files changed, 1458 insertions(+), 86 deletions(-) create mode 100644 dace/libraries/fft/__init__.py create mode 100644 dace/libraries/fft/algorithms/__init__.py create mode 100644 dace/libraries/fft/algorithms/dft.py create mode 100644 dace/libraries/fft/environments/__init__.py create mode 100644 dace/libraries/fft/environments/cufft.py create mode 100644 dace/libraries/fft/nodes/__init__.py create mode 100644 dace/libraries/fft/nodes/fft.py create mode 100644 tests/library/fft_test.py create mode 100644 tests/numpy/concat_test.py create mode 100644 tests/numpy/split_test.py diff --git a/dace/codegen/cppunparse.py b/dace/codegen/cppunparse.py index 18ee00721b..edeb5270ca 100644 --- a/dace/codegen/cppunparse.py +++ b/dace/codegen/cppunparse.py @@ -749,6 +749,8 @@ def _Num(self, t): # For complex values, use ``dtype_to_typeclass`` if isinstance(t_n, complex): dtype = dtypes.dtype_to_typeclass(complex) + repr_n = f'{dtype}({t_n.real}, {t_n.imag})' + # Handle large integer values if isinstance(t_n, int): @@ -765,10 +767,8 @@ def _Num(self, t): elif bits >= 64: warnings.warn(f'Value wider than 64 bits encountered in expression ({t_n}), emitting as-is') - if repr_n.endswith("j"): - self.write("%s(0, %s)" % (dtype, repr_n.replace("inf", INFSTR)[:-1])) - else: - self.write(repr_n.replace("inf", INFSTR)) + repr_n = repr_n.replace("inf", INFSTR) + self.write(repr_n) def _List(self, t): raise NotImplementedError('Invalid C++') diff --git a/dace/distr_types.py b/dace/distr_types.py index 1b595a1b84..b60eb4925e 100644 --- a/dace/distr_types.py +++ b/dace/distr_types.py @@ -96,6 +96,10 @@ def _validate(self): raise ValueError('Color must have only logical true (1) or false (0) values.') return True + @property + def dtype(self): + return type(self) + def to_json(self): attrs = serialize.all_properties_to_json(self) retdict = {"type": type(self).__name__, "attributes": attrs} diff --git a/dace/frontend/common/distr.py b/dace/frontend/common/distr.py index 88a6b0c54a..c517028d53 100644 --- 
a/dace/frontend/common/distr.py +++ b/dace/frontend/common/distr.py @@ -50,14 +50,14 @@ def _cart_create(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, dims: Shape @oprepo.replaces_method('Intracomm', 'Create_cart') -def _intracomm_create(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, icomm: 'Intracomm', dims: ShapeType): +def _intracomm_create(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, icomm: str, dims: ShapeType): """ Equivalent to `dace.comm.Cart_create(dims). :param dims: Shape of the process-grid (see `dims` parameter of `MPI_Cart_create`), e.g., [2, 3, 3]. :return: Name of the new process-grid descriptor. """ from mpi4py import MPI - icomm_name, icomm_obj = icomm + icomm_name, icomm_obj = icomm, pv.globals[icomm] if icomm_obj != MPI.COMM_WORLD: raise ValueError('Only the mpi4py.MPI.COMM_WORLD Intracomm is supported in DaCe Python programs.') return _cart_create(pv, sdfg, state, dims) @@ -186,13 +186,13 @@ def _bcast(pv: ProgramVisitor, def _intracomm_bcast(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, - comm: Tuple[str, 'Comm'], + comm: str, buffer: str, root: Union[str, sp.Expr, Number] = 0): """ Equivalent to `dace.comm.Bcast(buffer, root)`. """ from mpi4py import MPI - comm_name, comm_obj = comm + comm_name, comm_obj = comm, pv.globals[comm] if comm_obj == MPI.COMM_WORLD: return _bcast(pv, sdfg, state, buffer, root) # NOTE: Highly experimental @@ -267,12 +267,12 @@ def _alltoall(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, inbuffer: str, @oprepo.replaces_method('Intracomm', 'Alltoall') -def _intracomm_alltoall(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, icomm: 'Intracomm', inp_buffer: str, +def _intracomm_alltoall(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, icomm: str, inp_buffer: str, out_buffer: str): """ Equivalent to `dace.comm.Alltoall(inp_buffer, out_buffer)`. """ from mpi4py import MPI - icomm_name, icomm_obj = icomm + icomm_name, icomm_obj = icomm, pv.globals[icomm] if icomm_obj != MPI.COMM_WORLD: raise ValueError('Only the mpi4py.MPI.COMM_WORLD Intracomm is supported in DaCe Python programs.') return _alltoall(pv, sdfg, state, inp_buffer, out_buffer) @@ -303,12 +303,12 @@ def _allreduce(pv: ProgramVisitor, sdfg: SDFG, state: SDFGState, buffer: str, op @oprepo.replaces_method('Intracomm', 'Allreduce') -def _intracomm_allreduce(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, icomm: 'Intracomm', inp_buffer: 'InPlace', +def _intracomm_allreduce(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, icomm: str, inp_buffer: 'InPlace', out_buffer: str, op: str): """ Equivalent to `dace.comm.Allreduce(out_buffer, op)`. """ from mpi4py import MPI - icomm_name, icomm_obj = icomm + icomm_name, icomm_obj = icomm, pv.globals[icomm] if icomm_obj != MPI.COMM_WORLD: raise ValueError('Only the mpi4py.MPI.COMM_WORLD Intracomm is supported in DaCe Python programs.') if inp_buffer != MPI.IN_PLACE: @@ -470,12 +470,12 @@ def _send(pv: ProgramVisitor, @oprepo.replaces_method('Intracomm', 'Send') -def _intracomm_send(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, icomm: 'Intracomm', buffer: str, +def _intracomm_send(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, icomm: str, buffer: str, dst: Union[str, sp.Expr, Number], tag: Union[str, sp.Expr, Number]): """ Equivalent to `dace.comm.end(buffer, dst, tag)`. 
""" from mpi4py import MPI - icomm_name, icomm_obj = icomm + icomm_name, icomm_obj = icomm, pv.globals[icomm] if icomm_obj != MPI.COMM_WORLD: raise ValueError('Only the mpi4py.MPI.COMM_WORLD Intracomm is supported in DaCe Python programs.') return _send(pv, sdfg, state, buffer, dst, tag) @@ -592,12 +592,12 @@ def _isend(pv: ProgramVisitor, @oprepo.replaces_method('Intracomm', 'Isend') -def _intracomm_isend(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, icomm: 'Intracomm', buffer: str, +def _intracomm_isend(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, icomm: str, buffer: str, dst: Union[str, sp.Expr, Number], tag: Union[str, sp.Expr, Number]): """ Equivalent to `dace.comm.Isend(buffer, dst, tag, req)`. """ from mpi4py import MPI - icomm_name, icomm_obj = icomm + icomm_name, icomm_obj = icomm, pv.globals[icomm] if icomm_obj != MPI.COMM_WORLD: raise ValueError('Only the mpi4py.MPI.COMM_WORLD Intracomm is supported in DaCe Python programs.') req, _ = sdfg.add_array("isend_req", [1], dace.dtypes.opaque("MPI_Request"), transient=True, find_new_name=True) @@ -690,12 +690,12 @@ def _recv(pv: ProgramVisitor, @oprepo.replaces_method('Intracomm', 'Recv') -def _intracomm_Recv(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, icomm: 'Intracomm', buffer: str, +def _intracomm_Recv(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, icomm: str, buffer: str, src: Union[str, sp.Expr, Number], tag: Union[str, sp.Expr, Number]): """ Equivalent to `dace.comm.Recv(buffer, src, tagq)`. """ from mpi4py import MPI - icomm_name, icomm_obj = icomm + icomm_name, icomm_obj = icomm, pv.globals[icomm] if icomm_obj != MPI.COMM_WORLD: raise ValueError('Only the mpi4py.MPI.COMM_WORLD Intracomm is supported in DaCe Python programs.') return _recv(pv, sdfg, state, buffer, src, tag) @@ -810,12 +810,12 @@ def _irecv(pv: ProgramVisitor, @oprepo.replaces_method('Intracomm', 'Irecv') -def _intracomm_irecv(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, icomm: 'Intracomm', buffer: str, +def _intracomm_irecv(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, icomm: str, buffer: str, src: Union[str, sp.Expr, Number], tag: Union[str, sp.Expr, Number]): """ Equivalent to `dace.comm.Irecv(buffer, src, tag, req)`. """ from mpi4py import MPI - icomm_name, icomm_obj = icomm + icomm_name, icomm_obj = icomm, pv.globals[icomm] if icomm_obj != MPI.COMM_WORLD: raise ValueError('Only the mpi4py.MPI.COMM_WORLD Intracomm is supported in DaCe Python programs.') req, _ = sdfg.add_array("irecv_req", [1], dace.dtypes.opaque("MPI_Request"), transient=True, find_new_name=True) diff --git a/dace/frontend/python/newast.py b/dace/frontend/python/newast.py index 78890c9cdd..b4e83cc1e7 100644 --- a/dace/frontend/python/newast.py +++ b/dace/frontend/python/newast.py @@ -1342,7 +1342,7 @@ def defined(self): # MPI-related stuff result.update({ - k: self.sdfg.process_grids[v] + v: self.sdfg.process_grids[v] for k, v in self.variables.items() if v in self.sdfg.process_grids }) try: @@ -4461,7 +4461,14 @@ def visit_Call(self, node: ast.Call, create_callbacks=False): func = node.func.value if func is None: - funcname = rname(node) + func_result = self.visit(node.func) + if isinstance(func_result, str): + if isinstance(node.func, ast.Attribute): + funcname = f'{func_result}.{node.func.attr}' + else: + funcname = func_result + else: + funcname = rname(node) # Check if the function exists as an SDFG in a different module modname = until(funcname, '.') if ('.' 
in funcname and len(modname) > 0 and modname in self.globals @@ -4576,7 +4583,7 @@ def visit_Call(self, node: ast.Call, create_callbacks=False): arg = self.scope_vars[modname] else: # Fallback to (name, object) - arg = (modname, self.defined[modname]) + arg = modname args.append(arg) # Otherwise, try to find a default implementation for the SDFG elif not found_ufunc: @@ -4795,12 +4802,18 @@ def _visitname(self, name: str, node: ast.AST): self.sdfg.add_symbol(result.name, result.dtype) return result + if name in self.closure.callbacks: + return name + if name in self.sdfg.arrays: return name if name in self.sdfg.symbols: return name + if name in __builtins__: + return name + if name not in self.scope_vars: raise DaceSyntaxError(self, node, 'Use of undefined variable "%s"' % name) rname = self.scope_vars[name] @@ -4845,33 +4858,43 @@ def visit_NameConstant(self, node: NameConstant): return self.visit_Constant(node) def visit_Attribute(self, node: ast.Attribute): - # If visiting an attribute, return attribute value if it's of an array or global - name = until(astutils.unparse(node), '.') - result = self._visitname(name, node) + result = self.visit(node.value) + if isinstance(result, (tuple, list, dict)): + if len(result) > 1: + raise DaceSyntaxError( + self, node.value, f'{type(result)} object cannot use attributes. Try storing the ' + 'object to a different variable first (e.g., ``a = result; a.attribute``') + else: + result = result[0] + tmpname = f"{result}.{astutils.unparse(node.attr)}" if tmpname in self.sdfg.arrays: return tmpname + if isinstance(result, str) and result in self.sdfg.arrays: arr = self.sdfg.arrays[result] elif isinstance(result, str) and result in self.scope_arrays: arr = self.scope_arrays[result] else: - return result + arr = None # Try to find sub-SDFG attribute - func = oprepo.Replacements.get_attribute(type(arr), node.attr) - if func is not None: - # A new state is likely needed here, e.g., for transposition (ndarray.T) - self._add_state('%s_%d' % (type(node).__name__, node.lineno)) - self.last_block.set_default_lineinfo(self.current_lineinfo) - result = func(self, self.sdfg, self.last_block, result) - self.last_block.set_default_lineinfo(None) - return result + if arr is not None: + func = oprepo.Replacements.get_attribute(type(arr), node.attr) + if func is not None: + # A new state is likely needed here, e.g., for transposition (ndarray.T) + self._add_state('%s_%d' % (type(node).__name__, node.lineno)) + self.last_block.set_default_lineinfo(self.current_lineinfo) + result = func(self, self.sdfg, self.last_block, result) + self.last_block.set_default_lineinfo(None) + return result # Otherwise, try to find compile-time attribute (such as shape) try: - return getattr(arr, node.attr) - except KeyError: + if arr is not None: + return getattr(arr, node.attr) + return getattr(result, node.attr) + except (AttributeError, KeyError): return result def visit_List(self, node: ast.List): diff --git a/dace/frontend/python/preprocessing.py b/dace/frontend/python/preprocessing.py index eca07a4930..f51b67ddb2 100644 --- a/dace/frontend/python/preprocessing.py +++ b/dace/frontend/python/preprocessing.py @@ -527,6 +527,8 @@ def global_value_to_node(self, elif isinstance(value, symbolic.symbol): # Symbols resolve to the symbol name newnode = ast.Name(id=value.name, ctx=ast.Load()) + elif isinstance(value, sympy.Basic): # Symbolic or constant expression + newnode = ast.parse(symbolic.symstr(value)).body[0].value elif isinstance(value, ast.Name): newnode = ast.Name(id=value.id, 
ctx=ast.Load()) elif (dtypes.isconstant(value) or isinstance(value, (StringLiteral, SDFG)) or hasattr(value, '__sdfg__')): diff --git a/dace/frontend/python/replacements.py b/dace/frontend/python/replacements.py index 537fef97bf..c5b3e3b2a2 100644 --- a/dace/frontend/python/replacements.py +++ b/dace/frontend/python/replacements.py @@ -313,6 +313,9 @@ def _numpy_full(pv: ProgramVisitor, """ Creates and array of the specified shape and initializes it with the fill value. """ + if isinstance(shape, Number) or symbolic.issymbolic(shape): + shape = [shape] + is_data = False if isinstance(fill_value, (Number, np.bool_)): vtype = dtypes.dtype_to_typeclass(type(fill_value)) @@ -548,8 +551,13 @@ def _numpy_rot90(pv: ProgramVisitor, sdfg: SDFG, state: SDFGState, arr: str, k=1 @oprepo.replaces('numpy.arange') @oprepo.replaces('dace.arange') -def _arange(pv: ProgramVisitor, sdfg: SDFG, state: SDFGState, *args, **kwargs): - """ Implementes numpy.arange """ +def _arange(pv: ProgramVisitor, + sdfg: SDFG, + state: SDFGState, + *args, + dtype: dtypes.typeclass = None, + like: Optional[str] = None): + """ Implements numpy.arange """ start = 0 stop = None @@ -563,35 +571,42 @@ def _arange(pv: ProgramVisitor, sdfg: SDFG, state: SDFGState, *args, **kwargs): else: start, stop, step = args + if isinstance(start, str): + raise TypeError(f'Cannot compile numpy.arange with a scalar start value "{start}" (only constants and symbolic ' + 'expressions are supported). Please use numpy.linspace instead.') + if isinstance(stop, str): + raise TypeError(f'Cannot compile numpy.arange with a scalar stop value "{stop}" (only constants and symbolic ' + 'expressions are supported). Please use numpy.linspace instead.') + if isinstance(step, str): + raise TypeError(f'Cannot compile numpy.arange with a scalar step value "{step}" (only constants and symbolic ' + 'expressions are supported). 
Please use numpy.linspace instead.') + actual_step = step if isinstance(start, Number) and isinstance(stop, Number): actual_step = type(start + step)(start + step) - start if any(not isinstance(s, Number) for s in [start, stop, step]): - shape = (symbolic.int_ceil(stop - start, step), ) + if step == 1: # Common case where ceiling is not necessary + shape = (stop - start,) + else: + shape = (symbolic.int_ceil(stop - start, step), ) else: shape = (np.int64(np.ceil((stop - start) / step)), ) - if not isinstance(shape[0], Number) and ('dtype' not in kwargs or kwargs['dtype'] == None): - raise NotImplementedError("The current implementation of numpy.arange requires that the output dtype is given " - "when at least one of (start, stop, step) is symbolic.") + # Infer dtype from input arguments + if dtype is None: + dtype, _ = _result_type(args) + # TODO: Unclear what 'like' does - # if 'like' in kwargs and kwargs['like'] != None: - # outname, outarr = sdfg.add_temp_transient_like(sdfg.arrays[kwargs['like']]) + # if like is not None: + # outname, outarr = sdfg.add_temp_transient_like(sdfg.arrays[like]) # outarr.shape = shape - if 'dtype' in kwargs and kwargs['dtype'] != None: - dtype = kwargs['dtype'] - if not isinstance(dtype, dtypes.typeclass): - dtype = dtypes.dtype_to_typeclass(dtype) - outname, outarr = sdfg.add_temp_transient(shape, dtype) - else: - # infer dtype based on args's dtype - # (since the `dtype` keyword argument isn't given, none of the arguments can be symbolic) - if any(isinstance(arg, (float, np.float32, np.float64)) for arg in args): - dtype = dtypes.float64 - else: - dtype = dtypes.int64 - outname, outarr = sdfg.add_temp_transient(shape, dtype) + if not isinstance(dtype, dtypes.typeclass): + dtype = dtypes.dtype_to_typeclass(dtype) + outname, outarr = sdfg.add_temp_transient(shape, dtype) + + start = f'decltype(__out)({start})' + actual_step = f'decltype(__out)({actual_step})' state.add_mapped_tasklet(name="_numpy_arange_", map_ranges={'__i': f"0:{shape[0]}"}, @@ -603,6 +618,131 @@ def _arange(pv: ProgramVisitor, sdfg: SDFG, state: SDFGState, *args, **kwargs): return outname +def _add_axis_to_shape(shape: Sequence[symbolic.SymbolicType], axis: int, + axis_value: Any) -> List[symbolic.SymbolicType]: + if axis > len(shape): + raise ValueError(f'axis {axis} is out of bounds for array of dimension {len(shape)}') + if axis < 0: + naxis = len(shape) + 1 + axis + if naxis < 0 or naxis > len(shape): + raise ValueError(f'axis {axis} is out of bounds for array of dimension {len(shape)}') + axis = naxis + + # Make a new shape list with the inserted dimension + new_shape = [None] * (len(shape) + 1) + for i in range(len(shape) + 1): + if i == axis: + new_shape[i] = axis_value + elif i < axis: + new_shape[i] = shape[i] + else: + new_shape[i] = shape[i - 1] + + return new_shape + + +@oprepo.replaces('numpy.linspace') +def _linspace(pv: ProgramVisitor, + sdfg: SDFG, + state: SDFGState, + start: Union[Number, symbolic.SymbolicType, str], + stop: Union[Number, symbolic.SymbolicType, str], + num: Union[Integral, symbolic.SymbolicType] = 50, + endpoint: bool = True, + retstep: bool = False, + dtype: dtypes.typeclass = None, + axis: int = 0): + """ Implements numpy.linspace """ + # Argument checks + if not isinstance(num, (Integral, sp.Basic)): + raise TypeError('numpy.linspace can only be compiled when the ``num`` argument is symbolic or constant.') + if not isinstance(axis, Integral): + raise TypeError('numpy.linspace can only be compiled when the ``axis`` argument is constant.') + + # Start 
and stop are broadcast together, then, a new dimension is added to axis (taken from ``ndim + 1``), + # along which the numbers are filled. + start_shape = sdfg.arrays[start].shape if (isinstance(start, str) and start in sdfg.arrays) else [] + stop_shape = sdfg.arrays[stop].shape if (isinstance(stop, str) and stop in sdfg.arrays) else [] + + shape, ranges, outind, ind1, ind2 = _broadcast_together(start_shape, stop_shape) + shape_with_axis = _add_axis_to_shape(shape, axis, num) + ranges_with_axis = _add_axis_to_shape(ranges, axis, ('__sind', f'0:{symbolic.symstr(num)}')) + if outind: + outind_with_axis = _add_axis_to_shape(outind.split(', '), axis, '__sind') + else: + outind_with_axis = ['__sind'] + + if dtype is None: + # Infer output type from start and stop + start_type = sdfg.arrays[start] if (isinstance(start, str) and start in sdfg.arrays) else start + stop_type = sdfg.arrays[stop] if (isinstance(stop, str) and stop in sdfg.arrays) else stop + + dtype, _ = _result_type((start_type, stop_type), 'Add') + + # From the NumPy documentation: The inferred dtype will never be an integer; float is chosen even if the + # arguments would produce an array of integers. + if dtype in (dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64, dtypes.uint8, dtypes.uint16, dtypes.uint32, + dtypes.uint64): + dtype = dtypes.dtype_to_typeclass(float) + + outname, _ = sdfg.add_temp_transient(shape_with_axis, dtype) + + if endpoint == True: + num -= 1 + + # Fill in input memlets as necessary + inputs = {} + if isinstance(start, str) and start in sdfg.arrays: + index = f'[{ind1}]' if ind1 else '' + inputs['__start'] = Memlet(f'{start}{index}') + startcode = '__start' + else: + startcode = symbolic.symstr(start) + + if isinstance(stop, str) and stop in sdfg.arrays: + index = f'[{ind2}]' if ind2 else '' + inputs['__stop'] = Memlet(f'{stop}{index}') + stopcode = '__stop' + else: + stopcode = symbolic.symstr(stop) + + # Create tasklet code based on inputs + code = f'__out = {startcode} + __sind * decltype(__out)({stopcode} - {startcode}) / decltype(__out)({symbolic.symstr(num)})' + + state.add_mapped_tasklet(name="linspace", + map_ranges=ranges_with_axis, + inputs=inputs, + code=code, + outputs={'__out': Memlet(f"{outname}[{','.join(outind_with_axis)}]")}, + external_edges=True) + + if retstep == False: + return outname + + # Return step if requested + + # Handle scalar outputs + if not ranges: + ranges = [('__unused', '0:1')] + out_index = f'[{outind}]' + + if len(shape) > 0: + stepname, _ = sdfg.add_temp_transient(shape, dtype) + else: + stepname, _ = sdfg.add_scalar(sdfg.temp_data_name(), dtype, transient=True) + out_index = '[0]' + + state.add_mapped_tasklet( + 'retstep', + ranges, + copy.deepcopy(inputs), + f'__out = decltype(__out)({stopcode} - {startcode}) / decltype(__out)({symbolic.symstr(num)})', + {'__out': Memlet(f"{stepname}{out_index}")}, + external_edges=True) + + return outname, stepname + + @oprepo.replaces('elementwise') @oprepo.replaces('dace.elementwise') def _elementwise(pv: 'ProgramVisitor', @@ -708,9 +848,9 @@ def _simple_call(sdfg: SDFG, state: SDFGState, inpname: str, func: str, restype: def _complex_to_scalar(complex_type: dace.typeclass): - if complex_type is dace.complex64: + if complex_type == dace.complex64: return dace.float32 - elif complex_type is dace.complex128: + elif complex_type == dace.complex128: return dace.float64 else: return complex_type @@ -814,7 +954,8 @@ def _len_array(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, a: str): return sdfg.arrays[a].shape[0] if a in 
sdfg.constants_prop: return len(sdfg.constants[a]) - raise TypeError(f'`len` is not supported for input "{a}" (type {type(a)})') + else: + return len(a) @oprepo.replaces('transpose') @@ -1632,8 +1773,17 @@ def _result_type(arguments: Sequence[Union[str, Number, symbolic.symbol, sp.Basi else: # Operators with 3 or more arguments result_type = _np_result_type(dtypes_for_result) + coarse_result_type = None + if result_type in complex_types: + coarse_result_type = 3 # complex + elif result_type in float_types: + coarse_result_type = 2 # float + elif result_type in signed_types: + coarse_result_type = 1 # signed integer, bool + else: + coarse_result_type = 0 # unsigned integer for i, t in enumerate(coarse_types): - if t != result_type: + if t != coarse_result_type: casting[i] = _cast_str(result_type) return result_type, casting @@ -2512,6 +2662,13 @@ def _matmult(visitor: ProgramVisitor, sdfg: SDFG, state: SDFGState, op1: str, op code="__out = log1p(__in1)", reduce=None, initial=np.log1p.identity), + clip=dict(name="_numpy_clip_", + operator=None, + inputs=["__in_a", "__in_amin", "__in_amax"], + outputs=["__out"], + code="__out = min(max(__in_a, __in_amin), __in_amax)", + reduce=None, + initial=np.inf), sqrt=dict(name="_numpy_sqrt_", operator="Sqrt", inputs=["__in1"], @@ -4087,14 +4244,13 @@ def implement_ufunc_outer(visitor: ProgramVisitor, ast_node: ast.Call, sdfg: SDF @oprepo.replaces('numpy.reshape') -def reshape( - pv: ProgramVisitor, - sdfg: SDFG, - state: SDFGState, - arr: str, - newshape: Union[str, symbolic.SymbolicType, Tuple[Union[str, symbolic.SymbolicType]]], - order: StringLiteral = StringLiteral('C') -) -> str: +def reshape(pv: ProgramVisitor, + sdfg: SDFG, + state: SDFGState, + arr: str, + newshape: Union[str, symbolic.SymbolicType, Tuple[Union[str, symbolic.SymbolicType]]], + order: StringLiteral = StringLiteral('C'), + strides: Optional[Any] = None) -> str: if isinstance(arr, (list, tuple)) and len(arr) == 1: arr = arr[0] desc = sdfg.arrays[arr] @@ -4108,10 +4264,11 @@ def reshape( # New shape and strides as symbolic expressions newshape = [symbolic.pystr_to_symbolic(s) for s in newshape] - if fortran_strides: - strides = [data._prod(newshape[:i]) for i in range(len(newshape))] - else: - strides = [data._prod(newshape[i + 1:]) for i in range(len(newshape))] + if strides is None: + if fortran_strides: + strides = [data._prod(newshape[:i]) for i in range(len(newshape))] + else: + strides = [data._prod(newshape[i + 1:]) for i in range(len(newshape))] newarr, newdesc = sdfg.add_view(arr, newshape, @@ -4326,9 +4483,13 @@ def _ndarray_reshape( sdfg: SDFG, state: SDFGState, arr: str, - newshape: Union[str, symbolic.SymbolicType, Tuple[Union[str, symbolic.SymbolicType]]], + *newshape: Union[str, symbolic.SymbolicType, Tuple[Union[str, symbolic.SymbolicType]]], order: StringLiteral = StringLiteral('C') ) -> str: + if len(newshape) == 0: + raise TypeError('reshape() takes at least 1 argument (0 given)') + if len(newshape) == 1 and isinstance(newshape, (list, tuple)): + newshape = newshape[0] return reshape(pv, sdfg, state, arr, newshape, order) @@ -4833,3 +4994,407 @@ def _op(visitor: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, op1: StringLite for op, method in _boolop_to_method.items(): _makeboolop(op, method) + + +@oprepo.replaces('numpy.concatenate') +def _concat(visitor: ProgramVisitor, + sdfg: SDFG, + state: SDFGState, + arrays: Tuple[Any], + axis: Optional[int] = 0, + out: Optional[Any] = None, + *, + dtype=None, + casting: str = 'same_kind'): + if dtype is not None and out is 
not None: + raise ValueError('Arguments dtype and out cannot be given together') + if casting != 'same_kind': + raise NotImplementedError('The casting argument is currently unsupported') + if not isinstance(arrays, (tuple, list)): + raise ValueError('List of arrays is not iterable, cannot compile concatenation') + if axis is not None and not isinstance(axis, Integral): + raise ValueError('Axis is not a compile-time evaluatable integer, cannot compile concatenation') + if len(arrays) == 1: + return arrays[0] + for i in range(len(arrays)): + if arrays[i] not in sdfg.arrays: + raise TypeError(f'Index {i} is not an array') + if out is not None: + if out not in sdfg.arrays: + raise TypeError('Output is not an array') + dtype = sdfg.arrays[out].dtype + + descs = [sdfg.arrays[arr] for arr in arrays] + shape = list(descs[0].shape) + + if axis is None: # Flatten arrays, then concatenate + arrays = [flat(visitor, sdfg, state, arr) for arr in arrays] + descs = [sdfg.arrays[arr] for arr in arrays] + shape = list(descs[0].shape) + axis = 0 + else: + # Check shapes for validity + first_shape = copy.copy(shape) + first_shape[axis] = 0 + for i, d in enumerate(descs[1:]): + other_shape = list(d.shape) + other_shape[axis] = 0 + if other_shape != first_shape: + raise ValueError(f'Array shapes do not match at index {i}') + + shape[axis] = sum(desc.shape[axis] for desc in descs) + if out is None: + if dtype is None: + dtype = descs[0].dtype + name, odesc = sdfg.add_temp_transient(shape, dtype, storage=descs[0].storage, lifetime=descs[0].lifetime) + else: + name = out + odesc = sdfg.arrays[out] + + # Make copies + w = state.add_write(name) + offset = 0 + subset = subsets.Range.from_array(odesc) + for arr, desc in zip(arrays, descs): + r = state.add_read(arr) + subset = copy.deepcopy(subset) + subset[axis] = (offset, offset + desc.shape[axis] - 1, 1) + state.add_edge(r, None, w, None, Memlet(data=name, subset=subset)) + offset += desc.shape[axis] + + return name + + +@oprepo.replaces('numpy.stack') +def _stack(visitor: ProgramVisitor, + sdfg: SDFG, + state: SDFGState, + arrays: Tuple[Any], + axis: int = 0, + out: Any = None, + *, + dtype=None, + casting: str = 'same_kind'): + if dtype is not None and out is not None: + raise ValueError('Arguments dtype and out cannot be given together') + if casting != 'same_kind': + raise NotImplementedError('The casting argument is currently unsupported') + if not isinstance(arrays, (tuple, list)): + raise ValueError('List of arrays is not iterable, cannot compile stack call') + if not isinstance(axis, Integral): + raise ValueError('Axis is not a compile-time evaluatable integer, cannot compile stack call') + + for i in range(len(arrays)): + if arrays[i] not in sdfg.arrays: + raise TypeError(f'Index {i} is not an array') + + descs = [sdfg.arrays[a] for a in arrays] + shape = descs[0].shape + for i, d in enumerate(descs[1:]): + if d.shape != shape: + raise ValueError(f'Array shapes are not equal ({shape} != {d.shape} at index {i})') + + if axis > len(shape): + raise ValueError(f'axis {axis} is out of bounds for array of dimension {len(shape)}') + if axis < 0: + naxis = len(shape) + 1 + axis + if naxis < 0 or naxis > len(shape): + raise ValueError(f'axis {axis} is out of bounds for array of dimension {len(shape)}') + axis = naxis + + # Stacking is implemented as a reshape followed by concatenation + reshaped = [] + for arr, desc in zip(arrays, descs): + # Make a reshaped view with the inserted dimension + new_shape = [0] * (len(shape) + 1) + new_strides = [0] * (len(shape) + 1) 
+ for i in range(len(shape) + 1): + if i == axis: + new_shape[i] = 1 + new_strides[i] = desc.strides[i - 1] if i != 0 else desc.strides[i] + elif i < axis: + new_shape[i] = shape[i] + new_strides[i] = desc.strides[i] + else: + new_shape[i] = shape[i - 1] + new_strides[i] = desc.strides[i - 1] + + rname = reshape(visitor, sdfg, state, arr, new_shape, strides=new_strides) + reshaped.append(rname) + + return _concat(visitor, sdfg, state, reshaped, axis, out, dtype=dtype, casting=casting) + + +@oprepo.replaces('numpy.vstack') +@oprepo.replaces('numpy.row_stack') +def _vstack(visitor: ProgramVisitor, + sdfg: SDFG, + state: SDFGState, + tup: Tuple[Any], + *, + dtype=None, + casting: str = 'same_kind'): + if not isinstance(tup, (tuple, list)): + raise ValueError('List of arrays is not iterable, cannot compile stack call') + if tup[0] not in sdfg.arrays: + raise TypeError(f'Index 0 is not an array') + + # In the 1-D case, stacking is performed along the first axis + if len(sdfg.arrays[tup[0]].shape) == 1: + return _stack(visitor, sdfg, state, tup, axis=0, out=None, dtype=dtype, casting=casting) + # Otherwise, concatenation is performed + return _concat(visitor, sdfg, state, tup, axis=0, out=None, dtype=dtype, casting=casting) + + +@oprepo.replaces('numpy.hstack') +@oprepo.replaces('numpy.column_stack') +def _hstack(visitor: ProgramVisitor, + sdfg: SDFG, + state: SDFGState, + tup: Tuple[Any], + *, + dtype=None, + casting: str = 'same_kind'): + if not isinstance(tup, (tuple, list)): + raise ValueError('List of arrays is not iterable, cannot compile stack call') + if tup[0] not in sdfg.arrays: + raise TypeError(f'Index 0 is not an array') + + # In the 1-D case, concatenation is performed along the first axis + if len(sdfg.arrays[tup[0]].shape) == 1: + return _concat(visitor, sdfg, state, tup, axis=0, out=None, dtype=dtype, casting=casting) + + return _concat(visitor, sdfg, state, tup, axis=1, out=None, dtype=dtype, casting=casting) + + +@oprepo.replaces('numpy.dstack') +def _dstack(visitor: ProgramVisitor, + sdfg: SDFG, + state: SDFGState, + tup: Tuple[Any], + *, + dtype=None, + casting: str = 'same_kind'): + if not isinstance(tup, (tuple, list)): + raise ValueError('List of arrays is not iterable, cannot compile a stack call') + if tup[0] not in sdfg.arrays: + raise TypeError(f'Index 0 is not an array') + if len(sdfg.arrays[tup[0]].shape) < 3: + raise NotImplementedError('dstack is not implemented for arrays that are smaller than 3D') + + return _concat(visitor, sdfg, state, tup, axis=2, out=None, dtype=dtype, casting=casting) + + +def _split_core(visitor: ProgramVisitor, sdfg: SDFG, state: SDFGState, ary: str, + indices_or_sections: Union[int, Sequence[symbolic.SymbolicType], str], axis: int, allow_uneven: bool): + # Argument checks + if not isinstance(ary, str) or ary not in sdfg.arrays: + raise TypeError('Split object must be an array') + if not isinstance(axis, Integral): + raise ValueError('Cannot determine split dimension, axis is not a compile-time evaluatable integer') + + desc = sdfg.arrays[ary] + + # Test validity of axis + orig_axis = axis + if axis < 0: + axis = len(desc.shape) + axis + if axis < 0 or axis >= len(desc.shape): + raise ValueError(f'axis {orig_axis} is out of bounds for array of dimension {len(desc.shape)}') + + # indices_or_sections may only be an integer (not symbolic), list of integers, list of symbols, or an array + if isinstance(indices_or_sections, str): + raise ValueError('Array-indexed split cannot be compiled due to data-dependent sizes. 
' + 'Consider using numpy.reshape instead.') + elif isinstance(indices_or_sections, (list, tuple)): + if any(isinstance(i, str) for i in indices_or_sections): + raise ValueError('Array-indexed split cannot be compiled due to data-dependent sizes. ' + 'Use symbolic values as an argument instead.') + # Sequence is given + sections = indices_or_sections + elif isinstance(indices_or_sections, Integral): # Constant integer given + if indices_or_sections <= 0: + raise ValueError('Number of sections must be larger than zero.') + + # If uneven sizes are not allowed and ary shape is numeric, check evenness + if not allow_uneven and not symbolic.issymbolic(desc.shape[axis]): + if desc.shape[axis] % indices_or_sections != 0: + raise ValueError('Array split does not result in an equal division. Consider using numpy.array_split ' + 'instead.') + if indices_or_sections > desc.shape[axis]: + raise ValueError('Cannot compile array split as it will result in empty arrays.') + + # Sequence is not given, compute sections + # Mimic behavior of array_split in numpy: Sections are [s+1 x N%s], s, ..., s + size = desc.shape[axis] // indices_or_sections + remainder = desc.shape[axis] % indices_or_sections + sections = [] + offset = 0 + for _ in range(min(remainder, indices_or_sections)): + offset += size + 1 + sections.append(offset) + for _ in range(remainder, indices_or_sections - 1): + offset += size + sections.append(offset) + + elif symbolic.issymbolic(indices_or_sections): + raise ValueError('Symbolic split cannot be compiled due to output tuple size being unknown. ' + 'Consider using numpy.reshape instead.') + else: + raise TypeError(f'Unsupported type {type(indices_or_sections)} for indices_or_sections in numpy.split') + + # Split according to sections + r = state.add_read(ary) + result = [] + offset = 0 + for section in sections: + shape = list(desc.shape) + shape[axis] = section - offset + name, _ = sdfg.add_temp_transient(shape, desc.dtype, storage=desc.storage, lifetime=desc.lifetime) + # Add copy + w = state.add_write(name) + subset = subsets.Range.from_array(desc) + subset[axis] = (offset, offset + shape[axis] - 1, 1) + offset += shape[axis] + state.add_nedge(r, w, Memlet(data=ary, subset=subset)) + result.append(name) + + # Add final section + shape = list(desc.shape) + shape[axis] -= offset + name, _ = sdfg.add_temp_transient(shape, desc.dtype, storage=desc.storage, lifetime=desc.lifetime) + w = state.add_write(name) + subset = subsets.Range.from_array(desc) + subset[axis] = (offset, offset + shape[axis] - 1, 1) + state.add_nedge(r, w, Memlet(data=ary, subset=subset)) + result.append(name) + + # Always return a list of results, even if the size is 1 + return result + + +@oprepo.replaces('numpy.split') +def _split(visitor: ProgramVisitor, + sdfg: SDFG, + state: SDFGState, + ary: str, + indices_or_sections: Union[symbolic.SymbolicType, List[symbolic.SymbolicType], str], + axis: int = 0): + return _split_core(visitor, sdfg, state, ary, indices_or_sections, axis, allow_uneven=False) + + +@oprepo.replaces('numpy.array_split') +def _array_split(visitor: ProgramVisitor, + sdfg: SDFG, + state: SDFGState, + ary: str, + indices_or_sections: Union[symbolic.SymbolicType, List[symbolic.SymbolicType], str], + axis: int = 0): + return _split_core(visitor, sdfg, state, ary, indices_or_sections, axis, allow_uneven=True) + + +@oprepo.replaces('numpy.dsplit') +def _dsplit(visitor: ProgramVisitor, sdfg: SDFG, state: SDFGState, ary: str, + indices_or_sections: Union[symbolic.SymbolicType, List[symbolic.SymbolicType], 
str]): + if isinstance(ary, str) and ary in sdfg.arrays: + if len(sdfg.arrays[ary].shape) < 3: + raise ValueError('Array dimensionality must be 3 or above for dsplit') + return _split_core(visitor, sdfg, state, ary, indices_or_sections, axis=2, allow_uneven=False) + + +@oprepo.replaces('numpy.hsplit') +def _hsplit(visitor: ProgramVisitor, sdfg: SDFG, state: SDFGState, ary: str, + indices_or_sections: Union[symbolic.SymbolicType, List[symbolic.SymbolicType], str]): + if isinstance(ary, str) and ary in sdfg.arrays: + # In case of a 1D array, split with axis=0 + if len(sdfg.arrays[ary].shape) <= 1: + return _split_core(visitor, sdfg, state, ary, indices_or_sections, axis=0, allow_uneven=False) + return _split_core(visitor, sdfg, state, ary, indices_or_sections, axis=1, allow_uneven=False) + + +@oprepo.replaces('numpy.vsplit') +def _vsplit(visitor: ProgramVisitor, sdfg: SDFG, state: SDFGState, ary: str, + indices_or_sections: Union[symbolic.SymbolicType, List[symbolic.SymbolicType], str]): + return _split_core(visitor, sdfg, state, ary, indices_or_sections, axis=0, allow_uneven=False) + + +############################################################################################################ +# Fast Fourier Transform numpy package (numpy.fft) + +def _real_to_complex(real_type: dace.typeclass): + if real_type == dace.float32: + return dace.complex64 + elif real_type == dace.float64: + return dace.complex128 + else: + return real_type + + +def _fft_core(pv: 'ProgramVisitor', + sdfg: SDFG, + state: SDFGState, + a: str, + n: Optional[dace.symbolic.SymbolicType] = None, + axis=-1, + norm: StringLiteral = StringLiteral('backward'), + is_inverse: bool = False): + from dace.libraries.fft.nodes import FFT, IFFT # Avoid import loops + if axis != 0 and axis != -1: + raise NotImplementedError('Only one dimensional arrays are supported at the moment') + if not isinstance(a, str) or a not in sdfg.arrays: + raise ValueError('Input must be a valid array') + + libnode = FFT('fft') if not is_inverse else IFFT('ifft') + + desc = sdfg.arrays[a] + N = desc.shape[axis] + + # If n is not None, either pad input or slice and add a view + if n is not None: + raise NotImplementedError + + # Compute factor + if norm == 'forward': + factor = (1 / N) if not is_inverse else 1 + elif norm == 'backward': + factor = 1 if not is_inverse else (1 / N) + elif norm == 'ortho': + factor = sp.sqrt(1 / N) + else: + raise ValueError('norm argument can only be one of "forward", "backward", or "ortho".') + libnode.factor = factor + + # Compute output type from input type + if is_inverse and desc.dtype not in (dace.complex64, dace.complex128): + raise TypeError(f'Inverse FFT only accepts complex inputs, got {desc.dtype}') + dtype = _real_to_complex(desc.dtype) + + name, odesc = sdfg.add_temp_transient_like(desc, dtype) + r = state.add_read(a) + w = state.add_write(name) + state.add_edge(r, None, libnode, '_inp', Memlet.from_array(a, desc)) + state.add_edge(libnode, '_out', w, None, Memlet.from_array(name, odesc)) + + return name + + +@oprepo.replaces('numpy.fft.fft') +def _fft(pv: 'ProgramVisitor', + sdfg: SDFG, + state: SDFGState, + a: str, + n: Optional[dace.symbolic.SymbolicType] = None, + axis=-1, + norm: StringLiteral = StringLiteral('backward')): + return _fft_core(pv, sdfg, state, a, n, axis, norm, False) + + +@oprepo.replaces('numpy.fft.ifft') +def _ifft(pv: 'ProgramVisitor', + sdfg: SDFG, + state: SDFGState, + a, + n=None, + axis=-1, + norm: StringLiteral = StringLiteral('backward')): + return _fft_core(pv, sdfg, state, a, 
n, axis, norm, True) diff --git a/dace/libraries/blas/nodes/gemv.py b/dace/libraries/blas/nodes/gemv.py index baf6fb415d..52091c6864 100644 --- a/dace/libraries/blas/nodes/gemv.py +++ b/dace/libraries/blas/nodes/gemv.py @@ -730,6 +730,9 @@ def expansion(node: 'Gemv', state, sdfg, m=None, n=None, **kwargs): dtype_a = outer_array_a.dtype.type dtype = outer_array_x.dtype.base_type veclen = outer_array_x.dtype.veclen + alpha = f'{dtype.ctype}({node.alpha})' + beta = f'{dtype.ctype}({node.beta})' + m = m or node.m n = n or node.n if m is None: @@ -765,8 +768,17 @@ def expansion(node: 'Gemv', state, sdfg, m=None, n=None, **kwargs): func = func.lower() + 'gemv' - code = f"""cblas_{func}({layout}, {trans}, {m}, {n}, {node.alpha}, _A, {lda}, - _x, {strides_x[0]}, {node.beta}, _y, {strides_y[0]});""" + code = '' + if dtype in (dace.complex64, dace.complex128): + code = f''' + {dtype.ctype} __alpha = {alpha}; + {dtype.ctype} __beta = {beta}; + ''' + alpha = '&__alpha' + beta = '&__beta' + + code += f"""cblas_{func}({layout}, {trans}, {m}, {n}, {alpha}, _A, {lda}, + _x, {strides_x[0]}, {beta}, _y, {strides_y[0]});""" tasklet = dace.sdfg.nodes.Tasklet(node.name, node.in_connectors, diff --git a/dace/libraries/fft/__init__.py b/dace/libraries/fft/__init__.py new file mode 100644 index 0000000000..71fb014f32 --- /dev/null +++ b/dace/libraries/fft/__init__.py @@ -0,0 +1,6 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. +from dace.library import register_library +from .nodes import * +from .environments import * + +register_library(__name__, "fft") diff --git a/dace/libraries/fft/algorithms/__init__.py b/dace/libraries/fft/algorithms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/dace/libraries/fft/algorithms/dft.py b/dace/libraries/fft/algorithms/dft.py new file mode 100644 index 0000000000..340dfed22d --- /dev/null +++ b/dace/libraries/fft/algorithms/dft.py @@ -0,0 +1,45 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. +""" +One-dimensional Discrete Fourier Transform (DFT) native implementations. 
+""" +import dace +import numpy as np +import math + + +# Native, naive version of the Discrete Fourier Transform +@dace.program +def dft(_inp, _out, N: dace.compiletime, factor: dace.compiletime): + i = np.arange(N) + e = np.exp(-2j * np.pi * i * i[:, None] / N) + _out[:] = factor * (e @ _inp.astype(dace.complex128)) + + +@dace.program +def idft(_inp, _out, N: dace.compiletime, factor: dace.compiletime): + i = np.arange(N) + e = np.exp(2j * np.pi * i * i[:, None] / N) + _out[:] = factor * (e @ _inp.astype(dace.complex128)) + + +# Single-map version of DFT, useful for integrating small Fourier transforms into other operations +@dace.program +def dft_explicit(_inp, _out, N: dace.compiletime, factor: dace.compiletime): + _out[:] = 0 + for i, n in dace.map[0:N, 0:N]: + with dace.tasklet: + inp << _inp[n] + exponent = 2 * math.pi * i * n / N + b = decltype(b)(math.cos(exponent), -math.sin(exponent)) * inp * factor + b >> _out(1, lambda a, b: a + b)[i] + + +@dace.program +def idft_explicit(_inp, _out, N: dace.compiletime, factor: dace.compiletime): + _out[:] = 0 + for i, n in dace.map[0:N, 0:N]: + with dace.tasklet: + inp << _inp[n] + exponent = 2 * math.pi * i * n / N + b = decltype(b)(math.cos(exponent), math.sin(exponent)) * inp * factor + b >> _out(1, lambda a, b: a + b)[i] diff --git a/dace/libraries/fft/environments/__init__.py b/dace/libraries/fft/environments/__init__.py new file mode 100644 index 0000000000..0900214e68 --- /dev/null +++ b/dace/libraries/fft/environments/__init__.py @@ -0,0 +1,2 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. +from .cufft import * diff --git a/dace/libraries/fft/environments/cufft.py b/dace/libraries/fft/environments/cufft.py new file mode 100644 index 0000000000..dd243d376a --- /dev/null +++ b/dace/libraries/fft/environments/cufft.py @@ -0,0 +1,21 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. +import dace.library + + +@dace.library.environment +class cuFFT: + + cmake_minimum_version = None + cmake_packages = ["CUDA"] + cmake_variables = {} + cmake_includes = [] + cmake_libraries = ["cufft"] + cmake_compile_flags = [] + cmake_link_flags = [] + cmake_files = [] + + headers = {'frame': ["cufft.h", "cufftXt.h"], 'cuda': ["cufft.h", "cufftXt.h"]} + state_fields = [] + init_code = "" + finalize_code = "" + dependencies = [] diff --git a/dace/libraries/fft/nodes/__init__.py b/dace/libraries/fft/nodes/__init__.py new file mode 100644 index 0000000000..dd8f132aa4 --- /dev/null +++ b/dace/libraries/fft/nodes/__init__.py @@ -0,0 +1,2 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. +from .fft import FFT, IFFT diff --git a/dace/libraries/fft/nodes/fft.py b/dace/libraries/fft/nodes/fft.py new file mode 100644 index 0000000000..bc85f8785b --- /dev/null +++ b/dace/libraries/fft/nodes/fft.py @@ -0,0 +1,204 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. +""" +Implements Forward and Inverse Fast Fourier Transform (FFT) library nodes +""" +import warnings + +from dace import data, dtypes, SDFG, SDFGState, symbolic, library, nodes, properties +from dace import transformation as xf +from dace.libraries.fft import environments as env + + +# Define the library nodes +@library.node +class FFT(nodes.LibraryNode): + implementations = {} + default_implementation = 'pure' + + factor = properties.SymbolicProperty(desc='Coefficient to multiply outputs. 
Used for normalization', default=1.0) + + def __init__(self, name, *args, schedule=None, **kwargs): + super().__init__(name, *args, schedule=schedule, inputs={'_inp'}, outputs={'_out'}, **kwargs) + + +@library.node +class IFFT(nodes.LibraryNode): + implementations = {} + default_implementation = 'pure' + + factor = properties.SymbolicProperty(desc='Coefficient to multiply outputs. Used for normalization', default=1.0) + + def __init__(self, name, *args, schedule=None, **kwargs): + super().__init__(name, *args, schedule=schedule, inputs={'_inp'}, outputs={'_out'}, **kwargs) + + +################################################################################################## +# Native SDFG expansions +################################################################################################## + + +@library.register_expansion(FFT, 'pure') +class DFTExpansion(xf.ExpandTransformation): + environments = [] + + @staticmethod + def expansion(node: FFT, parent_state: SDFGState, parent_sdfg: SDFG) -> SDFG: + from dace.libraries.fft.algorithms import dft # Lazy import functions + input, output = _get_input_and_output(parent_state, node) + indesc = parent_sdfg.arrays[input] + outdesc = parent_sdfg.arrays[output] + if len(indesc.shape) != 1: + raise NotImplementedError('Native SDFG expansion for FFT does not yet support N-dimensional inputs') + + warnings.warn('Performance Warning: No assumptions on FFT input size, falling back to DFT') + return dft.dft_explicit.to_sdfg(indesc, outdesc, N=indesc.shape[0], factor=node.factor) + + +@library.register_expansion(IFFT, 'pure') +class IDFTExpansion(xf.ExpandTransformation): + environments = [] + + @staticmethod + def expansion(node: IFFT, parent_state: SDFGState, parent_sdfg: SDFG) -> SDFG: + from dace.libraries.fft.algorithms import dft # Lazy import functions + input, output = _get_input_and_output(parent_state, node) + indesc = parent_sdfg.arrays[input] + outdesc = parent_sdfg.arrays[output] + if len(indesc.shape) != 1: + raise NotImplementedError('Native SDFG expansion for IFFT does not yet support N-dimensional inputs') + + warnings.warn('Performance Warning: No assumptions on IFFT input size, falling back to DFT') + return dft.idft_explicit.to_sdfg(indesc, outdesc, N=indesc.shape[0], factor=node.factor) + + +################################################################################################## +# cuFFT expansions +################################################################################################## + + +@library.register_expansion(FFT, 'cuFFT') +class cuFFTFFTExpansion(xf.ExpandTransformation): + environments = [env.cuFFT] + plan_uid = 0 + + @staticmethod + def expansion(node: FFT, parent_state: SDFGState, parent_sdfg: SDFG) -> SDFG: + input, output = _get_input_and_output(parent_state, node) + indesc = parent_sdfg.arrays[input] + outdesc = parent_sdfg.arrays[output] + if str(node.factor) != '1': + raise NotImplementedError('Multiplicative post-FFT factors are not yet implemented') + return _generate_cufft_code(indesc, outdesc, parent_sdfg, False) + + +@library.register_expansion(IFFT, 'cuFFT') +class cuFFTIFFTExpansion(xf.ExpandTransformation): + environments = [env.cuFFT] + plan_uid = 0 + + @staticmethod + def expansion(node: IFFT, parent_state: SDFGState, parent_sdfg: SDFG) -> SDFG: + input, output = _get_input_and_output(parent_state, node) + indesc = parent_sdfg.arrays[input] + outdesc = parent_sdfg.arrays[output] + if str(node.factor) != '1': + raise NotImplementedError('Multiplicative post-FFT factors are not yet 
implemented') + return _generate_cufft_code(indesc, outdesc, parent_sdfg, True) + + +def _generate_cufft_code(indesc: data.Data, outdesc: data.Data, sdfg: SDFG, is_inverse: bool): + from dace.codegen.targets import cpp # Avoid import loops + if len(indesc.shape) not in (1, 2, 3): + raise ValueError('cuFFT only supports 1/2/3-dimensional FFT') + if indesc.storage != dtypes.StorageType.GPU_Global: + raise ValueError('cuFFT implementation requires input array to be on GPU') + if outdesc.storage != dtypes.StorageType.GPU_Global: + raise ValueError('cuFFT implementation requires output array to be on GPU') + + cufft_type = _types_to_cufft(indesc.dtype, outdesc.dtype) + init_code = '' + exit_code = '' + callsite_code = '' + + # Make a unique name for this plan + if not is_inverse: + plan_name = f'fwdplan{cuFFTFFTExpansion.plan_uid}' + cuFFTFFTExpansion.plan_uid += 1 + direction = 'CUFFT_FORWARD' + tasklet_prefix = '' + else: + plan_name = f'invplan{cuFFTIFFTExpansion.plan_uid}' + cuFFTIFFTExpansion.plan_uid += 1 + direction = 'CUFFT_INVERSE' + tasklet_prefix = 'i' + + fields = [ + f'cufftHandle {plan_name};', + ] + plan_name = f'__state->{plan_name}' + + init_code += f''' + cufftCreate(&{plan_name}); + ''' + exit_code += f''' + cufftDestroy({plan_name}); + ''' + + cdims = ', '.join([cpp.sym2cpp(s) for s in indesc.shape]) + make_plan = f''' + {{ + size_t __work_size = 0; + cufftMakePlan{len(indesc.shape)}d({plan_name}, {cdims}, {cufft_type}, /*batch=*/1, &__work_size); + }} + ''' + + # Make plan in init if not symbolic or not data-dependent, otherwise make at callsite. + symbols_that_change = set(s for ise in sdfg.edges() for s in ise.data.assignments.keys()) + symbols_that_change &= set(map(str, sdfg.symbols.keys())) + + def _fsyms(x): + if symbolic.issymbolic(x): + return set(map(str, x.free_symbols)) + return set() + + if symbols_that_change and any(_fsyms(s) & symbols_that_change for s in indesc.shape): + callsite_code += make_plan + else: + init_code += make_plan + + # Execute plan + callsite_code += f''' + cufftSetStream({plan_name}, __dace_current_stream); + cufftXtExec({plan_name}, _inp, _out, {direction}); + ''' + + return nodes.Tasklet(f'cufft_{tasklet_prefix}fft', {'_inp'}, {'_out'}, + callsite_code, + language=dtypes.Language.CPP, + state_fields=fields, + code_init=init_code, + code_exit=exit_code) + + +################################################################################################## +# Helper functions +################################################################################################## + + +def _get_input_and_output(state: SDFGState, node: nodes.LibraryNode): + """ + Helper function that returns the input and output arrays of the library node + """ + in_edge = next(e for e in state.in_edges(node) if e.dst_conn) + out_edge = next(e for e in state.out_edges(node) if e.src_conn) + return in_edge.data.data, out_edge.data.data + + +def _types_to_cufft(indtype: dtypes.typeclass, outdtype: dtypes.typeclass): + typedict = { + dtypes.float32: 'R', + dtypes.float64: 'D', + dtypes.complex64: 'C', + dtypes.complex128: 'Z', + } + return f'CUFFT_{typedict[indtype]}2{typedict[outdtype]}' diff --git a/dace/libraries/standard/nodes/transpose.py b/dace/libraries/standard/nodes/transpose.py index 58c6cfc33e..e2795ef951 100644 --- a/dace/libraries/standard/nodes/transpose.py +++ b/dace/libraries/standard/nodes/transpose.py @@ -100,6 +100,12 @@ class ExpandTransposeMKL(ExpandTransformation): @staticmethod def expansion(node, state, sdfg): node.validate(sdfg, state) + + # 
Fall back to native implementation if input and output types are not the same + if (sdfg.arrays[list(state.in_edges_by_connector(node, '_inp'))[0].data.data].dtype != sdfg.arrays[list( + state.out_edges_by_connector(node, '_out'))[0].data.data].dtype): + return ExpandTransposePure.make_sdfg(node, state, sdfg) + dtype = node.dtype if dtype == dace.float32: func = "somatcopy" @@ -141,22 +147,30 @@ class ExpandTransposeOpenBLAS(ExpandTransformation): @staticmethod def expansion(node, state, sdfg): node.validate(sdfg, state) + + # Fall back to native implementation if input and output types are not the same + if (sdfg.arrays[list(state.in_edges_by_connector(node, '_inp'))[0].data.data].dtype != sdfg.arrays[list( + state.out_edges_by_connector(node, '_out'))[0].data.data].dtype): + return ExpandTransposePure.make_sdfg(node, state, sdfg) + dtype = node.dtype cast = "" if dtype == dace.float32: func = "somatcopy" alpha = "1.0f" + cast = '' elif dtype == dace.float64: func = "domatcopy" alpha = "1.0" + cast = '' elif dtype == dace.complex64: func = "comatcopy" - cast = "(float*)" - alpha = f"{cast}dace::blas::BlasConstants::Get().Complex64Pone()" + alpha = "dace::blas::BlasConstants::Get().Complex64Pone()" + cast = '(float*)' elif dtype == dace.complex128: func = "zomatcopy" - cast = "(double*)" - alpha = f"{cast}dace::blas::BlasConstants::Get().Complex128Pone()" + alpha = "dace::blas::BlasConstants::Get().Complex128Pone()" + cast = '(double*)' else: raise ValueError("Unsupported type for OpenBLAS omatcopy extension: " + str(dtype)) # TODO: Add stride support @@ -164,8 +178,8 @@ def expansion(node, state, sdfg): # Adaptations for BLAS API order = 'CblasRowMajor' trans = 'CblasTrans' - code = ("cblas_{f}({o}, {t}, {m}, {n}, {a}, {c}_inp, " - "{n}, {c}_out, {m});").format(f=func, o=order, t=trans, m=m, n=n, a=alpha, c=cast) + code = ("cblas_{f}({o}, {t}, {m}, {n}, {cast}{a}, {cast}_inp, " + "{n}, {cast}_out, {m});").format(f=func, o=order, t=trans, m=m, n=n, a=alpha, cast=cast) tasklet = dace.sdfg.nodes.Tasklet(node.name, node.in_connectors, node.out_connectors, @@ -184,6 +198,11 @@ def expansion(node, state, sdfg, **kwargs): node.validate(sdfg, state) dtype = node.dtype + # Fall back to native implementation if input and output types are not the same + if (sdfg.arrays[list(state.in_edges_by_connector(node, '_inp'))[0].data.data].dtype != sdfg.arrays[list( + state.out_edges_by_connector(node, '_out'))[0].data.data].dtype): + return ExpandTransposePure.make_sdfg(node, state, sdfg) + try: func, cdtype, factort = blas_helpers.cublas_type_metadata(dtype) except TypeError as ex: diff --git a/tests/library/fft_test.py b/tests/library/fft_test.py new file mode 100644 index 0000000000..440d0a46cf --- /dev/null +++ b/tests/library/fft_test.py @@ -0,0 +1,101 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. 
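+#
+# These tests drive the new FFT library nodes through the NumPy frontend
+# (np.fft.fft / np.fft.ifft). As a rough, hypothetical sketch of the math
+# behind the 'pure' (naive DFT) expansion added in this patch, and not part
+# of the test suite itself: the forward transform is y = factor * (E @ x)
+# with E[k, n] = exp(-2j * pi * k * n / N), so for any x:
+#
+#     N = 8
+#     k = np.arange(N)
+#     E = np.exp(-2j * np.pi * k * k[:, None] / N)
+#     x = np.random.rand(N) + 1j * np.random.rand(N)
+#     assert np.allclose(E @ x, np.fft.fft(x))
+#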
+import pytest +import numpy as np + +import dace + + +@pytest.mark.parametrize('symbolic', (False, True)) +def test_fft(symbolic): + if symbolic: + N = dace.symbol('N') + else: + N = 21 + + @dace.program + def tester(x: dace.complex128[N]): + return np.fft.fft(x) + + a = np.random.rand(21) + 1j * np.random.rand(21) + b = tester(a) + assert np.allclose(b, np.fft.fft(a)) + + +def test_fft_r2c(): + """ + Tests implicit conversion to complex types + """ + + @dace.program + def tester(x: dace.float32[20]): + return np.fft.fft(x) + + a = np.random.rand(20).astype(np.float32) + b = tester(a) + assert b.dtype == np.complex64 + assert np.allclose(b, np.fft.fft(a)) + + +@pytest.mark.parametrize('norm', ('backward', 'forward', 'ortho')) +def test_ifft(norm): + + @dace.program + def tester(x: dace.complex128[21]): + return np.fft.ifft(x, norm=norm) + + a = np.random.rand(21) + 1j * np.random.rand(21) + b = tester(a) + assert np.allclose(b, np.fft.ifft(a, norm=norm)) + + +@pytest.mark.gpu +def test_cufft(): + import dace.libraries.fft as fftlib + + @dace.program + def tester(x: dace.complex128[210]): + return np.fft.fft(x) + + sdfg = tester.to_sdfg() + sdfg.apply_gpu_transformations() + fftlib.FFT.default_implementation = 'cuFFT' + sdfg.expand_library_nodes() + fftlib.FFT.default_implementation = 'pure' + + a = np.random.rand(210) + 1j * np.random.rand(210) + b = sdfg(a) + assert np.allclose(b, np.fft.fft(a)) + + +@pytest.mark.gpu +def test_cufft_twoplans(): + import dace.libraries.fft as fftlib + + @dace.program + def tester(x: dace.complex128[210], y: dace.complex64[19]): + return np.fft.fft(x), np.fft.ifft(y, norm='forward') + + sdfg = tester.to_sdfg() + sdfg.apply_gpu_transformations() + fftlib.FFT.default_implementation = 'cuFFT' + fftlib.IFFT.default_implementation = 'cuFFT' + sdfg.expand_library_nodes() + fftlib.FFT.default_implementation = 'pure' + fftlib.IFFT.default_implementation = 'pure' + + a = np.random.rand(210) + 1j * np.random.rand(210) + b = (np.random.rand(19) + 1j * np.random.rand(19)).astype(np.complex64) + c, d = sdfg(a, b) + assert np.allclose(c, np.fft.fft(a)) + assert np.allclose(d, np.fft.ifft(b, norm='forward')) + + +if __name__ == '__main__': + test_fft(False) + test_fft(True) + test_fft_r2c() + test_ifft('backward') + test_ifft('forward') + test_ifft('ortho') + test_cufft() + test_cufft_twoplans() diff --git a/tests/numpy/array_creation_test.py b/tests/numpy/array_creation_test.py index 7329b48b3f..a1f6d0329f 100644 --- a/tests/numpy/array_creation_test.py +++ b/tests/numpy/array_creation_test.py @@ -152,6 +152,42 @@ def test_arange_6(): return np.arange(2.5, 10, 3) +@compare_numpy_output() +def test_linspace_1(): + return np.linspace(2.5, 10, num=3) + + +@compare_numpy_output() +def test_linspace_2(): + space, step = np.linspace(2.5, 10, num=3, retstep=True) + return space, step + + +@compare_numpy_output() +def test_linspace_3(): + a = np.array([1, 2, 3]) + return np.linspace(a, 5, num=10) + + +@compare_numpy_output() +def test_linspace_4(): + a = np.array([[1, 2, 3], [4, 5, 6]]) + space, step = np.linspace(a, 10, endpoint=False, retstep=True) + return space, step + + +@compare_numpy_output() +def test_linspace_5(): + a = np.array([[1, 2, 3], [4, 5, 6]]) + b = np.array([[5], [10]]) + return np.linspace(a, b, endpoint=False, axis=1) + + +@compare_numpy_output() +def test_linspace_6(): + return np.linspace(-5, 5.5, dtype=np.float32) + + @dace.program def program_strides_0(): A = dace.ndarray((2, 2), dtype=dace.int32, strides=(2, 1)) @@ -267,6 +303,12 @@ def 
ones_scalar_size(k: dace.int32): test_arange_4() test_arange_5() test_arange_6() + test_linspace_1() + test_linspace_2() + test_linspace_3() + test_linspace_4() + test_linspace_5() + test_linspace_6() test_strides_0() test_strides_1() test_strides_2() diff --git a/tests/numpy/attention_simple_test.py b/tests/numpy/attention_simple_test.py index 49558a154b..2ce0205e3f 100644 --- a/tests/numpy/attention_simple_test.py +++ b/tests/numpy/attention_simple_test.py @@ -11,7 +11,7 @@ def dace_softmax(X_in: dace.float32[N], X_out: dace.float32[N]): tmp_max = dace.reduce(lambda a, b: max(a, b), X_in) - X_out[:] = exp(X_in - tmp_max) + X_out[:] = np.exp(X_in - tmp_max) tmp_sum = dace.reduce(lambda a, b: a + b, X_out, identity=0) X_out[:] /= tmp_sum diff --git a/tests/numpy/attribute_test.py b/tests/numpy/attribute_test.py index 2181883015..e011eafc89 100644 --- a/tests/numpy/attribute_test.py +++ b/tests/numpy/attribute_test.py @@ -54,7 +54,50 @@ def fn(a: dace.float64[N, F_in], b: dace.float64[N, heads, F_out], c: dace.float assert np.allclose(c, c_expected) +def test_nested_attribute(): + + @dace.program + def tester(a: dace.complex128[20, 10]): + return a.T.real + + r = np.random.rand(20, 10) + im = np.random.rand(20, 10) + a = r + 1j * im + res = tester(a) + assert np.allclose(res, r.T) + + +def test_attribute_of_expr(): + """ + Regression reported in Issue #1295. + """ + + @dace.program + def tester(a: dace.float64[20, 20], b: dace.float64[20, 20], c: dace.float64[20, 20]): + c[:, :] = (a @ b).T + + a = np.random.rand(20, 20) + b = np.random.rand(20, 20) + c = np.random.rand(20, 20) + ref = (a @ b).T + tester(a, b, c) + assert np.allclose(c, ref) + + +def test_attribute_function(): + + @dace.program + def tester(): + return np.arange(10).reshape(10, 1) + + a = tester() + assert np.allclose(a, np.arange(10).reshape(10, 1)) + + if __name__ == '__main__': test_attribute_in_ranged_loop() test_attribute_in_ranged_loop_symbolic() test_attribute_new_state() + test_nested_attribute() + test_attribute_of_expr() + test_attribute_function() diff --git a/tests/numpy/concat_test.py b/tests/numpy/concat_test.py new file mode 100644 index 0000000000..614258e34f --- /dev/null +++ b/tests/numpy/concat_test.py @@ -0,0 +1,133 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. 
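+#
+# Each @compare_numpy_output() test below is run twice, once compiled as a
+# dace.program and once as plain NumPy, and the decorator asserts that both
+# results match (see tests/numpy/common.py). A minimal, hypothetical instance
+# of the pattern, not part of this suite:
+#
+#     @compare_numpy_output()
+#     def concat_example():
+#         a = np.zeros([4, 4], dtype=np.float32)
+#         b = np.ones([4, 1], dtype=np.float32)
+#         return np.concatenate((a, b), axis=1)
+#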
+import dace +import numpy as np +from common import compare_numpy_output +import pytest + +M = 10 +N = 20 +K = 30 + + +@compare_numpy_output() +def test_concatenate(): + a = np.zeros([N, N], dtype=np.float32) + b = np.ones([N, 1], dtype=np.float32) + return np.concatenate((a, b), axis=-1) + + +@compare_numpy_output() +def test_concatenate_four(): + a = np.zeros([N, N], dtype=np.float32) + b = np.ones([N, 1], dtype=np.float32) + c = np.full([N, M], 2.0, dtype=np.float32) + return np.concatenate((a, b, c, a), axis=-1) + + +@compare_numpy_output() +def test_concatenate_out(): + a = np.zeros([N, N], dtype=np.float32) + b = np.ones([M, N], dtype=np.float32) + c = np.full([N + M, N], -1, dtype=np.float32) + np.concatenate([a, b], out=c) + return c + 1 + + +def test_concatenate_symbolic(): + n = dace.symbol('n') + m = dace.symbol('m') + k = dace.symbol('k') + + @dace.program + def tester(a: dace.float64[k, m], b: dace.float64[k, n]): + return np.concatenate((a, b), axis=1) + + aa = np.random.rand(10, 4) + bb = np.random.rand(10, 5) + cc = tester(aa, bb) + assert tuple(cc.shape) == (10, 9) + assert np.allclose(np.concatenate((aa, bb), axis=1), cc) + + +def test_concatenate_fail(): + with pytest.raises(ValueError): + + @dace.program + def tester(a: dace.float64[K, M], b: dace.float64[N, K]): + return np.concatenate((a, b), axis=1) + + aa = np.random.rand(K, M) + bb = np.random.rand(N, K) + tester(aa, bb) + + +@compare_numpy_output() +def test_concatenate_flatten(): + a = np.zeros([1, 2, 3], dtype=np.float32) + b = np.ones([4, 5, 6], dtype=np.float32) + return np.concatenate([a, b], axis=None) + + +@compare_numpy_output() +def test_stack(): + a = np.zeros([N, M, K], dtype=np.float32) + b = np.ones([N, M, K], dtype=np.float32) + return np.stack((a, b), axis=-1) + + +@compare_numpy_output() +def test_vstack(): + a = np.zeros([N, M], dtype=np.float32) + b = np.ones([N, M], dtype=np.float32) + return np.vstack((a, b)) + + +@compare_numpy_output() +def test_vstack_1d(): + a = np.zeros([N], dtype=np.float32) + b = np.ones([N], dtype=np.float32) + return np.vstack((a, b)) + + +@compare_numpy_output() +def test_hstack(): + a = np.zeros([N, M], dtype=np.float32) + b = np.ones([N, M], dtype=np.float32) + return np.hstack((a, b)) + + +@compare_numpy_output() +def test_hstack_1d(): + a = np.zeros([N], dtype=np.float32) + b = np.ones([N], dtype=np.float32) + return np.hstack((a, b)) + + +@compare_numpy_output() +def test_dstack(): + a = np.zeros([N, M, K], dtype=np.float32) + b = np.ones([N, M, K], dtype=np.float32) + return np.dstack((a, b)) + + +@compare_numpy_output() +def test_dstack_4d(): + a = np.zeros([N, M, K, K], dtype=np.float32) + b = np.ones([N, M, K, K], dtype=np.float32) + return np.dstack((a, b)) + + +if __name__ == "__main__": + test_concatenate() + test_concatenate_four() + test_concatenate_out() + test_concatenate_symbolic() + test_concatenate_fail() + test_concatenate_flatten() + test_stack() + test_vstack() + test_vstack_1d() + test_hstack() + test_hstack_1d() + test_dstack() + test_dstack_4d() diff --git a/tests/numpy/nested_call_subarray_test.py b/tests/numpy/nested_call_subarray_test.py index 6a92b004fa..7501652328 100644 --- a/tests/numpy/nested_call_subarray_test.py +++ b/tests/numpy/nested_call_subarray_test.py @@ -8,7 +8,7 @@ @dace.program def dace_softmax_ncs(X_in: dace.float32[N], X_out: dace.float32[N]): tmp_max = dace.reduce(lambda a, b: a + b, X_in, identity=0) - X_out[:] = exp(X_in - tmp_max) + X_out[:] = np.exp(X_in - tmp_max) tmp_sum = dace.reduce(lambda a, b: max(a, b), X_in) 
X_out[:] /= tmp_sum @@ -22,7 +22,7 @@ def test_ncs_local_program(): @dace.program def dace_softmax_localprog(X_in: dace.float32[N], X_out: dace.float32[N]): tmp_max = dace.reduce(lambda a, b: a + b, X_in, identity=0) - X_out[:] = exp(X_in - tmp_max) + X_out[:] = np.exp(X_in - tmp_max) tmp_sum = dace.reduce(lambda a, b: max(a, b), X_in) X_out[:] /= tmp_sum diff --git a/tests/numpy/split_test.py b/tests/numpy/split_test.py new file mode 100644 index 0000000000..e4088754e8 --- /dev/null +++ b/tests/numpy/split_test.py @@ -0,0 +1,142 @@ +# Copyright 2019-2024 ETH Zurich and the DaCe authors. All rights reserved. +""" +Tests variants of the numpy split array manipulation. +""" +import dace +import numpy as np +from common import compare_numpy_output +import pytest + +M = 9 +N = 20 +K = 30 + + +@compare_numpy_output() +def test_split(): + arr = np.arange(M) + a, b, c = np.split(arr, 3) + return a + b + c + + +def test_uneven_split_fail(): + with pytest.raises(ValueError): + + @dace.program + def tester(): + arr = np.arange(N) + a, b, c = np.split(arr, 3) + return a + b + c + + tester() + + +def test_symbolic_split_fail(): + with pytest.raises(ValueError): + n = dace.symbol('n') + + @dace.program + def tester(): + arr = np.arange(N) + a, b, c = np.split(arr, n) + return a + b + c + + tester() + + +def test_array_split_fail(): + with pytest.raises(ValueError): + + @dace.program + def tester(): + arr = np.arange(N) + split = np.arange(N) + a, b, c = np.split(arr, split) + return a + b + c + + tester() + + +@compare_numpy_output() +def test_array_split(): + arr = np.arange(N) + a, b, c = np.array_split(arr, 3) + return a, b, c + + +@compare_numpy_output() +def test_array_split_multidim(): + arr = np.ones((N, N)) + a, b, c = np.array_split(arr, 3, axis=1) + return a, b, c + + +@compare_numpy_output() +def test_split_sequence(): + arr = np.arange(N) + a, b = np.split(arr, [3]) + return a, b + + +@compare_numpy_output() +def test_split_sequence_2(): + arr = np.arange(M) + a, b, c = np.split(arr, [3, 6]) + return a + b + c + + +def test_split_sequence_symbolic(): + n = dace.symbol('n') + + @dace.program + def tester(arr: dace.float64[3 * n]): + a, b, c = np.split(arr, [n, n + 2]) + return a, b, c + + nval = K // 3 + a = np.random.rand(K) + ra, rb, rc = tester(a) + assert ra.shape[0] == nval + assert rb.shape[0] == 2 + assert rc.shape[0] == K - nval - 2 + ref = np.split(a, [nval, nval + 2]) + assert len(ref) == 3 + assert np.allclose(ra, ref[0]) + assert np.allclose(rb, ref[1]) + assert np.allclose(rc, ref[2]) + + +@compare_numpy_output() +def test_vsplit(): + arr = np.ones((N, M)) + a, b = np.vsplit(arr, 2) + return a, b + + +@compare_numpy_output() +def test_hsplit(): + arr = np.ones((M, N)) + a, b = np.hsplit(arr, 2) + return a, b + + +@compare_numpy_output() +def test_dsplit_4d(): + arr = np.ones([N, M, K, K], dtype=np.float32) + a, b, c = np.dsplit(arr, 3) + return a, b, c + + +if __name__ == "__main__": + test_split() + test_uneven_split_fail() + test_symbolic_split_fail() + test_array_split_fail() + test_array_split() + test_array_split_multidim() + test_split_sequence() + test_split_sequence_2() + test_split_sequence_symbolic() + test_vsplit() + test_hsplit() + test_dsplit_4d() diff --git a/tests/numpy/ufunc_test.py b/tests/numpy/ufunc_test.py index 06bd4c3189..b769ab1082 100644 --- a/tests/numpy/ufunc_test.py +++ b/tests/numpy/ufunc_test.py @@ -1304,6 +1304,11 @@ def test_ufunc_trunc_u(A: dace.uint32[10]): return np.trunc(A) +@compare_numpy_output() +def test_ufunc_clip(A: dace.float32[10]): + 
return np.clip(A, 0.2, 0.5) + + if __name__ == "__main__": test_ufunc_add_ff() test_ufunc_subtract_ff() @@ -1542,3 +1547,4 @@ def test_ufunc_trunc_u(A: dace.uint32[10]): test_ufunc_trunc_c() test_ufunc_trunc_f() test_ufunc_trunc_u() + test_ufunc_clip() From 945b5ce4ad26ba9047467d95e75e9aec48792506 Mon Sep 17 00:00:00 2001 From: Philipp Schaad Date: Thu, 31 Oct 2024 12:51:56 +0100 Subject: [PATCH 07/33] Fix jupyter's version of SDFV (#1714) This requires https://github.com/spcl/dace-webclient/pull/179 to be merged before being ready. --- dace/sdfg/sdfg.py | 8 +- dace/viewer/webclient | 2 +- tutorials/explicit.ipynb | 35 ++--- tutorials/getting_started.ipynb | 112 ++++++-------- tutorials/numpy_frontend.ipynb | 40 ++--- tutorials/sdfg_api.ipynb | 82 ++++++----- tutorials/transformations.ipynb | 251 +++++++++++++++----------------- 7 files changed, 255 insertions(+), 275 deletions(-) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index f25a6e24d5..cb8a7d5c2d 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -1464,9 +1464,11 @@ def _repr_html_(self): var sdfg_{uid} = {sdfg}; """.format( # Dumping to a string so that Jupyter Javascript can parse it # recursively diff --git a/dace/viewer/webclient b/dace/viewer/webclient index c6b8fe4fd2..64861bbc05 160000 --- a/dace/viewer/webclient +++ b/dace/viewer/webclient @@ -1 +1 @@ -Subproject commit c6b8fe4fd2c3616b0480ead4c24d8012b91a31fd +Subproject commit 64861bbc054c62bc6cb3f8525bfc4703d6c5e364 diff --git a/tutorials/explicit.ipynb b/tutorials/explicit.ipynb index 45d172cf35..de718ffc4a 100644 --- a/tutorials/explicit.ipynb +++ b/tutorials/explicit.ipynb @@ -123,15 +123,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -159,16 +161,7 @@ "cell_type": "code", "execution_count": 6, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING: Casting scalar argument \"M\" from int to \n", - "WARNING: Casting scalar argument \"N\" from int to \n" - ] - } - ], + "outputs": [], "source": [ "sdfg(A=A, B=B, M=A.shape[0], N=A.shape[1])" ] @@ -201,7 +194,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -225,7 +218,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -310,13 +303,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "WARNING: Casting scalar argument \"threshold\" from int to \n" + "WARNING: Passing uint32 array argument \"outsz\" to a int32 array\n" ] }, { "data": { "text/plain": [ - "array([121], dtype=uint32)" + "array([114], dtype=uint32)" ] }, "execution_count": 13, @@ -351,7 +344,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "dace_dev", "language": "python", "name": "python3" }, @@ -365,7 +358,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.12.0" } }, "nbformat": 4, diff --git a/tutorials/getting_started.ipynb b/tutorials/getting_started.ipynb index 4405c28d56..266d207abc 100644 --- a/tutorials/getting_started.ipynb +++ b/tutorials/getting_started.ipynb @@ -13,22 +13,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 16, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "import dace" ] @@ -42,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -66,8 +53,8 @@ { "data": { "text/plain": [ - "array([[0.74867876, 0.85403223, 0.16573784],\n", - " [0.71994615, 0.29855314, 0.21483992]])" + "array([[0.02638476, 0.15801766, 0.60640768],\n", + " [0.75281897, 0.02027034, 0.92066681]])" ] }, "execution_count": 3, @@ -89,8 +76,8 @@ { "data": { "text/plain": [ - "array([[1.49735752, 1.70806445, 0.33147568],\n", - " [1.4398923 , 0.59710627, 0.42967985]])" + "array([[0.05276951, 0.31603533, 1.21281536],\n", + " [1.50563794, 0.04054068, 1.84133362]])" ] }, "execution_count": 4, @@ -113,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -121,22 +108,21 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ "SDFG (getstarted)" ] }, - "execution_count": 5, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -174,7 +160,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -190,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -201,7 +187,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -209,22 +195,21 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ "SDFG (getstarted_sym)" ] }, - "execution_count": 8, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -248,19 +233,19 @@ { "data": { "text/plain": [ - "array([[1.63216549, 1.26522381, 0.21606686, ..., 0.56988572, 1.12572538,\n", - " 1.72701877],\n", - " [0.3829452 , 1.52386969, 0.82165197, ..., 1.3105662 , 1.19336786,\n", - " 1.43671993],\n", - " [1.55277426, 1.50918516, 1.30665626, ..., 1.06562809, 1.53069088,\n", - " 1.10071159],\n", + "array([[0.98818461, 1.27933885, 0.2033508 , ..., 0.547033 , 0.4299565 ,\n", + " 0.24654365],\n", + " [1.91945996, 0.8587834 , 1.6074685 , ..., 0.60969216, 1.7881462 ,\n", + " 1.6251679 ],\n", + " [0.09656663, 0.86573612, 0.79912191, ..., 1.50199177, 0.14342504,\n", + " 0.77152323],\n", " ...,\n", - " [0.60629736, 1.73240929, 1.26797782, ..., 1.72034476, 1.56691557,\n", - " 0.22283613],\n", - " [1.96245486, 1.60559508, 0.02009914, ..., 1.40944583, 1.44560312,\n", - " 0.37804927],\n", - " [1.17875002, 0.96963921, 0.28278902, ..., 1.56747976, 0.4616313 ,\n", - " 0.94999278]])" + " [1.86926975, 0.16524055, 0.57659078, ..., 0.06706506, 1.94858343,\n", + " 0.21332081],\n", + " [0.78987173, 0.32493361, 0.33111051, ..., 0.41438505, 1.6625166 ,\n", + " 1.4539469 ],\n", + " [0.32619914, 0.84155838, 0.85757214, ..., 0.93809 , 0.25236549,\n", + " 1.95588663]])" ] }, "execution_count": 9, @@ -283,7 +268,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -299,7 +284,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -315,7 +300,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -324,14 +309,14 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "12 ms ± 143 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "12 ms ± 258 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], @@ -341,14 +326,14 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "3.86 ms ± 271 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "5.1 ms ± 470 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], @@ -367,7 +352,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -375,22 +360,21 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ "SDFG (sse_sigma)" ] }, - "execution_count": 17, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -422,7 +406,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "dace_dev", "language": "python", "name": "python3" }, @@ -436,7 +420,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.12.0" } }, "nbformat": 4, diff --git a/tutorials/numpy_frontend.ipynb b/tutorials/numpy_frontend.ipynb index fafda2f1b1..83ca6875ba 100644 --- a/tutorials/numpy_frontend.ipynb +++ b/tutorials/numpy_frontend.ipynb @@ -107,15 +107,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -267,15 +269,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -328,15 +332,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -461,7 +467,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "dace_dev", "language": "python", "name": "python3" }, @@ -475,7 +481,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.12.0" } }, "nbformat": 4, diff --git a/tutorials/sdfg_api.ipynb b/tutorials/sdfg_api.ipynb index 645158ce88..beb4f4b6b0 100644 --- a/tutorials/sdfg_api.ipynb +++ b/tutorials/sdfg_api.ipynb @@ -112,15 +112,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -200,15 +202,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -240,15 +244,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -287,15 +293,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -347,15 +355,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -395,15 +405,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -477,7 +489,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -486,14 +498,14 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Difference: 1.6358224e-06\n" + "Difference: 7.1136246\n" ] } ], @@ -504,7 +516,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "dace_dev", "language": "python", "name": "python3" }, @@ -518,7 +530,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.12.0" } }, "nbformat": 4, diff --git a/tutorials/transformations.ipynb b/tutorials/transformations.ipynb index d54b294e6e..931df79e18 100644 --- a/tutorials/transformations.ipynb +++ b/tutorials/transformations.ipynb @@ -70,15 +70,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -112,15 +114,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -155,15 +159,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -197,15 +203,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -348,15 +356,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -399,15 +409,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -456,30 +468,22 @@ "4. Transformation GPUTransformLocalStorage in outer_fused[__i0=0:1000, __i1=0:1000]\n", "5. Transformation GPUTransformMap in outer_fused[__i0=0:1000, __i1=0:1000]\n", "6. Transformation GPUTransformSDFG in []\n", - "7. Transformation MapDimShuffle in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", - "8. Transformation MapExpansion in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", - "9. Transformation MapFission in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", - "10. Transformation MapTiling in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", - "11. Transformation MapTilingWithOverlap in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", - "12. Transformation MapUnroll in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", - "13. Transformation NestSDFG in []\n", - "14. Transformation ReductionNOperation in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", - "15. Transformation StripMining in outer_fused: ['__i0', '__i1']\n", - "16. Transformation TaskletFusion in [Tasklet (_Mult_), AccessNode (__tmp0), Tasklet (_Add_)]\n" - ] - }, - { - "name": "stdin", - "output_type": "stream", - "text": [ - "Select the pattern to apply (0 - 16 or name$id): MapExpansion$0\n" + "7. Transformation MapExpansion in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", + "8. Transformation MapFission in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", + "9. Transformation MapTiling in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", + "10. Transformation MapTilingWithOverlap in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", + "11. Transformation MapUnroll in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", + "12. Transformation NestSDFG in []\n", + "13. Transformation ReductionNOperation in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", + "14. Transformation StripMining in outer_fused: ['__i0', '__i1']\n", + "15. Transformation TaskletFusion in [Tasklet (_Mult_), AccessNode (__tmp0), Tasklet (_Add_)]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "You selected (MapExpansion$0) pattern MapExpansion in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])] with parameters {}\n", + "You selected (7) pattern MapExpansion in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])] with parameters {}\n", "0. Transformation ElementWiseArrayOperation in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", "1. Transformation FPGATransformSDFG in []\n", "2. Transformation FPGATransformState in [SDFGState (BinOp_5)]\n", @@ -491,88 +495,63 @@ "8. Transformation InLocalStorage in outer_fused[__i0=0:1000] -> outer_fused___i1[__i1=0:1000]\n", "9. Transformation MPITransformMap in [MapEntry (outer_fused[__i0=0:1000])]\n", "10. Transformation MPITransformMap in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", - "11. Transformation MapDimShuffle in [MapEntry (outer_fused[__i0=0:1000])]\n", - "12. Transformation MapDimShuffle in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", - "13. Transformation MapFission in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", - "14. Transformation MapInterchange in [MapEntry (outer_fused[__i0=0:1000]), MapEntry (outer_fused___i1[__i1=0:1000])]\n", - "15. Transformation MapTiling in [MapEntry (outer_fused[__i0=0:1000])]\n", - "16. Transformation MapTiling in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", - "17. Transformation MapTilingWithOverlap in [MapEntry (outer_fused[__i0=0:1000])]\n", - "18. 
Transformation MapTilingWithOverlap in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", - "19. Transformation MapToForLoop in [MapEntry (outer_fused[__i0=0:1000])]\n", - "20. Transformation MapToForLoop in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "11. Transformation MapFission in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "12. Transformation MapInterchange in [MapEntry (outer_fused[__i0=0:1000]), MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "13. Transformation MapTiling in [MapEntry (outer_fused[__i0=0:1000])]\n", + "14. Transformation MapTiling in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "15. Transformation MapTilingWithOverlap in [MapEntry (outer_fused[__i0=0:1000])]\n", + "16. Transformation MapTilingWithOverlap in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "17. Transformation MapToForLoop in [MapEntry (outer_fused[__i0=0:1000])]\n", + "18. Transformation MapToForLoop in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "19. Transformation MapToForLoopRegion in [MapEntry (outer_fused[__i0=0:1000])]\n", + "20. Transformation MapToForLoopRegion in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", "21. Transformation MapUnroll in [MapEntry (outer_fused[__i0=0:1000])]\n", "22. Transformation NestSDFG in []\n", "23. Transformation OutLocalStorage in outer_fused___i1[__i1=0:1000] -> outer_fused[__i0=0:1000]\n", "24. Transformation ReductionNOperation in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", "25. Transformation StripMining in outer_fused: ['__i0']\n", "26. Transformation StripMining in outer_fused___i1: ['__i1']\n", - "27. Transformation TaskletFusion in [Tasklet (_Mult_), AccessNode (__tmp0), Tasklet (_Add_)]\n" - ] - }, - { - "name": "stdin", - "output_type": "stream", - "text": [ - "Select the pattern to apply (0 - 27 or name$id): MapTiling$0(tile_sizes=(128,))\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "You selected (MapTiling$0) pattern MapTiling in [MapEntry (outer_fused[__i0=0:1000])] with parameters {'tile_sizes': (128,)}\n", - "0. Transformation ElementWiseArrayOperation in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", - "1. Transformation FPGATransformSDFG in []\n", - "2. Transformation FPGATransformState in [SDFGState (BinOp_5)]\n", - "3. Transformation GPUGridStridedTiling in [MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1]), MapEntry (outer_fused___i1[__i1=0:1000])]\n", - "4. Transformation GPUGridStridedTiling in [MapEntry (outer_fused[tile___i0=0:1000:128]), MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1])]\n", - "5. Transformation GPUTransformLocalStorage in outer_fused[tile___i0=0:1000:128]\n", - "6. Transformation GPUTransformMap in outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1]\n", - "7. Transformation GPUTransformMap in outer_fused___i1[__i1=0:1000]\n", - "8. Transformation GPUTransformMap in outer_fused[tile___i0=0:1000:128]\n", + "27. Transformation TaskletFusion in [Tasklet (_Mult_), AccessNode (__tmp0), Tasklet (_Add_)]\n", + "You selected (11) pattern MapFission in [MapEntry (outer_fused___i1[__i1=0:1000])] with parameters {}\n", + "0. Transformation BufferTiling in [MapExit (outer_fused___i1_fission[__i1=0:1000]), AccessNode (__tmp0), MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "1. Transformation ElementWiseArrayOperation in [MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "2. Transformation ElementWiseArrayOperation in [MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "3. Transformation FPGATransformSDFG in []\n", + "4. 
Transformation FPGATransformState in [SDFGState (BinOp_5)]\n", + "5. Transformation GPUTransformLocalStorage in outer_fused[__i0=0:1000]\n", + "6. Transformation GPUTransformMap in outer_fused[__i0=0:1000]\n", + "7. Transformation GPUTransformMap in outer_fused___i1_fission[__i1=0:1000]\n", + "8. Transformation GPUTransformMap in outer_fused___i1_fission[__i1=0:1000]\n", "9. Transformation GPUTransformSDFG in []\n", - "10. Transformation InLocalStorage in outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1] -> outer_fused___i1[__i1=0:1000]\n", - "11. Transformation InLocalStorage in outer_fused[tile___i0=0:1000:128] -> outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1]\n", - "12. Transformation MPITransformMap in [MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1])]\n", - "13. Transformation MPITransformMap in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", - "14. Transformation MPITransformMap in [MapEntry (outer_fused[tile___i0=0:1000:128])]\n", - "15. Transformation MapDimShuffle in [MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1])]\n", - "16. Transformation MapDimShuffle in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", - "17. Transformation MapDimShuffle in [MapEntry (outer_fused[tile___i0=0:1000:128])]\n", - "18. Transformation MapFission in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", - "19. Transformation MapInterchange in [MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1]), MapEntry (outer_fused___i1[__i1=0:1000])]\n", - "20. Transformation MapTiling in [MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1])]\n", - "21. Transformation MapTiling in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", - "22. Transformation MapTiling in [MapEntry (outer_fused[tile___i0=0:1000:128])]\n", - "23. Transformation MapTilingWithOverlap in [MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1])]\n", - "24. Transformation MapTilingWithOverlap in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", - "25. Transformation MapTilingWithOverlap in [MapEntry (outer_fused[tile___i0=0:1000:128])]\n", - "26. Transformation MapToForLoop in [MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1])]\n", - "27. Transformation MapToForLoop in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", - "28. Transformation MapToForLoop in [MapEntry (outer_fused[tile___i0=0:1000:128])]\n", - "29. Transformation MapUnroll in [MapEntry (outer_fused[tile___i0=0:1000:128])]\n", + "10. Transformation InLocalStorage in outer_fused[__i0=0:1000] -> outer_fused___i1_fission[__i1=0:1000]\n", + "11. Transformation InLocalStorage in outer_fused[__i0=0:1000] -> outer_fused___i1_fission[__i1=0:1000]\n", + "12. Transformation MPITransformMap in [MapEntry (outer_fused[__i0=0:1000])]\n", + "13. Transformation MPITransformMap in [MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "14. Transformation MPITransformMap in [MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "15. Transformation MapFission in [MapEntry (outer_fused[__i0=0:1000])]\n", + "16. Transformation MapFusion in [MapExit (outer_fused___i1_fission[__i1=0:1000]), AccessNode (__tmp0), MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "17. Transformation MapTiling in [MapEntry (outer_fused[__i0=0:1000])]\n", + "18. Transformation MapTiling in [MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "19. Transformation MapTiling in [MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "20. 
Transformation MapTilingWithOverlap in [MapEntry (outer_fused[__i0=0:1000])]\n", + "21. Transformation MapTilingWithOverlap in [MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "22. Transformation MapTilingWithOverlap in [MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "23. Transformation MapToForLoop in [MapEntry (outer_fused[__i0=0:1000])]\n", + "24. Transformation MapToForLoop in [MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "25. Transformation MapToForLoop in [MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "26. Transformation MapToForLoopRegion in [MapEntry (outer_fused[__i0=0:1000])]\n", + "27. Transformation MapToForLoopRegion in [MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "28. Transformation MapToForLoopRegion in [MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "29. Transformation MapUnroll in [MapEntry (outer_fused[__i0=0:1000])]\n", "30. Transformation NestSDFG in []\n", - "31. Transformation OutLocalStorage in outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1] -> outer_fused[tile___i0=0:1000:128]\n", - "32. Transformation OutLocalStorage in outer_fused___i1[__i1=0:1000] -> outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1]\n", - "33. Transformation ReductionNOperation in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "31. Transformation OTFMapFusion in [MapExit (outer_fused___i1_fission[__i1=0:1000]), AccessNode (__tmp0), MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "32. Transformation OutLocalStorage in outer_fused___i1_fission[__i1=0:1000] -> outer_fused[__i0=0:1000]\n", + "33. Transformation ReductionNOperation in [MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", "34. Transformation StripMining in outer_fused: ['__i0']\n", - "35. Transformation StripMining in outer_fused___i1: ['__i1']\n", - "36. Transformation StripMining in outer_fused: ['tile___i0']\n", - "37. Transformation TaskletFusion in [Tasklet (_Mult_), AccessNode (__tmp0), Tasklet (_Add_)]\n" - ] - }, - { - "name": "stdin", - "output_type": "stream", - "text": [ - "Select the pattern to apply (0 - 37 or name$id): \n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "35. Transformation StripMining in outer_fused___i1_fission: ['__i1']\n", + "36. Transformation StripMining in outer_fused___i1_fission: ['__i1']\n", + "37. Transformation Vectorization in [MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", + "38. Transformation Vectorization in [MapEntry (outer_fused___i1_fission[__i1=0:1000])]\n", "You did not select a valid option. Quitting optimization ...\n" ] } @@ -654,15 +633,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -786,7 +767,7 @@ { "data": { "text/plain": [ - "1" + "0" ] }, "execution_count": 14, @@ -815,15 +796,17 @@ "text/html": [ "\n", "
\n", - "
\n", + "
\n", "
\n", "\n", "" ], "text/plain": [ @@ -864,7 +847,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "dace_dev", "language": "python", "name": "python3" }, @@ -878,7 +861,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.12.0" } }, "nbformat": 4, From 9dd70bb02f676acefd8ebec7f136abb6f0009d25 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo Date: Fri, 1 Nov 2024 17:01:00 +0100 Subject: [PATCH 08/33] Fix broken codegen tutorial (#1720) Quick fix follow-up from https://github.com/spcl/dace/pull/1706 which left a broken notebook. Co-authored-by: Roman Cattaneo <> --- tutorials/codegen.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tutorials/codegen.ipynb b/tutorials/codegen.ipynb index 2c79f1a2e0..84b2cf7f01 100644 --- a/tutorials/codegen.ipynb +++ b/tutorials/codegen.ipynb @@ -497,8 +497,8 @@ " function_stream: CodeIOStream, callsite_stream: CodeIOStream):\n", " # The parameters here are:\n", " # sdfg: The SDFG we are currently generating.\n", - " # cfg: The current control flow graph (CFG) we are currently generating. For example, - " it can be the SDFG or a loop region. + " # cfg: The current control flow graph (CFG) we are currently generating. For example\n", + " # it can be the SDFG or a loop region.\n", " # scope: The subgraph of the state containing only the scope (map contents)\n", " # we want to generate the code for.\n", " # state_id: The state in the SDFG the subgraph is taken from (i.e.,\n", From 2c414919ad31f486e86fe9ac685035c4c3c04fc9 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo Date: Fri, 1 Nov 2024 18:51:20 +0100 Subject: [PATCH 09/33] CI: Update checkout and setup-python actions (#1718) GitHub Actions workflows use outdated versions of - actions/checkout - actions/setup-python These actions are built for specific node versions, which are now end of life. While the workflows continue to run, GitHub issues a warning (visible in the online interface) and runs them with newer versions of node. ![image](https://github.com/user-attachments/assets/159f4d86-33f5-4d9c-ad45-a5657ad51a57) Since both, checkout and setup-python, are basic actions, none of the features that DaCe workflows are using changed. We might see slight speedup from out of the box caching added to setup-python in recent versions. 
Parent issue: https://github.com/GEOS-ESM/SMT-Nebulae/issues/89 Co-authored-by: Roman Cattaneo <> --- .github/workflows/fpga-ci.yml | 2 +- .github/workflows/general-ci.yml | 4 ++-- .github/workflows/gpu-ci.yml | 2 +- .github/workflows/hardware_test.yml | 2 +- .github/workflows/heterogeneous-ci.yml | 2 +- .github/workflows/pyFV3-ci.yml | 6 +++--- .github/workflows/verilator_compatibility.yml | 4 ++-- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/fpga-ci.yml b/.github/workflows/fpga-ci.yml index ef8e5348da..2d6d42514f 100644 --- a/.github/workflows/fpga-ci.yml +++ b/.github/workflows/fpga-ci.yml @@ -16,7 +16,7 @@ jobs: if: ${{ !contains(github.event.pull_request.labels.*.name, 'no-ci') }} runs-on: [self-hosted, linux, intel-fpga, xilinx-fpga] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: submodules: 'recursive' - name: Install dependencies diff --git a/.github/workflows/general-ci.yml b/.github/workflows/general-ci.yml index faf0a727be..2044639e5f 100644 --- a/.github/workflows/general-ci.yml +++ b/.github/workflows/general-ci.yml @@ -18,11 +18,11 @@ jobs: simplify: [0,1,autoopt] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: submodules: 'recursive' - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/.github/workflows/gpu-ci.yml b/.github/workflows/gpu-ci.yml index 527e004478..b3af9c8c05 100644 --- a/.github/workflows/gpu-ci.yml +++ b/.github/workflows/gpu-ci.yml @@ -19,7 +19,7 @@ jobs: if: "!contains(github.event.pull_request.labels.*.name, 'no-ci')" runs-on: [self-hosted, gpu] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: submodules: 'recursive' - name: Install dependencies diff --git a/.github/workflows/hardware_test.yml b/.github/workflows/hardware_test.yml index 3fe32aaab7..e319c72587 100644 --- a/.github/workflows/hardware_test.yml +++ b/.github/workflows/hardware_test.yml @@ -4,7 +4,7 @@ jobs: test-rtl: runs-on: [self-hosted, linux, xilinx-fpga] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: submodules: 'recursive' - name: Install dependencies diff --git a/.github/workflows/heterogeneous-ci.yml b/.github/workflows/heterogeneous-ci.yml index 99b566e21f..62887ad208 100644 --- a/.github/workflows/heterogeneous-ci.yml +++ b/.github/workflows/heterogeneous-ci.yml @@ -19,7 +19,7 @@ jobs: if: "!contains(github.event.pull_request.labels.*.name, 'no-ci')" runs-on: [self-hosted, linux] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: submodules: 'recursive' - name: Install dependencies diff --git a/.github/workflows/pyFV3-ci.yml b/.github/workflows/pyFV3-ci.yml index f58fdf85ac..85c864e475 100644 --- a/.github/workflows/pyFV3-ci.yml +++ b/.github/workflows/pyFV3-ci.yml @@ -21,18 +21,18 @@ jobs: python-version: [3.11.7] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: repository: 'NOAA-GFDL/PyFV3' ref: 'ci/DaCe' submodules: 'recursive' path: 'pyFV3' - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: path: 'dace' submodules: 'recursive' - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install library dependencies diff --git a/.github/workflows/verilator_compatibility.yml 
b/.github/workflows/verilator_compatibility.yml index 7f43565812..dce0c9b1fb 100644 --- a/.github/workflows/verilator_compatibility.yml +++ b/.github/workflows/verilator_compatibility.yml @@ -17,14 +17,14 @@ jobs: steps: - name: trigger reason run: echo "Trigger Reason:" ${{ github.event.inputs.reason }} - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: checkout submodules run: git submodule update --init --recursive - name: install apt packages run: sudo apt-get update && sudo apt-get -y install git make autoconf g++ flex bison libfl2 libfl-dev - name: compile verilator run: git clone https://github.com/verilator/verilator.git && cd verilator && git fetch origin && if [ ! "${{ matrix.verilator_version }}" == "master" ]; then git checkout v${{ matrix.verilator_version }}; fi && autoconf && ./configure && make -j2 && sudo make install - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v5 with: python-version: '3.8' architecture: 'x64' From 636811dcacf768b4b8817c55f5e7f0eabb87973c Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Fri, 1 Nov 2024 21:03:52 -0700 Subject: [PATCH 10/33] Bump version and update dependencies (#1722) Removes websockets dependency and makes jinja2 dependency optional --- dace/cli/dacelab.py | 5 ----- dace/cli/sdfg_diff.py | 6 +++++- dace/cli/sdfv.py | 6 +++++- dace/version.py | 2 +- requirements.txt | 25 ++++++++----------------- setup.py | 2 +- 6 files changed, 20 insertions(+), 26 deletions(-) diff --git a/dace/cli/dacelab.py b/dace/cli/dacelab.py index 27a3215e09..647ec31a3d 100644 --- a/dace/cli/dacelab.py +++ b/dace/cli/dacelab.py @@ -2,11 +2,6 @@ # Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. import argparse -import numpy -import pickle -import json - -import dace from dace.frontend.octave import parse from dace.sdfg.nodes import AccessNode diff --git a/dace/cli/sdfg_diff.py b/dace/cli/sdfg_diff.py index 9c40e59f10..2ec0a3adf4 100644 --- a/dace/cli/sdfg_diff.py +++ b/dace/cli/sdfg_diff.py @@ -9,7 +9,6 @@ import tempfile from typing import Dict, Literal, Set, Tuple, Union -import jinja2 import dace from dace import memlet as mlt from dace.sdfg import nodes as nd @@ -179,6 +178,11 @@ def main(): diff_sets = _sdfg_diff(sdfg_A, sdfg_B, eq_strategy) if args.graphical: + try: + import jinja2 + except (ImportError, ModuleNotFoundError): + raise ImportError('Graphical SDFG diff requires jinja2, please install by running `pip install jinja2`') + basepath = os.path.join(os.path.dirname(os.path.realpath(dace.__file__)), 'viewer') template_loader = jinja2.FileSystemLoader(searchpath=os.path.join(basepath, 'templates')) template_env = jinja2.Environment(loader=template_loader) diff --git a/dace/cli/sdfv.py b/dace/cli/sdfv.py index 49255a1e7e..2012debe82 100644 --- a/dace/cli/sdfv.py +++ b/dace/cli/sdfv.py @@ -13,7 +13,6 @@ import dace import tempfile -import jinja2 def partialclass(cls, *args, **kwds): @@ -48,6 +47,11 @@ def view(sdfg: dace.SDFG, filename: Optional[Union[str, int]] = None, verbose: b os.close(fd) return + try: + import jinja2 + except (ImportError, ModuleNotFoundError): + raise ImportError('SDFG.view() requires jinja2, please install by running `pip install jinja2`') + if type(sdfg) is dace.SDFG: sdfg = dace.serialize.dumps(sdfg.to_json()) diff --git a/dace/version.py b/dace/version.py index 9513287c94..1f356cc57b 100644 --- a/dace/version.py +++ b/dace/version.py @@ -1 +1 @@ -__version__ = '0.16.1' +__version__ = '1.0.0' diff --git a/requirements.txt b/requirements.txt index 
3cc37cc468..b902968b73 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,21 +1,12 @@ -aenum==3.1.12 +aenum==3.1.15 astunparse==1.6.3 -certifi==2024.7.4 -charset-normalizer==3.1.0 -click==8.1.3 -dill==0.3.6 -fparser==0.1.3 -idna==3.7 -importlib-metadata==6.6.0 -Jinja2==3.1.4 -MarkupSafe==2.1.3 +dill==0.3.9 +fparser==0.1.4 mpmath==1.3.0 -networkx==3.1 -numpy==1.26.1 +networkx==3.4.2 +numpy==1.26.4 +packaging==24.1 ply==3.11 -PyYAML==6.0.1 +PyYAML==6.0.2 six==1.16.0 -sympy==1.9 -urllib3==2.2.2 -websockets==11.0.3 -zipp==3.15.0 +sympy==1.13.3 diff --git a/setup.py b/setup.py index 6e8635bdf6..c228ae4558 100644 --- a/setup.py +++ b/setup.py @@ -73,7 +73,7 @@ }, include_package_data=True, install_requires=[ - 'numpy < 2.0', 'networkx >= 2.5', 'astunparse', 'sympy >= 1.9', 'pyyaml', 'ply', 'websockets', 'jinja2', + 'numpy < 2.0', 'networkx >= 2.5', 'astunparse', 'sympy >= 1.9', 'pyyaml', 'ply', 'fparser >= 0.1.3', 'aenum >= 3.1', 'dataclasses; python_version < "3.7"', 'dill', 'pyreadline;platform_system=="Windows"', 'typing-compat; python_version < "3.8"', 'packaging' ] + cmake_requires, From b27024b57eeb679fe1326b00525228d581ca369e Mon Sep 17 00:00:00 2001 From: Philipp Schaad Date: Mon, 4 Nov 2024 17:44:37 +0100 Subject: [PATCH 11/33] Various Cutout Fixes (#1662) - [x] Fix cutouts w.r.t. the use of UIDs, allowing them to be preserved or re-generated depending on an input parameter - [x] Fix singlestate cutout extraction when memlets access struct members. --- dace/sdfg/analysis/cutout.py | 56 ++++++++++++++++++++++++++++++------ dace/sdfg/nodes.py | 10 +++++++ dace/sdfg/sdfg.py | 10 +++++++ 3 files changed, 67 insertions(+), 9 deletions(-) diff --git a/dace/sdfg/analysis/cutout.py b/dace/sdfg/analysis/cutout.py index 5d2eae7c6f..ec95157989 100644 --- a/dace/sdfg/analysis/cutout.py +++ b/dace/sdfg/analysis/cutout.py @@ -118,7 +118,7 @@ def from_json(cls, json_obj, context=None): def from_transformation( cls, sdfg: SDFG, transformation: Union[PatternTransformation, SubgraphTransformation], make_side_effects_global = True, use_alibi_nodes: bool = True, reduce_input_config = True, - symbols_map: Optional[Dict[str, Any]] = None + symbols_map: Optional[Dict[str, Any]] = None, preserve_guids: bool = False ) -> Union['SDFGCutout', SDFG]: """ Create a cutout from a transformation's set of affected graph elements. @@ -130,6 +130,9 @@ def from_transformation( :param reduce_input_config: Whether to reduce the input configuration where possible in singlestate cutouts. :param symbols_map: A mapping of symbols to values to use for the cutout. Optional, only used when reducing the input configuration. + :param preserve_guids: If True, ensures that the GUIDs of graph elements contained in the cutout remain + identical to the ones in their original graph. If False, new GUIDs will be generated. + False by default. :return: The cutout. 
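Example (an illustrative sketch; ``sdfg`` and ``xform`` are hypothetical placeholders for an SDFG and an already-matched transformation, not names from this patch)::

    from dace.sdfg.analysis.cutout import SDFGCutout

    # Cut the transformation's subgraph out into a standalone SDFG, keeping
    # the original element GUIDs so cutout elements map back to the source graph.
    cutout = SDFGCutout.from_transformation(sdfg, xform, preserve_guids=True)
    cutout.validate()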
""" affected_nodes = _transformation_determine_affected_nodes(sdfg, transformation) @@ -150,11 +153,12 @@ def from_transformation( state = target_sdfg.node(transformation.state_id) cutout = cls.singlestate_cutout(state, *affected_nodes, make_side_effects_global=make_side_effects_global, use_alibi_nodes=use_alibi_nodes, reduce_input_config=reduce_input_config, - symbols_map=symbols_map) + symbols_map=symbols_map, preserve_guids=preserve_guids) cutout.translate_transformation_into(transformation) return cutout elif isinstance(transformation, MultiStateTransformation): - cutout = cls.multistate_cutout(*affected_nodes, make_side_effects_global=make_side_effects_global) + cutout = cls.multistate_cutout(*affected_nodes, make_side_effects_global=make_side_effects_global, + preserve_guids=preserve_guids) # If the cutout is an SDFG, there's no need to translate the transformation. if isinstance(cutout, SDFGCutout): cutout.translate_transformation_into(transformation) @@ -169,14 +173,15 @@ def singlestate_cutout(cls, make_side_effects_global: bool = True, use_alibi_nodes: bool = True, reduce_input_config: bool = False, - symbols_map: Optional[Dict[str, Any]] = None) -> 'SDFGCutout': + symbols_map: Optional[Dict[str, Any]] = None, + preserve_guids: bool = False) -> 'SDFGCutout': """ Cut out a subgraph of a state from an SDFG to run separately for localized testing or optimization. The subgraph defined by the list of nodes will be extended to include access nodes of data containers necessary to run the graph separately. In addition, all transient data containers that may contain data when the cutout is executed are made global, as well as any transient data containers which are written to inside the cutout but may be read after the cutout. - + :param state: The SDFG state in which the subgraph resides. :param nodes: The nodes in the subgraph to cut out. :param make_copy: If True, deep-copies every SDFG element in the copy. Otherwise, original references are kept. @@ -188,17 +193,29 @@ def singlestate_cutout(cls, :param reduce_input_config: Whether to reduce the input configuration where possible in singlestate cutouts. :param symbols_map: A mapping of symbols to values to use for the cutout. Optional, only used when reducing the input configuration. + :param preserve_guids: If True, ensures that the GUIDs of graph elements contained in the cutout remain + identical to the ones in their original graph. If False, new GUIDs will be generated. + False by default - if make_copy is False, this has no effect by extension. :return: The created SDFGCutout. """ if reduce_input_config: nodes = _reduce_in_configuration(state, nodes, use_alibi_nodes, symbols_map) - create_element = copy.deepcopy if make_copy else (lambda x: x) + + def clone_f(x: Union[Memlet, InterstateEdge, nd.Node, ControlFlowBlock]): + ret = copy.deepcopy(x) + if preserve_guids: + ret.guid = x.guid + return ret + + create_element = clone_f if make_copy else (lambda x: x) sdfg = state.parent subgraph: StateSubgraphView = StateSubgraphView(state, nodes) subgraph = _extend_subgraph_with_access_nodes(state, subgraph, use_alibi_nodes) # Make a new SDFG with the included constants, used symbols, and data containers. 
cutout = SDFGCutout(sdfg.name + '_cutout', sdfg.constants_prop) + if preserve_guids: + cutout.guid = sdfg.guid cutout._base_sdfg = sdfg defined_syms = subgraph.defined_symbols() freesyms = subgraph.free_symbols @@ -213,11 +230,24 @@ def singlestate_cutout(cls, memlet = edge.data if memlet.data in cutout.arrays: continue - new_desc = sdfg.arrays[memlet.data].clone() - cutout.add_datadesc(memlet.data, new_desc) + dataname = memlet.data + if '.' in dataname: + # This is an access to a struct member, which typically happens for the memlets between an access node + # pointing to a struct (or view thereof), and a view pointing to the member. Assert that this is indeed + # the case (i.e., only one '.' is found in the name of the data being accessed), and if so, clone the + # struct (or struct view) data descriptor instead. + parts = dataname.split('.') + if len(parts) == 2: + dataname = parts[0] + else: + raise RuntimeError('Attempting to add invalid multi-nested data ' + memlet.data + ' to a cutout') + new_desc = sdfg.arrays[dataname].clone() + cutout.add_datadesc(dataname, new_desc) # Add a single state with the extended subgraph new_state = cutout.add_state(state.label, is_start_state=True) + if preserve_guids: + new_state.guid = state.guid in_translation = dict() out_translation = dict() for e in sg_edges: @@ -322,6 +352,7 @@ def singlestate_cutout(cls, def multistate_cutout(cls, *states: SDFGState, make_side_effects_global: bool = True, + preserve_guids: bool = False, override_start_block: Optional[ControlFlowBlock] = None) -> Union['SDFGCutout', SDFG]: """ Cut out a multi-state subgraph from an SDFG to run separately for localized testing or optimization. @@ -337,12 +368,19 @@ def multistate_cutout(cls, :param make_side_effects_global: If True, all transient data containers which are read inside the cutout but may be written to _before_ the cutout, or any data containers which are written to inside the cutout but may be read _after_ the cutout, are made global. + :param preserve_guids: If True, ensures that the GUIDs of graph elements contained in the cutout remain + identical to the ones in their original graph. If False, new GUIDs will be generated. + False by default - if make_copy is False, this has no effect by extension. :param override_start_block: If set, explicitly force a given control flow block to be the start block. If left None (default), the start block is automatically determined based on domination relationships in the original graph. :return: The created SDFGCutout or the original SDFG where no smaller cutout could be obtained. """ - create_element = copy.deepcopy + def create_element(x: Union[ControlFlowBlock, InterstateEdge]) -> Union[ControlFlowBlock, InterstateEdge]: + ret = copy.deepcopy(x) + if preserve_guids: + ret.guid = x.guid + return ret # Check that all states are inside the same SDFG.
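The struct-member handling in the hunk above resolves a dotted memlet data name to its root container before cloning a descriptor. A tiny standalone illustration of that name resolution (``root_container`` and the data names are hypothetical, for demonstration only):

    def root_container(dataname: str) -> str:
        # 'A.member' denotes a member of struct (or struct view) 'A'; only a
        # single level of nesting is considered valid, mirroring the check above.
        if '.' not in dataname:
            return dataname
        parts = dataname.split('.')
        if len(parts) != 2:
            raise RuntimeError('Attempting to add invalid multi-nested data ' + dataname + ' to a cutout')
        return parts[0]

    assert root_container('mystruct.member') == 'mystruct'
    assert root_container('A') == 'A'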
sdfg = list(states)[0].parent diff --git a/dace/sdfg/nodes.py b/dace/sdfg/nodes.py index 4ae91d5ea0..d29b1a22e4 100644 --- a/dace/sdfg/nodes.py +++ b/dace/sdfg/nodes.py @@ -55,6 +55,16 @@ def __str__(self): else: return type(self).__name__ + def __deepcopy__(self, memo): + cls = self.__class__ + result = cls.__new__(cls) + memo[id(self)] = result + for k, v in self.__dict__.items(): + if k == 'guid': # Skip ID + continue + setattr(result, k, dcpy(v, memo)) + return result + def validate(self, sdfg, state): pass diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index cb8a7d5c2d..19d2a47295 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -205,6 +205,16 @@ def __setattr__(self, name: str, value: Any) -> None: super().__setattr__('_uncond', None) return super().__setattr__(name, value) + def __deepcopy__(self, memo): + cls = self.__class__ + result = cls.__new__(cls) + memo[id(self)] = result + for k, v in self.__dict__.items(): + if k == 'guid': # Skip ID + continue + setattr(result, k, copy.deepcopy(v, memo)) + return result + @staticmethod def _convert_assignment(assignment) -> str: if isinstance(assignment, ast.AST): From 64d76799bf4f27f128b5301d97aecb78b634df2f Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Mon, 4 Nov 2024 08:44:54 -0800 Subject: [PATCH 12/33] Various stability improvements and convenience APIs (#1724) * Various minor code generation and runtime fixes * Minor API improvements to CompiledSDFG, sdfg.view(), and subset offsetting * Minor memlet propagation fix * Various simplify pass fixes that pertain to use of views, references, and tasklets with side effects * Symbolic support for shift and ternary expressions (fixes #1315) * Pass permissiveness into transformations --- dace/cli/sdfgcc.py | 2 +- dace/cli/sdfv.py | 6 +- dace/codegen/compiled_sdfg.py | 5 + dace/codegen/compiler.py | 2 +- dace/codegen/cppunparse.py | 8 +- dace/codegen/tools/type_inference.py | 2 + dace/dtypes.py | 2 + dace/memlet.py | 4 +- dace/properties.py | 2 +- dace/runtime/include/dace/stream.h | 8 +- dace/sdfg/graph.py | 2 +- dace/sdfg/propagation.py | 6 +- dace/sdfg/sdfg.py | 9 +- dace/sdfg/utils.py | 67 ++++++++-- dace/sdfg/validation.py | 4 +- dace/subsets.py | 26 ++-- dace/symbolic.py | 117 ++++++++++++++---- .../dataflow/redundant_array.py | 10 ++ .../transformation/dataflow/tasklet_fusion.py | 102 +++++++++++++-- dace/transformation/helpers.py | 3 + dace/transformation/interstate/loop_to_map.py | 5 +- .../interstate/multistate_inline.py | 3 + .../transformation/interstate/sdfg_nesting.py | 3 + .../transformation/interstate/state_fusion.py | 9 ++ .../passes/analysis/analysis.py | 55 +++++++- .../transformation/passes/pattern_matching.py | 2 + .../transformation/passes/scalar_to_symbol.py | 25 ++-- tests/transformations/tasklet_fusion_test.py | 35 +++++- 28 files changed, 440 insertions(+), 84 deletions(-) diff --git a/dace/cli/sdfgcc.py b/dace/cli/sdfgcc.py index 1df7604b4b..0d04950be7 100644 --- a/dace/cli/sdfgcc.py +++ b/dace/cli/sdfgcc.py @@ -48,7 +48,7 @@ def main(): sdfg = SDFGOptimizer(sdfg).optimize() # Compile SDFG - sdfg.compile(outpath) + sdfg.compile(outpath, return_program_handle=False) # Copying header file to optional path if outpath is not None: diff --git a/dace/cli/sdfv.py b/dace/cli/sdfv.py index 2012debe82..d14059468f 100644 --- a/dace/cli/sdfv.py +++ b/dace/cli/sdfv.py @@ -43,7 +43,11 @@ def view(sdfg: dace.SDFG, filename: Optional[Union[str, int]] = None, verbose: b ): fd, filename = tempfile.mkstemp(suffix='.sdfg') sdfg.save(filename) - os.system(f'code 
{filename}') + if platform.system() == 'Darwin': + # Special case for macOS + os.system(f'open {filename}') + else: + os.system(f'code {filename}') os.close(fd) return diff --git a/dace/codegen/compiled_sdfg.py b/dace/codegen/compiled_sdfg.py index 9bfcc439e0..332db028ae 100644 --- a/dace/codegen/compiled_sdfg.py +++ b/dace/codegen/compiled_sdfg.py @@ -518,6 +518,9 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: # Otherwise, None values are passed as null pointers below elif isinstance(arg, ctypes._Pointer): pass + elif isinstance(arg, str): + # Cast to bytes + arglist[i] = ctypes.c_char_p(arg.encode('utf-8')) else: raise TypeError(f'Passing an object (type {type(arg).__name__}) to an array in argument "{a}"') elif is_array and not is_dtArray: @@ -550,6 +553,8 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: pass elif isinstance(arg, float) and atype.dtype.type == np.float64: pass + elif isinstance(arg, bool) and atype.dtype.type == np.bool_: + pass elif (isinstance(arg, str) or arg is None) and atype.dtype == dtypes.string: if arg is None: arglist[i] = ctypes.c_char_p(None) diff --git a/dace/codegen/compiler.py b/dace/codegen/compiler.py index 350e141606..236f832cac 100644 --- a/dace/codegen/compiler.py +++ b/dace/codegen/compiler.py @@ -213,7 +213,7 @@ def configure_and_compile(program_folder, program_name=None, output_stream=None) # Clean CMake directory and try once more if Config.get_bool('debugprint'): print('Cleaning CMake build folder and retrying...') - shutil.rmtree(build_folder) + shutil.rmtree(build_folder, ignore_errors=True) os.makedirs(build_folder) try: _run_liveoutput(cmake_command, shell=True, cwd=build_folder, output_stream=output_stream) diff --git a/dace/codegen/cppunparse.py b/dace/codegen/cppunparse.py index edeb5270ca..c375147930 100644 --- a/dace/codegen/cppunparse.py +++ b/dace/codegen/cppunparse.py @@ -555,7 +555,11 @@ def _write_constant(self, value): if result.find("b'") >= 0: self.write(result) else: - self.write(result.replace('\'', '\"')) + towrite = result + if result.startswith("'"): + towrite = result[1:-1].replace('"', '\\"') + towrite = f'"{towrite}"' + self.write(towrite) def _Constant(self, t): value = t.value @@ -1187,6 +1191,8 @@ def py2cpp(code, expr_semicolon=True, defined_symbols=None): return cppunparse(ast.parse(symbolic.symstr(code, cpp_mode=True)), expr_semicolon, defined_symbols=defined_symbols) + elif isinstance(code, int): + return str(code) elif code.__class__.__name__ == 'function': try: code_str = inspect.getsource(code) diff --git a/dace/codegen/tools/type_inference.py b/dace/codegen/tools/type_inference.py index 8f8dd84151..26b369fa9d 100644 --- a/dace/codegen/tools/type_inference.py +++ b/dace/codegen/tools/type_inference.py @@ -375,6 +375,8 @@ def _Compare(t, symbols, inferred_symbols): for o, e in zip(t.ops, t.comparators): if o.__class__.__name__ not in cppunparse.CPPUnparser.cmpops: continue + if isinstance(e, ast.Constant) and e.value is None: + continue inf_type = _dispatch(e, symbols, inferred_symbols) if isinstance(inf_type, dtypes.vector): # Make sure all occurring vectors are of same size diff --git a/dace/dtypes.py b/dace/dtypes.py index a016ac60e2..d0c6f23e03 100644 --- a/dace/dtypes.py +++ b/dace/dtypes.py @@ -404,6 +404,8 @@ def __init__(self, wrapped_type, typename=None): wrapped_type = numpy.bool_ elif getattr(wrapped_type, '__name__', '') == 'bool_' and typename is None: typename = 'bool' + elif wrapped_type is type(None): + wrapped_type = None self.type = wrapped_type # 
Type in Python self.ctype = _CTYPES[wrapped_type] # Type in C diff --git a/dace/memlet.py b/dace/memlet.py index f78da3a6b7..85bd0a348d 100644 --- a/dace/memlet.py +++ b/dace/memlet.py @@ -555,9 +555,9 @@ def used_symbols(self, all_symbols: bool, edge=None) -> Set[str]: from dace.sdfg import nodes if isinstance(edge.dst, nodes.CodeNode) or isinstance(edge.src, nodes.CodeNode): view_edge = True - elif edge.dst_conn == 'views' and isinstance(edge.dst, nodes.AccessNode): + elif edge.dst_conn and isinstance(edge.dst, nodes.AccessNode): view_edge = True - elif edge.src_conn == 'views' and isinstance(edge.src, nodes.AccessNode): + elif edge.src_conn and isinstance(edge.src, nodes.AccessNode): view_edge = True if not view_edge: diff --git a/dace/properties.py b/dace/properties.py index 09439ce4f8..82be72f9fd 100644 --- a/dace/properties.py +++ b/dace/properties.py @@ -329,7 +329,7 @@ def initialize_properties(obj, *args, **kwargs): for name, prop in own_properties.items(): # Only assign our own properties, so we don't overwrite what's been # set by the base class - if hasattr(obj, name): + if hasattr(obj, '_' + name): raise PropertyError("Property {} already assigned in {}".format(name, type(obj).__name__)) if not prop.indirected: if prop.allow_none or prop.default is not None: diff --git a/dace/runtime/include/dace/stream.h b/dace/runtime/include/dace/stream.h index 255e16ec2b..1f8134fae6 100644 --- a/dace/runtime/include/dace/stream.h +++ b/dace/runtime/include/dace/stream.h @@ -338,7 +338,7 @@ namespace dace { template struct Consume { - template
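A side note on the ``CompiledSDFG._construct_args`` string handling added earlier in this patch: the conversion follows standard ctypes practice. A minimal standalone sketch (plain ctypes; ``to_c_string`` is a hypothetical helper, not part of DaCe):

    import ctypes
    from typing import Optional

    def to_c_string(arg: Optional[str]) -> ctypes.c_char_p:
        # Python strings become NUL-terminated UTF-8 buffers; None maps to a null
        # pointer, mirroring the two branches in _construct_args above.
        if arg is None:
            return ctypes.c_char_p(None)
        return ctypes.c_char_p(arg.encode('utf-8'))

    assert to_c_string('hello').value == b'hello'
    assert to_c_string(None).value is None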