diff --git a/src/gt4py/next/otf/languages.py b/src/gt4py/next/otf/languages.py
index 2397878271..6c8e8b8313 100644
--- a/src/gt4py/next/otf/languages.py
+++ b/src/gt4py/next/otf/languages.py
@@ -57,6 +57,11 @@ class Python(LanguageTag):
     ...
 
 
+class SDFG(LanguageTag):
+    settings_class = LanguageSettings
+    ...
+
+
 class NanobindSrcL(LanguageTag):
     ...
 
diff --git a/src/gt4py/next/program_processors/runners/dace.py b/src/gt4py/next/program_processors/runners/dace.py
new file mode 100644
index 0000000000..2291541dd6
--- /dev/null
+++ b/src/gt4py/next/program_processors/runners/dace.py
@@ -0,0 +1,94 @@
+# GT4Py - GridTools Framework
+#
+# Copyright (c) 2014-2023, ETH Zurich
+# All rights reserved.
+#
+# This file is part of the GT4Py project and the GridTools framework.
+# GT4Py is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or any later
+# version. See the LICENSE.txt file at the top-level directory of this
+# distribution for a copy of the license or check .
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import functools
+
+import factory
+
+import gt4py._core.definitions as core_defs
+from gt4py.next import config
+from gt4py.next.otf import recipes, stages
+from gt4py.next.program_processors.runners.dace_iterator.workflow import (
+    DaCeCompilationStepFactory,
+    DaCeTranslationStepFactory,
+    convert_args,
+)
+from gt4py.next.program_processors.runners.gtfn import GTFNBackendFactory
+
+
+def _no_bindings(inp: stages.ProgramSource) -> stages.CompilableSource:
+    """Wrap the program source into a compilable source that has no binding source."""
+    return stages.CompilableSource(program_source=inp, binding_source=None)
+
+
+class DaCeWorkflowFactory(factory.Factory):
+    """Build an ``OTFCompileWorkflow`` from the DaCe translation and compilation steps."""
+
+    class Meta:
+        model = recipes.OTFCompileWorkflow
+
+    class Params:
+        device_type: core_defs.DeviceType = core_defs.DeviceType.CPU
+        cmake_build_type: config.CMakeBuildType = factory.LazyFunction(
+            lambda: config.CMAKE_BUILD_TYPE
+        )
+        use_field_canonical_representation: bool = False
+
+    translation = factory.SubFactory(
+        DaCeTranslationStepFactory,
+        device_type=factory.SelfAttribute("..device_type"),
+        use_field_canonical_representation=factory.SelfAttribute(
+            "..use_field_canonical_representation"
+        ),
+    )
+    bindings = _no_bindings
+    compilation = factory.SubFactory(
+        DaCeCompilationStepFactory,
+        cache_lifetime=factory.LazyFunction(lambda: config.BUILD_CACHE_LIFETIME),
+        cmake_build_type=factory.SelfAttribute("..cmake_build_type"),
+    )
+    decoration = factory.LazyAttribute(
+        lambda o: functools.partial(
+            convert_args,
+            device=o.device_type,
+            use_field_canonical_representation=o.use_field_canonical_representation,
+        )
+    )
+
+
+class DaCeBackendFactory(GTFNBackendFactory):
+    """Specialize ``GTFNBackendFactory`` to use the DaCe workflow; names are ``run_dace_*``."""
+
+    class Params:
+        otf_workflow = factory.SubFactory(
+            DaCeWorkflowFactory,
+            device_type=factory.SelfAttribute("..device_type"),
+            use_field_canonical_representation=factory.SelfAttribute(
+                "..use_field_canonical_representation"
+            ),
+        )
+        name = factory.LazyAttribute(
+            lambda o: f"run_dace_{o.name_device}{o.name_temps}{o.name_cached}{o.name_postfix}"
+        )
+        auto_optimize = factory.Trait(
+            otf_workflow__translation__auto_optimize=True,
+            name_temps="_opt",
+        )
+        use_field_canonical_representation: bool = False
+
+
+# Ready-to-use DaCe backends, both created with the `cached` and `auto_optimize` traits.
+run_dace_cpu = DaCeBackendFactory(cached=True, auto_optimize=True)
+
+run_dace_gpu = DaCeBackendFactory(gpu=True, cached=True, auto_optimize=True)
diff --git a/src/gt4py/next/program_processors/runners/dace_iterator/workflow.py b/src/gt4py/next/program_processors/runners/dace_iterator/workflow.py
new file mode 100644
index 0000000000..e5ae52bb7c
--- /dev/null
+++ b/src/gt4py/next/program_processors/runners/dace_iterator/workflow.py
@@ -0,0 +1,196 @@
+# GT4Py - GridTools Framework
+#
+# Copyright (c) 2014-2023, ETH Zurich
+# All rights reserved.
+#
+# This file is part of the GT4Py project and the GridTools framework.
+# GT4Py is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or any later
+# version. See the LICENSE.txt file at the top-level directory of this
+# distribution for a copy of the license or check .
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+from __future__ import annotations
+
+import dataclasses
+from typing import Callable, Optional, cast
+
+import dace
+import factory
+from dace.codegen.compiled_sdfg import CompiledSDFG
+
+from gt4py._core import definitions as core_defs
+from gt4py.next import common, config
+from gt4py.next.common import Dimension
+from gt4py.next.iterator import ir as itir
+from gt4py.next.iterator.transforms import LiftMode
+from gt4py.next.otf import languages, stages, step_types, workflow
+from gt4py.next.otf.binding import interface
+from gt4py.next.otf.compilation import cache
+from gt4py.next.otf.languages import LanguageSettings
+from gt4py.next.type_system import type_translation as tt
+
+from . import build_sdfg_from_itir, get_sdfg_args
+
+
+@dataclasses.dataclass(frozen=True)
+class DaCeTranslator(
+    workflow.ChainableWorkflowMixin[
+        stages.ProgramCall,
+        stages.ProgramSource[languages.SDFG, languages.LanguageSettings],
+    ],
+    step_types.TranslationStep[languages.SDFG, languages.LanguageSettings],
+):
+    """Translate a program call into a program source holding the SDFG serialized to JSON."""
+
+    auto_optimize: bool = False
+    lift_mode: LiftMode = LiftMode.FORCE_INLINE
+    device_type: core_defs.DeviceType = core_defs.DeviceType.CPU
+    temporary_extraction_heuristics: Optional[
+        Callable[[itir.StencilClosure], Callable[[itir.Expr], bool]]
+    ] = None
+    use_field_canonical_representation: bool = False
+
+    def _language_settings(self) -> languages.LanguageSettings:
+        """Return the settings of the SDFG source: no formatter, ``sdfg`` file extension."""
+        return languages.LanguageSettings(
+            formatter_key="",
+            formatter_style="",
+            file_extension="sdfg",
+        )
+
+    def __call__(
+        self,
+        inp: stages.ProgramCall,
+    ) -> stages.ProgramSource[languages.SDFG, LanguageSettings]:
+        """Generate DaCe SDFG file from the ITIR definition."""
+        program: itir.FencilDefinition = inp.program
+        on_gpu = self.device_type == core_defs.DeviceType.CUDA
+
+        # ITIR parameters
+        column_axis: Optional[Dimension] = inp.kwargs.get("column_axis", None)
+        offset_provider = inp.kwargs["offset_provider"]
+
+        sdfg = build_sdfg_from_itir(
+            program,
+            *inp.args,
+            offset_provider=offset_provider,
+            auto_optimize=self.auto_optimize,
+            on_gpu=on_gpu,
+            column_axis=column_axis,
+            lift_mode=self.lift_mode,
+            load_sdfg_from_file=False,
+            save_sdfg=False,
+            use_field_canonical_representation=self.use_field_canonical_representation,
+        )
+
+        arg_types = tuple(
+            interface.Parameter(param, tt.from_value(arg))
+            for param, arg in zip(sdfg.arg_names, inp.args)
+        )
+
+        module: stages.ProgramSource[languages.SDFG, languages.LanguageSettings] = (
+            stages.ProgramSource(
+                entry_point=interface.Function(program.id, arg_types),
+                source_code=sdfg.to_json(),
+                library_deps=tuple(),
+                language=languages.SDFG,
+                language_settings=self._language_settings(),
+            )
+        )
+        return module
+
+
+class DaCeTranslationStepFactory(factory.Factory):
+    """Factory creating ``DaCeTranslator`` instances."""
+
+    class Meta:
+        model = DaCeTranslator
+
+
+@dataclasses.dataclass(frozen=True)
+class DaCeCompiler(
+    workflow.ChainableWorkflowMixin[
+        stages.CompilableSource[languages.SDFG, languages.LanguageSettings, languages.Python],
+        stages.CompiledProgram,
+    ],
+    workflow.ReplaceEnabledWorkflowMixin[
+        stages.CompilableSource[languages.SDFG, languages.LanguageSettings, languages.Python],
+        stages.CompiledProgram,
+    ],
+    step_types.CompilationStep[languages.SDFG, languages.LanguageSettings, languages.Python],
+):
+    """Use the dace build system to compile a GT4Py program to a ``gt4py.next.otf.stages.CompiledProgram``."""
+
+    cache_lifetime: config.BuildCacheLifetime
+    device_type: core_defs.DeviceType = core_defs.DeviceType.CPU
+    cmake_build_type: config.CMakeBuildType = config.CMakeBuildType.DEBUG
+
+    def __call__(
+        self,
+        inp: stages.CompilableSource[languages.SDFG, languages.LanguageSettings, languages.Python],
+    ) -> stages.CompiledProgram:
+        """Deserialize the SDFG from the program source, set its build folder and compile it."""
+        sdfg = dace.SDFG.from_json(inp.program_source.source_code)
+
+        src_dir = cache.get_cache_folder(inp, self.cache_lifetime)
+        sdfg.build_folder = src_dir / ".dacecache"
+
+        with dace.config.temporary_config():
+            dace.config.Config.set("compiler", "build_type", value=self.cmake_build_type.value)
+            if self.device_type == core_defs.DeviceType.CPU:
+                compiler_args = dace.config.Config.get("compiler", "cpu", "args")
+                # disable finite-math-only in order to support isfinite/isinf/isnan builtins
+                if "-ffast-math" in compiler_args:
+                    compiler_args += " -fno-finite-math-only"
+                if "-ffinite-math-only" in compiler_args:
+                    # str.replace returns a new string: rebind it so the flag is really dropped
+                    compiler_args = compiler_args.replace("-ffinite-math-only", "")
+
+                dace.config.Config.set("compiler", "cpu", "args", value=compiler_args)
+            sdfg_program = sdfg.compile(validate=False)
+
+        return sdfg_program
+
+
+class DaCeCompilationStepFactory(factory.Factory):
+    """Factory creating ``DaCeCompiler`` instances."""
+
+    class Meta:
+        model = DaCeCompiler
+
+
+def convert_args(
+    inp: stages.CompiledProgram,
+    device: core_defs.DeviceType = core_defs.DeviceType.CPU,
+    use_field_canonical_representation: bool = False,
+) -> stages.CompiledProgram:
+    """Wrap a compiled SDFG so that it can be called with positional program arguments.
+
+    The returned callable converts its positional arguments and the
+    ``offset_provider`` into SDFG keyword arguments via ``get_sdfg_args``
+    and then invokes ``inp`` with them.
+    """
+    sdfg_program = cast(CompiledSDFG, inp)
+    on_gpu = device == core_defs.DeviceType.CUDA
+    sdfg = sdfg_program.sdfg
+
+    def decorated_program(
+        *args, offset_provider: dict[str, common.Connectivity | common.Dimension]
+    ):
+        sdfg_args = get_sdfg_args(
+            sdfg,
+            *args,
+            check_args=False,
+            offset_provider=offset_provider,
+            on_gpu=on_gpu,
+            use_field_canonical_representation=use_field_canonical_representation,
+        )
+
+        with dace.config.temporary_config():
+            dace.config.Config.set("compiler", "allow_view_arguments", value=True)
+            return inp(**sdfg_args)
+
+    return decorated_program
diff --git a/tests/next_tests/definitions.py b/tests/next_tests/definitions.py
index a8ba72f366..e1341f99ae 100644
--- a/tests/next_tests/definitions.py
+++ b/tests/next_tests/definitions.py
@@ -83,20 +83,8 @@ class EmbeddedIds(_PythonObjectIdMixin, str, enum.Enum):
 
 
 class OptionalProgramBackendId(_PythonObjectIdMixin, str, enum.Enum):
-    DACE_CPU = "gt4py.next.program_processors.runners.dace_iterator.run_dace_cpu"
-    DACE_GPU = "gt4py.next.program_processors.runners.dace_iterator.run_dace_gpu"
-
-
-class ProgramExecutorId(_PythonObjectIdMixin, str, enum.Enum):
-    GTFN_CPU_EXECUTOR = f"{ProgramBackendId.GTFN_CPU}.executor"
-    GTFN_CPU_IMPERATIVE_EXECUTOR = f"{ProgramBackendId.GTFN_CPU_IMPERATIVE}.executor"
-    GTFN_CPU_WITH_TEMPORARIES = f"{ProgramBackendId.GTFN_CPU_WITH_TEMPORARIES}.executor"
-    ROUNDTRIP = f"{ProgramBackendId.ROUNDTRIP}.executor"
-    DOUBLE_ROUNDTRIP = f"{ProgramBackendId.DOUBLE_ROUNDTRIP}.executor"
-
-
-class OptionalProgramExecutorId(_PythonObjectIdMixin, str, enum.Enum):
-    DACE_CPU_EXECUTOR = f"{OptionalProgramBackendId.DACE_CPU}.executor"
+    DACE_CPU = "gt4py.next.program_processors.runners.dace.run_dace_cpu"
+    DACE_GPU = "gt4py.next.program_processors.runners.dace.run_dace_gpu"
 
 
 class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum):