Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat[next][dace]: Add gt4py workflow for the DaCe backend #1477

Merged
merged 3 commits into from
Mar 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/gt4py/next/otf/languages.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ class Python(LanguageTag):
...


class SDFG(LanguageTag):
settings_class = LanguageSettings
...


class NanobindSrcL(LanguageTag): ...


Expand Down
88 changes: 88 additions & 0 deletions src/gt4py/next/program_processors/runners/dace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# GT4Py - GridTools Framework
#
# Copyright (c) 2014-2023, ETH Zurich
# All rights reserved.
#
# This file is part of the GT4Py project and the GridTools framework.
# GT4Py is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or any later
# version. See the LICENSE.txt file at the top-level directory of this
# distribution for a copy of the license or check <https://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: GPL-3.0-or-later

import functools

import factory

import gt4py._core.definitions as core_defs
from gt4py.next import config
from gt4py.next.otf import recipes, stages
from gt4py.next.program_processors.runners.dace_iterator.workflow import (
DaCeCompilationStepFactory,
DaCeTranslationStepFactory,
convert_args,
)
from gt4py.next.program_processors.runners.gtfn import GTFNBackendFactory


def _no_bindings(inp: stages.ProgramSource) -> stages.CompilableSource:
return stages.CompilableSource(program_source=inp, binding_source=None)


class DaCeWorkflowFactory(factory.Factory):
class Meta:
model = recipes.OTFCompileWorkflow

class Params:
device_type: core_defs.DeviceType = core_defs.DeviceType.CPU
cmake_build_type: config.CMakeBuildType = factory.LazyFunction(
lambda: config.CMAKE_BUILD_TYPE
)
use_field_canonical_representation: bool = False

translation = factory.SubFactory(
DaCeTranslationStepFactory,
device_type=factory.SelfAttribute("..device_type"),
use_field_canonical_representation=factory.SelfAttribute(
"..use_field_canonical_representation"
),
)
bindings = _no_bindings
compilation = factory.SubFactory(
DaCeCompilationStepFactory,
cache_lifetime=factory.LazyFunction(lambda: config.BUILD_CACHE_LIFETIME),
cmake_build_type=factory.SelfAttribute("..cmake_build_type"),
)
decoration = factory.LazyAttribute(
lambda o: functools.partial(
convert_args,
device=o.device_type,
use_field_canonical_representation=o.use_field_canonical_representation,
)
)


class DaCeBackendFactory(GTFNBackendFactory):
class Params:
otf_workflow = factory.SubFactory(
DaCeWorkflowFactory,
device_type=factory.SelfAttribute("..device_type"),
use_field_canonical_representation=factory.SelfAttribute(
"..use_field_canonical_representation"
),
)
name = factory.LazyAttribute(
lambda o: f"run_dace_{o.name_device}{o.name_temps}{o.name_cached}{o.name_postfix}"
)
auto_optimize = factory.Trait(
otf_workflow__translation__auto_optimize=True,
name_temps="_opt",
)
use_field_canonical_representation: bool = False


run_dace_cpu = DaCeBackendFactory(cached=True, auto_optimize=True)

run_dace_gpu = DaCeBackendFactory(gpu=True, cached=True, auto_optimize=True)
181 changes: 181 additions & 0 deletions src/gt4py/next/program_processors/runners/dace_iterator/workflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
# GT4Py - GridTools Framework
#
# Copyright (c) 2014-2023, ETH Zurich
# All rights reserved.
#
# This file is part of the GT4Py project and the GridTools framework.
# GT4Py is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or any later
# version. See the LICENSE.txt file at the top-level directory of this
# distribution for a copy of the license or check <https://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: GPL-3.0-or-later

from __future__ import annotations

import dataclasses
from typing import Callable, Optional, cast

import dace
import factory
from dace.codegen.compiled_sdfg import CompiledSDFG

from gt4py._core import definitions as core_defs
from gt4py.next import common, config
from gt4py.next.common import Dimension
from gt4py.next.iterator import ir as itir
from gt4py.next.iterator.transforms import LiftMode
from gt4py.next.otf import languages, stages, step_types, workflow
from gt4py.next.otf.binding import interface
from gt4py.next.otf.compilation import cache
from gt4py.next.otf.languages import LanguageSettings
from gt4py.next.type_system import type_translation as tt

from . import build_sdfg_from_itir, get_sdfg_args


@dataclasses.dataclass(frozen=True)
class DaCeTranslator(
workflow.ChainableWorkflowMixin[
stages.ProgramCall,
stages.ProgramSource[languages.SDFG, languages.LanguageSettings],
],
step_types.TranslationStep[languages.SDFG, languages.LanguageSettings],
):
auto_optimize: bool = False
lift_mode: LiftMode = LiftMode.FORCE_INLINE
device_type: core_defs.DeviceType = core_defs.DeviceType.CPU
temporary_extraction_heuristics: Optional[
Callable[[itir.StencilClosure], Callable[[itir.Expr], bool]]
] = None
use_field_canonical_representation: bool = False

def _language_settings(self) -> languages.LanguageSettings:
return languages.LanguageSettings(
formatter_key="",
formatter_style="",
file_extension="sdfg",
)

def __call__(
self,
inp: stages.ProgramCall,
) -> stages.ProgramSource[languages.SDFG, LanguageSettings]:
"""Generate DaCe SDFG file from the ITIR definition."""
program: itir.FencilDefinition = inp.program
on_gpu = True if self.device_type == core_defs.DeviceType.CUDA else False

# ITIR parameters
column_axis: Optional[Dimension] = inp.kwargs.get("column_axis", None)
offset_provider = inp.kwargs["offset_provider"]

sdfg = build_sdfg_from_itir(
program,
*inp.args,
offset_provider=offset_provider,
auto_optimize=self.auto_optimize,
on_gpu=on_gpu,
column_axis=column_axis,
lift_mode=self.lift_mode,
load_sdfg_from_file=False,
save_sdfg=False,
use_field_canonical_representation=self.use_field_canonical_representation,
)

arg_types = tuple(
interface.Parameter(param, tt.from_value(arg))
for param, arg in zip(sdfg.arg_names, inp.args)
)

module: stages.ProgramSource[languages.SDFG, languages.LanguageSettings] = (
stages.ProgramSource(
entry_point=interface.Function(program.id, arg_types),
source_code=sdfg.to_json(),
library_deps=tuple(),
language=languages.SDFG,
language_settings=self._language_settings(),
)
)
return module


class DaCeTranslationStepFactory(factory.Factory):
class Meta:
model = DaCeTranslator


@dataclasses.dataclass(frozen=True)
class DaCeCompiler(
workflow.ChainableWorkflowMixin[
stages.CompilableSource[languages.SDFG, languages.LanguageSettings, languages.Python],
stages.CompiledProgram,
],
workflow.ReplaceEnabledWorkflowMixin[
stages.CompilableSource[languages.SDFG, languages.LanguageSettings, languages.Python],
stages.CompiledProgram,
],
step_types.CompilationStep[languages.SDFG, languages.LanguageSettings, languages.Python],
):
"""Use the dace build system to compile a GT4Py program to a ``gt4py.next.otf.stages.CompiledProgram``."""

cache_lifetime: config.BuildCacheLifetime
device_type: core_defs.DeviceType = core_defs.DeviceType.CPU
cmake_build_type: config.CMakeBuildType = config.CMakeBuildType.DEBUG

def __call__(
self,
inp: stages.CompilableSource[languages.SDFG, languages.LanguageSettings, languages.Python],
) -> stages.CompiledProgram:
sdfg = dace.SDFG.from_json(inp.program_source.source_code)

src_dir = cache.get_cache_folder(inp, self.cache_lifetime)
sdfg.build_folder = src_dir / ".dacecache"

with dace.config.temporary_config():
dace.config.Config.set("compiler", "build_type", value=self.cmake_build_type.value)
if self.device_type == core_defs.DeviceType.CPU:
compiler_args = dace.config.Config.get("compiler", "cpu", "args")
# disable finite-math-only in order to support isfinite/isinf/isnan builtins
if "-ffast-math" in compiler_args:
compiler_args += " -fno-finite-math-only"
if "-ffinite-math-only" in compiler_args:
compiler_args.replace("-ffinite-math-only", "")

dace.config.Config.set("compiler", "cpu", "args", value=compiler_args)
sdfg_program = sdfg.compile(validate=False)

return sdfg_program


class DaCeCompilationStepFactory(factory.Factory):
class Meta:
model = DaCeCompiler


def convert_args(
inp: stages.CompiledProgram,
device: core_defs.DeviceType = core_defs.DeviceType.CPU,
use_field_canonical_representation: bool = False,
) -> stages.CompiledProgram:
sdfg_program = cast(CompiledSDFG, inp)
on_gpu = True if device == core_defs.DeviceType.CUDA else False
sdfg = sdfg_program.sdfg

def decorated_program(
*args, offset_provider: dict[str, common.Connectivity | common.Dimension]
):
sdfg_args = get_sdfg_args(
sdfg,
*args,
check_args=False,
offset_provider=offset_provider,
on_gpu=on_gpu,
use_field_canonical_representation=use_field_canonical_representation,
)

with dace.config.temporary_config():
dace.config.Config.set("compiler", "allow_view_arguments", value=True)
return inp(**sdfg_args)

return decorated_program
16 changes: 2 additions & 14 deletions tests/next_tests/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,20 +83,8 @@ class EmbeddedIds(_PythonObjectIdMixin, str, enum.Enum):


class OptionalProgramBackendId(_PythonObjectIdMixin, str, enum.Enum):
DACE_CPU = "gt4py.next.program_processors.runners.dace_iterator.run_dace_cpu"
DACE_GPU = "gt4py.next.program_processors.runners.dace_iterator.run_dace_gpu"


class ProgramExecutorId(_PythonObjectIdMixin, str, enum.Enum):
GTFN_CPU_EXECUTOR = f"{ProgramBackendId.GTFN_CPU}.executor"
GTFN_CPU_IMPERATIVE_EXECUTOR = f"{ProgramBackendId.GTFN_CPU_IMPERATIVE}.executor"
GTFN_CPU_WITH_TEMPORARIES = f"{ProgramBackendId.GTFN_CPU_WITH_TEMPORARIES}.executor"
ROUNDTRIP = f"{ProgramBackendId.ROUNDTRIP}.executor"
DOUBLE_ROUNDTRIP = f"{ProgramBackendId.DOUBLE_ROUNDTRIP}.executor"


class OptionalProgramExecutorId(_PythonObjectIdMixin, str, enum.Enum):
DACE_CPU_EXECUTOR = f"{OptionalProgramBackendId.DACE_CPU}.executor"
DACE_CPU = "gt4py.next.program_processors.runners.dace.run_dace_cpu"
DACE_GPU = "gt4py.next.program_processors.runners.dace.run_dace_gpu"


class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum):
Expand Down
Loading