Skip to content

Commit

Permalink
feat: support merging output from multiple file keys when writing to …
Browse files Browse the repository at this point in the history
…stdout (#115)

Co-authored-by: Bradley Dice <[email protected]>
  • Loading branch information
jameslamb and bdice authored Oct 22, 2024
1 parent 7d641b7 commit 6c2ae6d
Show file tree
Hide file tree
Showing 7 changed files with 329 additions and 21 deletions.
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ ENV_NAME="cudf_test"
rapids-dependency-file-generator \
--file-key "test" \
--output "conda" \
--matrix "cuda=11.5;arch=$(arch)" > env.yaml
--matrix "cuda=12.5;arch=$(arch)" > env.yaml
mamba env create --file env.yaml
mamba activate "$ENV_NAME"
Expand All @@ -335,6 +335,16 @@ The `--file-key`, `--output`, and `--matrix` flags must be used together. `--mat

Where multiple values for the same key are passed to `--matrix`, e.g. `cuda_suffixed=true;cuda_suffixed=false`, only the last value will be used.

Where `--file-key` is supplied multiple times in the same invocation, the output printed to `stdout` will contain a union (without duplicates) of all of the corresponding dependencies. For example:

```shell
rapids-dependency-file-generator \
--file-key "test" \
--file-key "test_notebooks" \
--output "conda" \
--matrix "cuda=12.5;arch=$(arch)" > env.yaml
```

The `--prepend-channel` argument accepts additional channels to use, like `rapids-dependency-file-generator --prepend-channel my_channel --prepend-channel my_other_channel`.
If both `--output` and `--prepend-channel` are provided, the output format must be conda.
Prepending channels can be useful for adding local channels with packages to be tested in CI workflows.
Expand Down
8 changes: 6 additions & 2 deletions src/rapids_dependency_file_generator/_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ def validate_args(argv):
codependent_args = parser.add_argument_group("optional, but codependent")
codependent_args.add_argument(
"--file-key",
help="The file key from `dependencies.yaml` to generate.",
action="append",
help=(
"The file key from `dependencies.yaml` to generate. "
"If supplied multiple times, dependency lists from all requested file keys will be merged."
),
)
codependent_args.add_argument(
"--output",
Expand Down Expand Up @@ -109,7 +113,7 @@ def main(argv=None) -> None:
to_stdout = all([args.file_key, args.output, args.matrix is not None])

if to_stdout:
file_keys = [args.file_key]
file_keys = args.file_key
output = {Output(args.output)}
else:
file_keys = list(parsed_config.files.keys())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import textwrap
import typing
from collections.abc import Generator
from dataclasses import dataclass

import tomlkit
import yaml
Expand Down Expand Up @@ -95,27 +96,33 @@ def grid(gridspec: dict[str, list[str]]) -> Generator[dict[str, str], None, None
def make_dependency_file(
*,
file_type: _config.Output,
name: os.PathLike,
conda_env_name: typing.Union[str, None],
file_name: str,
config_file: os.PathLike,
output_dir: os.PathLike,
conda_channels: list[str],
dependencies: typing.Sequence[typing.Union[str, dict[str, list[str]]]],
extras: typing.Union[_config.FileExtras, None],
):
) -> str:
"""Generate the contents of the dependency file.
Parameters
----------
file_type : Output
An Output value used to determine the file type.
name : PathLike
The name of the file to write.
conda_env_name : str | None
Name to put in the 'name: ' field when generating conda environment YAML files.
If ``None``, the generated conda environment file will not have a 'name:' entry.
Only used when ``file_type`` is CONDA.
file_name : str
Name of a file in ``output_dir`` to read in.
Only used when ``file_type`` is PYPROJECT.
config_file : PathLike
The full path to the dependencies.yaml file.
output_dir : PathLike
The path to the directory where the dependency files will be written.
conda_channels : list[str]
The channels to include in the file. Only used when `file_type` is
The channels to include in the file. Only used when ``file_type`` is
CONDA.
dependencies : Sequence[str | dict[str, list[str]]]
The dependencies to include in the file.
Expand All @@ -135,13 +142,13 @@ def make_dependency_file(
"""
)
if file_type == _config.Output.CONDA:
file_contents += yaml.dump(
{
"name": os.path.splitext(name)[0],
"channels": conda_channels,
"dependencies": dependencies,
}
)
env_dict = {
"channels": conda_channels,
"dependencies": dependencies,
}
if conda_env_name is not None:
env_dict["name"] = conda_env_name
file_contents += yaml.dump(env_dict)
elif file_type == _config.Output.REQUIREMENTS:
for dep in dependencies:
if isinstance(dep, dict):
Expand Down Expand Up @@ -173,7 +180,7 @@ def make_dependency_file(
key = extras.key

# This file type needs to be modified in place instead of built from scratch.
with open(os.path.join(output_dir, name)) as f:
with open(os.path.join(output_dir, file_name)) as f:
file_contents_toml = tomlkit.load(f)

toml_deps = tomlkit.array()
Expand Down Expand Up @@ -320,6 +327,32 @@ def should_use_specific_entry(matrix_combo: dict[str, str], specific_entry_matri
)


@dataclass
class _DependencyCollection:
str_deps: set[str]
# e.g. {"pip": ["dgl", "pyg"]}, used in conda envs
dict_deps: dict[str, list[str]]

def update(self, deps: typing.Sequence[typing.Union[str, dict[str, list[str]]]]) -> None:
for dep in deps:
if isinstance(dep, dict):
for k, v in dep.items():
if k in self.dict_deps:
self.dict_deps[k].extend(v)
self.dict_deps[k] = sorted(set(self.dict_deps[k]))
else:
self.dict_deps[k] = v
else:
self.str_deps.add(dep)

@property
def deps_list(self) -> typing.Sequence[typing.Union[str, dict[str, list[str]]]]:
if self.dict_deps:
return [*sorted(self.str_deps), self.dict_deps]

return [*sorted(self.str_deps)]


def make_dependency_files(
*,
parsed_config: _config.Config,
Expand Down Expand Up @@ -360,6 +393,19 @@ def make_dependency_files(
If the file is malformed. There are numerous different error cases
which are described by the error messages.
"""
if to_stdout and len(file_keys) > 1 and output is not None and _config.Output.PYPROJECT in output:
raise ValueError(
f"Using --file-key multiple times together with '--output {_config.Output.PYPROJECT.value}' "
"when writing to stdout is not supported."
)

# the list of conda channels does not depend on individual file keys
conda_channels = prepend_channels + parsed_config.channels

# initialize a container for "all dependencies found across all files", to support
# passing multiple file keys and writing a merged result to stdout
all_dependencies = _DependencyCollection(str_deps=set(), dict_deps={})

for file_key in file_keys:
file_config = parsed_config.files[file_key]
file_types_to_generate = file_config.output if output is None else output
Expand Down Expand Up @@ -438,18 +484,50 @@ def make_dependency_files(
)
contents = make_dependency_file(
file_type=file_type,
name=full_file_name,
conda_env_name=os.path.splitext(full_file_name)[0],
file_name=full_file_name,
config_file=parsed_config.path,
output_dir=output_dir,
conda_channels=prepend_channels + parsed_config.channels,
conda_channels=conda_channels,
dependencies=deduped_deps,
extras=file_config.extras,
)

if to_stdout:
print(contents)
if len(file_keys) == 1:
print(contents)
else:
all_dependencies.update(deduped_deps)
else:
os.makedirs(output_dir, exist_ok=True)
file_path = os.path.join(output_dir, full_file_name)
with open(file_path, "w") as f:
f.write(contents)

# create one unified output from all the file_keys, and print it to stdout
if to_stdout and len(file_keys) > 1:
# convince mypy that 'output' is not None here
#
# 'output' is technically a set because of https://github.com/rapidsai/dependency-file-generator/pull/74,
# but since https://github.com/rapidsai/dependency-file-generator/pull/79 it's only ever one of the following:
#
# - an exactly-1-item set (stdout=True, or when used by rapids-build-backend)
# - 'None' (stdout=False)
#
err_msg = (
"Exactly 1 output type should be provided when asking rapids-dependency-file-generator to write to stdout. "
"If you see this, you've found a bug. Please report it."
)
assert output is not None, err_msg

contents = make_dependency_file(
file_type=output.pop(),
conda_env_name=None,
file_name="ignored-because-multiple-pyproject-files-are-not-supported",
config_file=parsed_config.path,
output_dir=parsed_config.path,
conda_channels=conda_channels,
dependencies=all_dependencies.deps_list,
extras=None,
)
print(contents)
82 changes: 82 additions & 0 deletions tests/examples/overlapping-deps/dependencies.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
files:
build_deps:
output: [pyproject]
pyproject_dir: output/actual
extras:
table: build-system
includes:
- rapids_build_skbuild
- depends_on_numpy
even_more_build_deps:
output: [pyproject]
pyproject_dir: output/actual
extras:
table: tool.rapids-build-backend
key: requires
includes:
- depends_on_numpy
- depends_on_pandas
test_deps:
output: none
includes:
- depends_on_numpy
- depends_on_pandas
even_more_test_deps:
output: none
includes:
- depends_on_numpy
- test_python
test_with_sklearn:
output: none
includes:
- depends_on_scikit_learn
channels:
- rapidsai
- conda-forge
dependencies:
depends_on_numpy:
common:
- output_types: [requirements, pyproject]
packages:
- numpy>=2.0
# using 'pip' intentionally to test handling of that nested list
- output_types: [conda]
packages:
- pip
- pip:
- numpy>=2.0
depends_on_pandas:
common:
- output_types: [conda, requirements, pyproject]
packages:
- pandas<3.0
depends_on_scikit_learn:
common:
- output_types: [conda, requirements, pyproject]
packages:
- scikit-learn>=1.5
test_python:
common:
- output_types: [conda, requirements, pyproject]
packages:
- matplotlib
- output_types: [conda]
packages:
- pip
# intentional overlap (numpy) with depends_on_numpy's pip list, to
# test that pip dependencies don't have duplicates
- pip:
# intentionally not in alphabetical order
- numpy>=2.0
- folium
rapids_build_skbuild:
common:
- output_types: [conda, requirements, pyproject]
packages:
- rapids-build-backend>=0.3.1
- output_types: [requirements, pyproject]
packages:
- scikit-build-core[pyproject]>=0.9.0
- output_types: [conda]
packages:
- scikit-build-core>=0.9.0
20 changes: 20 additions & 0 deletions tests/examples/overlapping-deps/output/expected/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[build-system]
build-backend = "rapids_build_backend.build_meta"
requires = [
"numpy>=2.0",
"rapids-build-backend>=0.3.1",
"scikit-build-core[pyproject]>=0.9.0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.

[project]
name = "libbeepboop"
version = "0.1.2"
dependencies = [
"scipy",
]

[tool.rapids-build-backend]
requires = [
"numpy>=2.0",
"pandas<3.0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
44 changes: 44 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,47 @@ def test_validate_args():
"all",
]
)

# Valid, with 2 files for --output requirements
validate_args(
[
"--output",
"requirements",
"--matrix",
"cuda=12.5",
"--file-key",
"all",
"--file-key",
"test_python",
]
)

# Valid, with 2 files for --output conda
validate_args(
[
"--output",
"conda",
"--matrix",
"cuda=12.5",
"--file-key",
"all",
"--file-key",
"test_python",
]
)

# Valid, with 3 files
validate_args(
[
"--output",
"requirements",
"--matrix",
"cuda=12.5",
"--file-key",
"all",
"--file-key",
"test_python",
"--file-key",
"build_python",
]
)
Loading

0 comments on commit 6c2ae6d

Please sign in to comment.