diff --git a/config-lngs.yaml b/.github/dataflow-config-ci.yaml similarity index 87% rename from config-lngs.yaml rename to .github/dataflow-config-ci.yaml index 5b04441..6a8a2fe 100644 --- a/config-lngs.yaml +++ b/.github/dataflow-config-ci.yaml @@ -1,6 +1,5 @@ paths: - sandbox_path: /data1/shared/l200-p13/sandbox - tier_daq: $_/../daq/generated/tier/daq + sandbox_path: $_/sandbox tier_raw_blind: "" workflow: $_/workflow @@ -13,6 +12,7 @@ paths: detector_db: $_/inputs/hardware/detectors tier: $_/generated/tier + tier_daq: $_/generated/tier/daq tier_raw: $_/generated/tier/raw tier_tcm: $_/generated/tier/tcm tier_dsp: $_/generated/tier/dsp @@ -58,8 +58,6 @@ table_format: tcm: hardware_tcm_1 execenv: - cmd: apptainer exec - arg: /data2/public/prodenv/containers/legendexp_legend-base_latest_20241110203225.sif env: PRODENV: $PRODENV HDF5_USE_FILE_LOCKING: "False" @@ -69,4 +67,4 @@ execenv: PYGAMA_FASTMATH: "false" DISABLE_TQDM: "True" -legend_metadata_version: v0.5.7 +legend_metadata_version: refactor diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 0000000..9d1e098 --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,5 @@ +changelog: + exclude: + authors: + - dependabot + - pre-commit-ci diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c080083..288715d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,4 +1,4 @@ -name: lgdo +name: legend-dataflow on: workflow_dispatch: @@ -16,8 +16,8 @@ env: FORCE_COLOR: 3 jobs: - build-and-test: - name: Test lgdo with Python + unit-tests: + name: Run legend-dataflow unit tests runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -39,6 +39,56 @@ jobs: run: | python -m pytest + run-dataflow: + if: github.event.pull_request.head.repo.fork == false + name: Run the Snakemake workflow + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.11", "3.12"] + os: [ubuntu-latest, macos-13] + + steps: + - uses: 
actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Set LEGEND_METADATA variable + run: | + echo "LEGEND_METADATA=$GITHUB_WORKSPACE/inputs" >> $GITHUB_ENV + - name: Clone legend-metadata + uses: actions/checkout@v4 + with: + repository: legend-exp/legend-metadata + ref: refactor # REMOVE ME + fetch-depth: 0 + submodules: recursive + token: ${{ secrets.CLONE_LEGEND_METADATA }} + path: ${{ env.LEGEND_METADATA }} + + - name: Get dependencies and install legend-dataflow + run: | + python -m pip install --upgrade uv + python -m uv pip install --upgrade .[runprod] + + - name: Set the PRODENV variable + run: | + echo "PRODENV=$(realpath $GITHUB_WORKSPACE/..)" >> $GITHUB_ENV + + - name: test prodenv scripts + run: | + cp .github/dataflow-config-ci.yaml ./dataflow-config.yaml + + - name: run workflows in dry-run mode + run: | + snakemake --workflow-profile workflow/profiles/lngs-build-raw -n all-*-daq.gen + snakemake --workflow-profile workflow/profiles/lngs-build-raw -n all-*-raw.gen + snakemake --workflow-profile workflow/profiles/lngs -n all-*-evt.gen + snakemake --workflow-profile workflow/profiles/lngs -n all-*-skm.gen + test-coverage: name: Calculate and upload test coverage runs-on: ubuntu-latest @@ -48,7 +98,7 @@ jobs: fetch-depth: 2 - uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" - name: Generate Report run: | @@ -57,3 +107,5 @@ jobs: python -m pytest --cov=legenddataflow --cov-report=xml - name: Upload Coverage to codecov.io uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} diff --git a/config-nersc.yaml b/config-nersc.yaml deleted file mode 100644 index f94d8ff..0000000 --- a/config-nersc.yaml +++ /dev/null @@ -1,69 +0,0 @@ -setups: - l200: - paths: - sandbox_path: "" - tier_daq: $_/generated/tier/daq - tier_raw_blind: "" - workflow: $_/workflow - metadata: $_/inputs - config: 
$_/inputs/dataprod/config - par_overwrite: $_/inputs/dataprod/overrides - chan_map: $_/inputs/hardware/configuration - detector_db: $_/inputs/hardware/detectors - tier: $_/generated/tier - tier_raw: /dvs_ro/cfs/cdirs/m2676/users/pertoldi/legend-prodenv/prod-blind/ref-raw/generated/tier/raw - tier_tcm: /dvs_ro/cfs/cdirs/m2676/users/pertoldi/legend-prodenv/prod-blind/ref-v2.0.0/generated/tier/tcm - tier_dsp: $_/generated/tier/dsp - tier_hit: $_/generated/tier/hit - tier_evt: $_/generated/tier/evt - tier_psp: $_/generated/tier/psp - tier_pht: $_/generated/tier/pht - tier_pet: $_/generated/tier/pet - tier_skm: $_/generated/tier/skm - par: $_/generated/par - par_raw: $_/generated/par/raw - par_tcm: $_/generated/par/tcm - par_dsp: $_/generated/par/dsp - par_hit: $_/generated/par/hit - par_evt: $_/generated/par/evt - par_psp: $_/generated/par/psp - par_pht: $_/generated/par/pht - par_pet: $_/generated/par/pet - plt: $_/generated/plt - log: $_/generated/log - tmp_plt: $_/generated/tmp/plt - tmp_log: $_/generated/tmp/log - tmp_filelists: $_/generated/tmp/filelists - tmp_par: $_/generated/tmp/par - src: $_/software/python/src - install: $_/software/python/install - cache: $_/software/python/cache - table_format: - raw: ch{ch:07d}/raw - dsp: ch{ch:07d}/dsp - psp: ch{ch:07d}/dsp - hit: ch{ch:07d}/hit - pht: ch{ch:07d}/hit - evt: "{grp}/evt" - pet: "{grp}/evt" - skm: "{grp}/skm" - tcm: hardware_tcm_1 - execenv: - cmd: shifter - arg: " --image legendexp/legend-base:latest" - env: - HDF5_USE_FILE_LOCKING: "FALSE" - LGDO_BOUNDSCHECK: "false" - DSPEED_BOUNDSCHECK: "false" - PYGAMA_PARALLEL: "false" - PYGAMA_FASTMATH: "false" - pkg_versions: - pygama: pygama==2.0.3 - pylegendmeta: pylegendmeta==0.10.2 - dspeed: dspeed==1.6.1 - legend-pydataobj: legend-pydataobj==1.10.0 - legend-daq2lh5: legend-daq2lh5==1.2.1 - tensorflow: tensorflow==2.17 - keras: keras==3.6.0 - jax: jax==0.4.30 - meta_version: v0.5.7 diff --git a/dataflow-config.yaml b/dataflow-config.yaml new file mode 100644 
index 0000000..6a8a2fe --- /dev/null +++ b/dataflow-config.yaml @@ -0,0 +1,70 @@ +paths: + sandbox_path: $_/sandbox + tier_raw_blind: "" + + workflow: $_/workflow + + metadata: $_/inputs + config: $_/inputs/dataprod/config + par_overwrite: $_/inputs/dataprod/overrides + chan_map: $_/inputs/hardware/configuration + detector_status: $_/inputs/datasets + detector_db: $_/inputs/hardware/detectors + + tier: $_/generated/tier + tier_daq: $_/generated/tier/daq + tier_raw: $_/generated/tier/raw + tier_tcm: $_/generated/tier/tcm + tier_dsp: $_/generated/tier/dsp + tier_hit: $_/generated/tier/hit + tier_ann: $_/generated/tier/ann + tier_evt: $_/generated/tier/evt + tier_psp: $_/generated/tier/psp + tier_pht: $_/generated/tier/pht + tier_pan: $_/generated/tier/pan + tier_pet: $_/generated/tier/pet + tier_skm: $_/generated/tier/skm + + par: $_/generated/par + par_raw: $_/generated/par/raw + par_tcm: $_/generated/par/tcm + par_dsp: $_/generated/par/dsp + par_hit: $_/generated/par/hit + par_evt: $_/generated/par/evt + par_psp: $_/generated/par/psp + par_pht: $_/generated/par/pht + par_pet: $_/generated/par/pet + + plt: $_/generated/plt + log: $_/generated/log + + tmp_plt: $_/generated/tmp/plt + tmp_log: $_/generated/tmp/log + tmp_filelists: $_/generated/tmp/filelists + tmp_par: $_/generated/tmp/par + + src: $_/software/python/src + install: $_/.snakemake/legend-dataflow/venv + +table_format: + raw: ch{ch:07d}/raw + dsp: ch{ch:07d}/dsp + psp: ch{ch:07d}/dsp + hit: ch{ch:07d}/hit + pht: ch{ch:07d}/hit + evt: "{grp}/evt" + pet: "{grp}/evt" + skm: "{grp}/skm" + tcm: hardware_tcm_1 + +execenv: + env: + PRODENV: $PRODENV + HDF5_USE_FILE_LOCKING: "False" + LGDO_BOUNDSCHECK: "false" + DSPEED_BOUNDSCHECK: "false" + PYGAMA_PARALLEL: "false" + PYGAMA_FASTMATH: "false" + DISABLE_TQDM: "True" + +legend_metadata_version: refactor diff --git a/pyproject.toml b/pyproject.toml index 1c538d6..1924126 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ dependencies = [ 
"pylegendmeta==1.2.0a2", "legend-pydataobj>=1.11.6", "legend-daq2lh5>=1.4", + "pip", ] [project.optional-dependencies] diff --git a/workflow/profiles/default/config.yaml b/workflow/profiles/default/config.yaml index ba92572..b7c1508 100644 --- a/workflow/profiles/default/config.yaml +++ b/workflow/profiles/default/config.yaml @@ -1,4 +1,5 @@ cores: all +configfile: dataflow-config.yaml snakefile: ./workflow/Snakefile keep-going: true rerun-incomplete: true diff --git a/workflow/profiles/lngs-build-raw/config.yaml b/workflow/profiles/lngs-build-raw/config.yaml index 14be322..fd920aa 100644 --- a/workflow/profiles/lngs-build-raw/config.yaml +++ b/workflow/profiles/lngs-build-raw/config.yaml @@ -1,7 +1,7 @@ cores: 30 resources: - mem_swap=3500 -configfile: config-lngs.yaml +configfile: dataflow-config.yaml snakefile: ./workflow/Snakefile-build-raw keep-going: true rerun-incomplete: true diff --git a/workflow/profiles/lngs/config.yaml b/workflow/profiles/lngs/config.yaml index c72c8fc..142501e 100644 --- a/workflow/profiles/lngs/config.yaml +++ b/workflow/profiles/lngs/config.yaml @@ -2,7 +2,7 @@ cores: 125 restart-times: 2 resources: - mem_swap=3500 -configfile: config-lngs.yaml +configfile: dataflow-config.yaml snakefile: ./workflow/Snakefile keep-going: true rerun-incomplete: true diff --git a/workflow/rules/filelist_gen.smk b/workflow/rules/filelist_gen.smk index d92a5aa..3e1a5c4 100644 --- a/workflow/rules/filelist_gen.smk +++ b/workflow/rules/filelist_gen.smk @@ -128,9 +128,9 @@ def concat_phy_filenames(config, phy_filenames, tier): """ This function concatenates the files from the same run together """ - fn_pattern = patt.get_pattern(config, tier) + fn_pattern = get_pattern(config, tier) # group files by run - sorted_phy_filenames = patt.run_grouper(phy_filenames) + sorted_phy_filenames = run_grouper(phy_filenames) phy_filenames = [] for run in sorted_phy_filenames: diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py 
index c11b372..fc3bc47 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -6,6 +6,7 @@ import shlex import shutil import subprocess +import sys from pathlib import Path from typing import Iterable, Mapping @@ -50,7 +51,8 @@ def execenv_prefix( config = AttrsDict(config) cmdline = [] - if "env" in config.execenv: + cmdenv = {} + if "execenv" in config and "env" in config.execenv: cmdenv = config.execenv.env if "execenv" in config and "cmd" in config.execenv and "arg" in config.execenv: @@ -91,9 +93,9 @@ def execenv_prefix( def execenv_pyexe( config: AttrsDict, exename: str, as_string: bool = True ) -> str | tuple[list, dict]: - """Returns the Python interpreter command. + """Returns the path to an executable installed in the virtualenv. - For example: `apptainer run image.sif python` + For example: `apptainer run image.sif path/to/venv/bin/{exename}` Note ---- @@ -204,9 +206,12 @@ def install(args) -> None: ignore_missing=False, ) + # path to virtualenv location path_install = config_dict.paths.install if args.remove and Path(path_install).exists(): + msg = f"removing: {path_install}" + log.info(msg) shutil.rmtree(path_install) def _runcmd(cmd_expr, cmd_env, **kwargs): @@ -216,32 +221,39 @@ def _runcmd(cmd_expr, cmd_env, **kwargs): subprocess.run(cmd_expr, env=cmd_env, check=True, **kwargs) cmd_prefix, cmd_env = execenv_prefix(config_dict, as_string=False) + # HACK: get the full path to this python interpreter in case there is no execenv prefix + python = sys.executable if cmd_prefix == [] else "python" + python_venv, _ = execenv_pyexe(config_dict, "python", as_string=False) has_uv = False try: + # is uv already available? 
_runcmd( [*cmd_prefix, "uv", "--version"], cmd_env, capture_output=True, ) has_uv = True + # we'll use the existing uv (bare command; the "pip install" subcommand is appended below) + uv_expr = [*cmd_prefix, "uv"] except (subprocess.CalledProcessError, FileNotFoundError): - pass + # we'll use uv from the virtualenv (installed below) + uv_expr = [*python_venv, "-m", "uv"] # configure venv if has_uv: + # if uv is available, just use it to create the venv cmd_expr = [*cmd_prefix, "uv", "venv", path_install] else: - cmd_expr = [*cmd_prefix, "python3", "-m", "venv", path_install] + # otherwise use python-venv + cmd_expr = [*cmd_prefix, python, "-m", "venv", path_install] log.info(f"configuring virtual environment in {path_install}") _runcmd(cmd_expr, cmd_env) - python, cmd_env = execenv_pyexe(config_dict, "python", as_string=False) - if not has_uv: cmd_expr = [ - *python, + *python_venv, "-m", "pip", "--no-cache-dir", @@ -255,7 +267,7 @@ def _runcmd(cmd_expr, cmd_env, **kwargs): # install uv cmd_expr = [ - *python, + *python_venv, "-m", "pip", "--no-cache-dir", @@ -268,15 +280,14 @@ def _runcmd(cmd_expr, cmd_env, **kwargs): _runcmd(cmd_expr, cmd_env) # and finally install legenddataflow with all dependencies + # this must be done within the execenv, since jobs will be run within it cmd_expr = [ - *python, - "-m", - "uv", + *uv_expr, "pip", "--no-cache", "install", - str(config_loc), # +"[dataprod]" + str(config_loc), ] if args.editable: cmd_expr.insert(-1, "--editable")