✅ Test workflow v2 on Alzheimer dataset

- Once this passes, add the ald analysis to the website (for a reasonable subset of models) - maybe showcase only the PIMMS models alongside a handful of other models
RasmussenLab · May 21, 2024 · abd06fd · abd06fd
1 parent c5e8862
commit abd06fd
Show file tree

Hide file tree

Showing 3 changed files with 85 additions and 6 deletions.
diff --git a/.github/workflows/ci_workflow.yaml b/.github/workflows/ci_workflow.yaml
@@ -40,14 +40,16 @@ jobs:
         run: |
           conda info
           conda list
+      - name: Dry-run workflow  # `-n` below only plans the jobs; nothing is executed
+        run: |
+          cd project
+          snakemake -p -c1 --configfile config/single_dev_dataset/example/config.yaml -n --use-conda
       - name: Run demo workflow (integration test)
         continue-on-error: true  # a failure here is tolerated; the next step re-runs the workflow
         run: |
           cd project
-          snakemake -p -c1 --configfile config/single_dev_dataset/example/config.yaml -n --use-conda
           snakemake -p -c4 -k --configfile config/single_dev_dataset/example/config.yaml --use-conda
       - name: Run demo workflow again (in case of installation issues)
         run: |
           cd project
-          snakemake -p -c1 -n --configfile config/single_dev_dataset/example/config.yaml --use-conda
           snakemake -p -c1 -k --configfile config/single_dev_dataset/example/config.yaml --use-conda
diff --git a/.github/workflows/workflow_website.yaml b/.github/workflows/workflow_website.yaml
@@ -29,17 +29,19 @@ jobs:
         activate-environment: vaep
         auto-activate-base: true
         # auto-update-conda: true
+    - name: Dry-run workflow  # `-n` below only plans the jobs for the alzheimer_study config
+      run: | 
+        cd project
+        snakemake -s workflow/Snakefile_v2 --configfile config/alzheimer_study/config.yaml -p -c1 -n
     - name: Run demo workflow (integration test)
       continue-on-error: true  # a failure here is tolerated; the next step re-runs the workflow
       run: | 
         cd project
-        snakemake -p -c1 -n
-        snakemake -p -c4 -k
+        snakemake -s workflow/Snakefile_v2 --configfile config/alzheimer_study/config.yaml -p -c4 -k
     - name: Run demo workflow again (in case of installation issues)
       run: | 
         cd project
-        snakemake -p -c1 -n
-        snakemake -p -c4 -k
+        snakemake -s workflow/Snakefile_v2 --configfile config/alzheimer_study/config.yaml -p -c4 -k
     - name: Install website dependencies
       run: |
         pip install .[docs]

diff --git a/project/config/alzheimer_study/config.yaml b/project/config/alzheimer_study/config.yaml
@@ -0,0 +1,75 @@
+# config for Snakefile_v2.smk
+config_split: runs/alzheimer_study_2023_11_v2/split.yaml # ! will be built
+config_train: runs/alzheimer_study_2023_11_v2/train_{model}.yaml # ! will be built
+folder_experiment: runs/alzheimer_study_2023_11_v2  # config_split and config_train above live in this folder
+fn_rawfile_metadata: https://raw.githubusercontent.com/RasmussenLab/njab/HEAD/docs/tutorial/data/alzheimer/meta.csv  # remote CSV from the njab tutorial data
+cuda: False
+file_format: csv
+split_data:  # presumably the settings written to config_split - TODO confirm
+    FN_INTENSITIES: https://raw.githubusercontent.com/RasmussenLab/njab/HEAD/docs/tutorial/data/alzheimer/proteome.csv  # remote CSV from the njab tutorial data
+    sample_completeness: 0.5
+    feat_prevalence: 0.25
+    column_names:
+        - protein groups
+    index_col: 0
+    meta_cat_col: _collection site
+    meta_date_col: null  # no date column is configured for this dataset
+    frac_mnar: 0.25
+    frac_non_train: 0.1
+models:  # one single-key mapping per model; the key is repeated as its `model` value
+    - Median:
+          model: Median
+    - CF:
+          model: CF
+          latent_dim: 50
+          batch_size: 1024
+          epochs_max: 100
+          sample_idx_position: 0
+          cuda: False
+          save_pred_real_na: True
+    - DAE:
+          model: DAE
+          latent_dim: 10
+          batch_size: 64
+          epochs_max: 300
+          hidden_layers: "64"
+          sample_idx_position: 0
+          cuda: False
+          save_pred_real_na: True
+    - VAE:
+          model: VAE
+          latent_dim: 10
+          batch_size: 64
+          epochs_max: 300
+          hidden_layers: "64"
+          sample_idx_position: 0
+          cuda: False
+          save_pred_real_na: True
+    - KNN:
+          model: KNN
+          neighbors: 3
+          file_format: csv  # same value as the top-level file_format
+NAGuideR_methods:  # SEQKNN is commented out below because of the quoted error
+    - BPCA
+    - COLMEDIAN
+    - IMPSEQ
+    - IMPSEQROB
+    - IRM
+    - KNN_IMPUTE
+    - LLS
+    - MICE-CART
+    - MICE-NORM
+    - MINDET
+    - MINIMUM
+    - MINPROB
+    - MLE
+    - MSIMPUTE
+    - MSIMPUTE_MNAR
+    - PI
+    - QRILC
+    - RF
+    - ROWMEDIAN
+    # - SEQKNN # Error in x[od, ismiss, drop = FALSE]: subscript out of bounds
+    - SVDMETHOD
+    - TRKNN
+    - ZERO