diff --git a/kidney/config.py b/kidney/config.py
deleted file mode 100644
index 5ae109f..0000000
--- a/kidney/config.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from pydantic import BaseModel
-from typing import List
-
-
-class DataConfig(BaseModel):
-    data_path: str
-    batch_size: int = 32
-    augment: bool = True
-    balance: bool = True
-
-
-class ModelConfig(BaseModel):
-    type: str = "vgg"
-    image_size: int = 128
-    num_classes: int = 2
-    input_fmaps: int = 1
-    fmaps: int = 32
-
-
-class TrainingConfig(BaseModel):
-    epochs: int = 10
-    learning_rate: float = 1e-4
-    checkpoint_dir: str = "checkpoints"
-    device: str = "cuda"
-
-
-class ExperimentConfig(BaseModel):
-    project: str
-    notes: str
-    tags: List[str] = ["quac"]
-    log_images: int = 8  # Number of images to log to wandb
-    data: DataConfig
-    model: ModelConfig
-    training: TrainingConfig
-    val_data: DataConfig = None
\ No newline at end of file
diff --git a/kidney/configs/resnet18.yml b/kidney/configs/resnet18.yml
deleted file mode 100644
index a55f9b5..0000000
--- a/kidney/configs/resnet18.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-project: "kidney-classification"
-notes: "ResNet18 model for kidney cortex cells classification"
-tags: ["kidney", "quac", "resnet18", "classification"]
-data:
-  data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F33/Mask_2D_Maximum"
-  batch_size: 64
-val_data:
-  data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F44/Mask_2D_Maximum"
-  batch_size: 64
-  augment: False
-  balance: False
-model:
-  type: "resnet18"
-  image_size: 32
-  num_classes: 12
-training:
-  epochs: 100
-  learning_rate: 0.0001
-  checkpoint_dir: "/nrs/funke/adjavond/projects/quac/kidney-classification/resnet18"
-  device: "cuda"
diff --git a/kidney/configs/resnet18_v1.yml b/kidney/configs/resnet18_v1.yml
deleted file mode 100644
index e225b3a..0000000
--- a/kidney/configs/resnet18_v1.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-project: "kidney-classification"
-notes: "Resnet18 model for kidney cortex cells classification. This time with an 8-class problem, added metrics, and added augmentations."
-tags: ["kidney", "quac", "resnet18", "classification", "noise_augment", "translations", "8 class"]
-data:
-  data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F33_8class/Mask_2D_Maximum"
-  batch_size: 64
-val_data:
-  data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F44_8class/Mask_2D_Maximum"
-  batch_size: 64
-  augment: False
-  balance: False
-model:
-  type: "resnet18"
-  image_size: 32
-  num_classes: 8
-training:
-  epochs: 100
-  learning_rate: 0.0001
-  checkpoint_dir: "/nrs/funke/adjavond/projects/quac/kidney-classification/resnet18_v1"
-  device: "cuda"
diff --git a/kidney/configs/resnet18_v2.yml b/kidney/configs/resnet18_v2.yml
deleted file mode 100644
index ab68945..0000000
--- a/kidney/configs/resnet18_v2.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-project: "kidney-classification"
-notes: "Resnet18 8 class, with the All 3D data, where we treat z as a channel dimension."
-tags: ["kidney", "quac", "resnet18", "classification", "noise_augment", "translations", "8 class", "All 3D"] -data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F33_8class/All_3D" - batch_size: 64 -val_data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F44_8class/All_3D" - batch_size: 64 - augment: False - balance: False -model: - type: "resnet18" - image_size: 32 - num_classes: 8 - input_fmaps: 7 -training: - epochs: 100 - learning_rate: 0.0001 - checkpoint_dir: "/nrs/funke/adjavond/projects/quac/kidney-classification/resnet18_v2" - device: "cuda" diff --git a/kidney/configs/resnet18_v3.yml b/kidney/configs/resnet18_v3.yml deleted file mode 100644 index c136c65..0000000 --- a/kidney/configs/resnet18_v3.yml +++ /dev/null @@ -1,20 +0,0 @@ -project: "kidney-classification" -notes: "Resnet18 model for kidney cortex cells classification. This time with an 8-class problem, added metrics, and added augmentations." -tags: ["kidney", "quac", "resnet18", "classification", "translations", "8 class", "model_ema"] -data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F33_8class/Mask_2D_Maximum" - batch_size: 64 -val_data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F44_8class/Mask_2D_Maximum" - batch_size: 64 - augment: False - balance: False -model: - type: "resnet18" - image_size: 32 - num_classes: 8 -training: - epochs: 100 - learning_rate: 0.0001 - checkpoint_dir: "/nrs/funke/adjavond/projects/quac/kidney-classification/resnet18_v3" - device: "cuda" diff --git a/kidney/configs/resnet18_v4.yml b/kidney/configs/resnet18_v4.yml deleted file mode 100644 index 486eaf5..0000000 --- a/kidney/configs/resnet18_v4.yml +++ /dev/null @@ -1,21 +0,0 @@ -project: "kidney-classification" -notes: "Resnet18 8 class, with the All 3D data, where we treat z as a channel dimension." -tags: ["kidney", "quac", "resnet18", "classification", "noise_augment", "translations", "8 class", "All 3D", "model_ema"] -data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F33_8class/All_3D" - batch_size: 64 -val_data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F44_8class/All_3D" - batch_size: 64 - augment: False - balance: False -model: - type: "resnet18" - image_size: 32 - num_classes: 8 - input_fmaps: 7 -training: - epochs: 100 - learning_rate: 0.0001 - checkpoint_dir: "/nrs/funke/adjavond/projects/quac/kidney-classification/resnet18_v4" - device: "cuda" diff --git a/kidney/configs/resnet18_v4_all_samples.yml b/kidney/configs/resnet18_v4_all_samples.yml deleted file mode 100644 index d7b5f8e..0000000 --- a/kidney/configs/resnet18_v4_all_samples.yml +++ /dev/null @@ -1,16 +0,0 @@ -project: "kidney-classification" -notes: "Resnet18 8 class, with the All 3D data, where we treat z as a channel dimension." 
-tags: ["kidney", "quac", "resnet18", "classification", "noise_augment_small", "translations", "8 class", "All 3D", "model_ema", "all_samples"] -data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/All_8class/All_3D" - batch_size: 64 -model: - type: "resnet18" - image_size: 32 - num_classes: 8 - input_fmaps: 7 -training: - epochs: 100 - learning_rate: 0.0001 - checkpoint_dir: "/nrs/funke/adjavond/projects/quac/kidney-classification/resnet18_all_samples" - device: "cuda" diff --git a/kidney/configs/resnet34.yml b/kidney/configs/resnet34.yml deleted file mode 100644 index 430f628..0000000 --- a/kidney/configs/resnet34.yml +++ /dev/null @@ -1,20 +0,0 @@ -project: "kidney-classification" -notes: "ResNet34 model for kidney cortex cells classification" -tags: ["kidney", "quac", "resnet34", "classification"] -data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F33/Mask_2D_Maximum" - batch_size: 64 -val_data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F44/Mask_2D_Maximum" - batch_size: 64 - augment: False - balance: False -model: - type: "resnet34" - image_size: 32 - num_classes: 12 -training: - epochs: 100 - learning_rate: 0.0001 - checkpoint_dir: "/nrs/funke/adjavond/projects/quac/kidney-classification/resnet34" - device: "cuda" diff --git a/kidney/configs/resnet34_v1.yml b/kidney/configs/resnet34_v1.yml deleted file mode 100644 index fbe2035..0000000 --- a/kidney/configs/resnet34_v1.yml +++ /dev/null @@ -1,20 +0,0 @@ -project: "kidney-classification" -notes: "Resnet34 model for kidney cortex cells classification. This time with an 8-class problem, added metrics, and added augmentations." -tags: ["kidney", "quac", "resnet34", "classification", "noise_augment", "translations", "8 class"] -data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F33_8class/Mask_2D_Maximum" - batch_size: 64 -val_data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F44_8class/Mask_2D_Maximum" - batch_size: 64 - augment: False - balance: False -model: - type: "resnet34" - image_size: 32 - num_classes: 8 -training: - epochs: 100 - learning_rate: 0.0001 - checkpoint_dir: "/nrs/funke/adjavond/projects/quac/kidney-classification/resnet34_v1" - device: "cuda" diff --git a/kidney/configs/resnet34_v2.yml b/kidney/configs/resnet34_v2.yml deleted file mode 100644 index 7ea0770..0000000 --- a/kidney/configs/resnet34_v2.yml +++ /dev/null @@ -1,21 +0,0 @@ -project: "kidney-classification" -notes: "Resnet34 8 class, with the All 3D data, where we treat z as a channel dimension." 
-tags: ["kidney", "quac", "resnet34", "classification", "noise_augment", "translations", "8 class", "All 3D"] -data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F33_8class/All_3D" - batch_size: 64 -val_data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F44_8class/All_3D" - batch_size: 64 - augment: False - balance: False -model: - type: "resnet34" - image_size: 32 - num_classes: 8 - input_fmaps: 7 -training: - epochs: 100 - learning_rate: 0.0001 - checkpoint_dir: "/nrs/funke/adjavond/projects/quac/kidney-classification/resnet34_v2" - device: "cuda" diff --git a/kidney/configs/vgg.yml b/kidney/configs/vgg.yml deleted file mode 100644 index 309f513..0000000 --- a/kidney/configs/vgg.yml +++ /dev/null @@ -1,20 +0,0 @@ -project: "kidney-classification" -notes: "VGG16 model for kidney cortex cells classification" -tags: ["kidney", "quac", "vgg16", "classification"] -data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F33/Mask_2D_Maximum" - batch_size: 64 -val_data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F44/Mask_2D_Maximum" - batch_size: 64 - augment: False - balance: False -model: - type: "vgg" - image_size: 32 - num_classes: 12 -training: - epochs: 100 - learning_rate: 0.0001 - checkpoint_dir: "/nrs/funke/adjavond/projects/quac/kidney-classification/vgg16" - device: "cuda" diff --git a/kidney/configs/vgg_v1.yml b/kidney/configs/vgg_v1.yml deleted file mode 100644 index e3de1f6..0000000 --- a/kidney/configs/vgg_v1.yml +++ /dev/null @@ -1,20 +0,0 @@ -project: "kidney-classification" -notes: "VGG16 model for kidney cortex cells classification. This time with an 8-class problem, added metrics, and added augmentations." -tags: ["kidney", "quac", "vgg16", "classification", "noise_augment", "translations", "8 class"] -data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F33_8class/Mask_2D_Maximum" - batch_size: 64 -val_data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F44_8class/Mask_2D_Maximum" - batch_size: 64 - augment: False - balance: False -model: - type: "vgg" - image_size: 32 - num_classes: 8 -training: - epochs: 100 - learning_rate: 0.0001 - checkpoint_dir: "/nrs/funke/adjavond/projects/quac/kidney-classification/vgg16_v1" - device: "cuda" diff --git a/kidney/configs/vgg_v2.yml b/kidney/configs/vgg_v2.yml deleted file mode 100644 index e1fcfd6..0000000 --- a/kidney/configs/vgg_v2.yml +++ /dev/null @@ -1,21 +0,0 @@ -project: "kidney-classification" -notes: "VGG16 8 class, with the All 3D data, where we treat z as a channel dimension." 
-tags: ["kidney", "quac", "vgg16", "classification", "noise_augment", "translations", "8 class", "All 3D"] -data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F33_8class/All_3D" - batch_size: 64 -val_data: - data_path: "/nrs/funke/adjavond/data/kidney_cortex_cells/F44_8class/All_3D" - batch_size: 64 - augment: False - balance: False -model: - type: "vgg" - image_size: 32 - num_classes: 8 - input_fmaps: 7 -training: - epochs: 100 - learning_rate: 0.0001 - checkpoint_dir: "/nrs/funke/adjavond/projects/quac/kidney-classification/vgg16_v2" - device: "cuda" diff --git a/kidney/environment.yml b/kidney/environment.yml deleted file mode 100644 index 7b4d0d6..0000000 --- a/kidney/environment.yml +++ /dev/null @@ -1,253 +0,0 @@ -name: quac -channels: - - pytorch - - nvidia - - conda-forge -dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_kmp_llvm - - asttokens=2.4.1=pyhd8ed1ab_0 - - blas=2.116=mkl - - blas-devel=3.9.0=16_linux64_mkl - - brotli-python=1.1.0=py311hb755f60_1 - - bzip2=1.0.8=hd590300_5 - - ca-certificates=2023.11.17=hbcca054_0 - - certifi=2023.11.17=pyhd8ed1ab_0 - - charset-normalizer=3.3.2=pyhd8ed1ab_0 - - comm=0.2.1=pyhd8ed1ab_0 - - cuda-cudart=11.8.89=0 - - cuda-cupti=11.8.87=0 - - cuda-libraries=11.8.0=0 - - cuda-nvrtc=11.8.89=0 - - cuda-nvtx=11.8.86=0 - - cuda-runtime=11.8.0=0 - - debugpy=1.8.0=py311hb755f60_1 - - decorator=5.1.1=pyhd8ed1ab_0 - - exceptiongroup=1.2.0=pyhd8ed1ab_2 - - executing=2.0.1=pyhd8ed1ab_0 - - ffmpeg=4.3=hf484d3e_0 - - filelock=3.13.1=pyhd8ed1ab_0 - - freetype=2.12.1=h267a509_2 - - gmp=6.3.0=h59595ed_0 - - gmpy2=2.1.2=py311h6a5fa03_1 - - gnutls=3.6.13=h85f3911_1 - - icu=73.2=h59595ed_0 - - idna=3.6=pyhd8ed1ab_0 - - importlib-metadata=7.0.1=pyha770c72_0 - - importlib_metadata=7.0.1=hd8ed1ab_0 - - ipykernel=6.29.0=pyhd33586a_0 - - ipython=8.20.0=pyh707e725_0 - - jedi=0.19.1=pyhd8ed1ab_0 - - jinja2=3.1.3=pyhd8ed1ab_0 - - jpeg=9e=h166bdaf_2 - - jupyter_client=8.6.0=pyhd8ed1ab_0 - - jupyter_core=5.7.1=py311h38be061_0 - - lame=3.100=h166bdaf_1003 - - lcms2=2.15=hfd0df8a_0 - - ld_impl_linux-64=2.40=h41732ed_0 - - lerc=4.0.0=h27087fc_0 - - libblas=3.9.0=16_linux64_mkl - - libcblas=3.9.0=16_linux64_mkl - - libcublas=11.11.3.6=0 - - libcufft=10.9.0.58=0 - - libcufile=1.8.1.2=0 - - libcurand=10.3.4.107=0 - - libcusolver=11.4.1.48=0 - - libcusparse=11.7.5.86=0 - - libdeflate=1.17=h0b41bf4_0 - - libexpat=2.5.0=hcb278e6_1 - - libffi=3.4.2=h7f98852_5 - - libgcc-ng=13.2.0=h807b86a_3 - - libgfortran-ng=13.2.0=h69a702a_3 - - libgfortran5=13.2.0=ha4646dd_3 - - libhwloc=2.9.3=default_h554bfaf_1009 - - libiconv=1.17=hd590300_2 - - libjpeg-turbo=2.0.0=h9bf148f_0 - - liblapack=3.9.0=16_linux64_mkl - - liblapacke=3.9.0=16_linux64_mkl - - libnpp=11.8.0.86=0 - - libnsl=2.0.1=hd590300_0 - - libnvjpeg=11.9.0.86=0 - - libpng=1.6.39=h753d276_0 - - libsodium=1.0.18=h36c2ea0_1 - - libsqlite=3.44.2=h2797004_0 - - libstdcxx-ng=13.2.0=h7e041cc_3 - - libtiff=4.5.0=h6adf6a1_2 - - libuuid=2.38.1=h0b41bf4_0 - - libwebp-base=1.3.2=hd590300_0 - - libxcb=1.13=h7f98852_1004 - - libxcrypt=4.4.36=hd590300_1 - - libxml2=2.12.4=h232c23b_1 - - libzlib=1.2.13=hd590300_5 - - llvm-openmp=15.0.7=h0cdce71_0 - - markupsafe=2.1.3=py311h459d7ec_1 - - matplotlib-inline=0.1.6=pyhd8ed1ab_0 - - mkl=2022.1.0=h84fe81f_915 - - mkl-devel=2022.1.0=ha770c72_916 - - mkl-include=2022.1.0=h84fe81f_915 - - mpc=1.3.1=hfe3b2da_0 - - mpfr=4.2.1=h9458935_0 - - mpmath=1.3.0=pyhd8ed1ab_0 - - ncurses=6.4=h59595ed_2 - - nest-asyncio=1.6.0=pyhd8ed1ab_0 - - nettle=3.6=he412f7d_0 - - 
networkx=3.2.1=pyhd8ed1ab_0 - - numpy=1.26.3=py311h64a7726_0 - - openh264=2.1.1=h780b84a_0 - - openjpeg=2.5.0=hfec8fc6_2 - - openssl=3.2.0=hd590300_1 - - packaging=23.2=pyhd8ed1ab_0 - - parso=0.8.3=pyhd8ed1ab_0 - - pexpect=4.9.0=pyhd8ed1ab_0 - - pickleshare=0.7.5=py_1003 - - pillow=9.4.0=py311h50def17_1 - - pip=23.3.2=pyhd8ed1ab_0 - - platformdirs=4.1.0=pyhd8ed1ab_0 - - prompt-toolkit=3.0.42=pyha770c72_0 - - psutil=5.9.8=py311h459d7ec_0 - - pthread-stubs=0.4=h36c2ea0_1001 - - ptyprocess=0.7.0=pyhd3deb0d_0 - - pure_eval=0.2.2=pyhd8ed1ab_0 - - pygments=2.17.2=pyhd8ed1ab_0 - - pysocks=1.7.1=pyha2e5f31_6 - - python=3.11.7=hab00c5b_1_cpython - - python-dateutil=2.8.2=pyhd8ed1ab_0 - - python_abi=3.11=4_cp311 - - pytorch=2.1.2=py3.11_cuda11.8_cudnn8.7.0_0 - - pytorch-cuda=11.8=h7e8668a_5 - - pytorch-mutex=1.0=cuda - - pyyaml=6.0.1=py311h459d7ec_1 - - pyzmq=25.1.2=py311h34ded2d_0 - - readline=8.2=h8228510_1 - - requests=2.31.0=pyhd8ed1ab_0 - - setuptools=69.0.3=pyhd8ed1ab_0 - - six=1.16.0=pyh6c4a22f_0 - - stack_data=0.6.2=pyhd8ed1ab_0 - - sympy=1.12=pypyh9d50eac_103 - - tbb=2021.11.0=h00ab1b0_0 - - tk=8.6.13=noxft_h4845f30_101 - - torchtriton=2.1.0=py311 - - torchvision=0.16.2=py311_cu118 - - tornado=6.3.3=py311h459d7ec_1 - - traitlets=5.14.1=pyhd8ed1ab_0 - - typing_extensions=4.9.0=pyha770c72_0 - - urllib3=2.1.0=pyhd8ed1ab_0 - - wcwidth=0.2.13=pyhd8ed1ab_0 - - wheel=0.42.0=pyhd8ed1ab_0 - - xorg-libxau=1.0.11=hd590300_0 - - xorg-libxdmcp=1.1.3=h7f98852_0 - - xz=5.2.6=h166bdaf_0 - - yaml=0.2.5=h7f98852_2 - - zeromq=4.3.5=h59595ed_0 - - zipp=3.17.0=pyhd8ed1ab_0 - - zlib=1.2.13=hd590300_5 - - zstd=1.5.5=hfc55251_0 - - pip: - - annotated-types==0.6.0 - - anyio==4.2.0 - - appdirs==1.4.4 - - arrow==1.3.0 - - babel==2.14.0 - - binaryornot==0.4.4 - - black==24.1.1 - - captum==0.7.0 - - cffi==1.16.0 - - cfgv==3.4.0 - - chardet==5.2.0 - - click==8.1.7 - - colorama==0.4.6 - - configargparse==1.7 - - contourpy==1.2.0 - - cookiecutter==2.5.0 - - cruft==2.15.0 - - cryptography==42.0.1 - - cycler==0.12.1 - - distlib==0.3.8 - - docker-pycreds==0.4.0 - - editables==0.5 - - fonttools==4.47.2 - - fsspec==2023.12.2 - - funlib-learn-torch==0.1 - - ghp-import==2.1.0 - - gitdb==4.0.11 - - gitpython==3.1.41 - - griffe==0.39.1 - - h11==0.14.0 - - hatch==1.9.3 - - hatchling==1.21.1 - - httpcore==1.0.2 - - httpx==0.26.0 - - hyperlink==21.0.0 - - identify==2.5.33 - - imageio==2.33.1 - - iniconfig==2.0.0 - - jaraco-classes==3.3.0 - - jeepney==0.8.0 - - joblib==1.3.2 - - keyring==24.3.0 - - kiwisolver==1.4.5 - - lazy-loader==0.3 - - lightning-utilities==0.10.1 - - markdown==3.5.2 - - markdown-it-py==3.0.0 - - matplotlib==3.8.2 - - mdurl==0.1.2 - - mergedeep==1.3.4 - - mkdocs==1.5.3 - - mkdocs-autorefs==0.5.0 - - mkdocs-material==9.5.6 - - mkdocs-material-extensions==1.3.1 - - mkdocstrings==0.24.0 - - mkdocstrings-python==1.8.0 - - more-itertools==10.2.0 - - mypy==1.8.0 - - mypy-extensions==1.0.0 - - nodeenv==1.8.0 - - opencv-python==4.9.0.80 - - paginate==0.5.6 - - pandas==2.2.0 - - pathspec==0.12.1 - - pdoc==14.4.0 - - pluggy==1.3.0 - - pre-commit==3.6.0 - - protobuf==4.25.2 - - pycparser==2.21 - - pydantic==2.6.1 - - pydantic-core==2.16.2 - - pymdown-extensions==10.7 - - pyparsing==3.1.1 - - pytest==7.4.4 - - python-slugify==8.0.1 - - pytz==2024.1 - - pyyaml-env-tag==0.1 - - quac==0.0.0 - - quac-experiments==0.1 - - regex==2023.12.25 - - rich==13.7.0 - - scikit-image==0.22.0 - - scikit-learn==1.4.0 - - scipy==1.12.0 - - seaborn==0.13.2 - - secretstorage==3.3.3 - - sentry-sdk==1.40.3 - - setproctitle==1.3.3 - - 
shellingham==1.5.4 - - smmap==5.0.1 - - sniffio==1.3.0 - - text-unidecode==1.3 - - threadpoolctl==3.2.0 - - tifffile==2024.1.30 - - tomli-w==1.0.0 - - tomlkit==0.12.3 - - torchmetrics==1.3.0.post0 - - tqdm==4.66.1 - - trove-classifiers==2024.1.8 - - typer==0.9.0 - - types-python-dateutil==2.8.19.20240106 - - tzdata==2023.4 - - userpath==1.9.1 - - virtualenv==20.25.0 - - wandb==0.16.3 - - watchdog==3.0.0 - - zstandard==0.22.0 -prefix: /home/adjavond@hhmi.org/mambaforge/envs/quac diff --git a/kidney/noise_augment.py b/kidney/noise_augment.py deleted file mode 100644 index 057271d..0000000 --- a/kidney/noise_augment.py +++ /dev/null @@ -1,16 +0,0 @@ -import torch - - -class AddGaussianNoise: - def __init__(self, mean=0., std=1., clip=True): - self.std = std - self.mean = mean - self.clip = clip - - def __call__(self, tensor): - if self.clip: - return torch.clamp(tensor + torch.randn(tensor.size()) * self.std + self.mean, 0, 1) - return tensor + torch.randn(tensor.size()) * self.std + self.mean - - def __repr__(self): - return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std) \ No newline at end of file diff --git a/kidney/plot_confusion.py b/kidney/plot_confusion.py deleted file mode 100644 index ed8415c..0000000 --- a/kidney/plot_confusion.py +++ /dev/null @@ -1,14 +0,0 @@ -import seaborn as sns -import matplotlib.pyplot as plt - - -def plot_confusion_matrix(confusion_matrix, labels, title=""): - ax = sns.heatmap(confusion_matrix, annot=True, fmt=".2f", cmap="YlGnBu", - vmin=0, vmax=1, square=True, cbar=False, - xticklabels=labels, yticklabels=labels) - ax.set_xlabel("Predicted") - ax.set_ylabel("True") - ax.set_title(title) - # Remove ticks, but keep labels - ax.tick_params(left=False, bottom=False) - return plt.gcf(), ax diff --git a/kidney/requirements.txt b/kidney/requirements.txt deleted file mode 100644 index 5854771..0000000 --- a/kidney/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -matplotlib -pydantic -seaborn -tifffile -torchmetrics -tqdm -typer -wandb diff --git a/kidney/resnet.py b/kidney/resnet.py deleted file mode 100644 index a336ba8..0000000 --- a/kidney/resnet.py +++ /dev/null @@ -1,102 +0,0 @@ -"""ResNet Models""" - -""" -2D ResNet implementation based on code from https://github.com/funkelab/dapi/blob/main/dapi_networks/ResNet.py -""" -import torch -from torch import nn - - -class ResNet2D(nn.Module): - def __init__(self, output_classes, input_channels=1, start_channels=12, layers=[2, 2, 2, 2]): - """ - Args: - output_classes: Number of output classes - - input_size: Size of input images - - input_channels: Number of input channels - - start_channels: Number of channels in first convolutional layer - """ - super(ResNet2D, self).__init__() - self.in_channels = start_channels - self.conv = nn.Conv2d(input_channels, self.in_channels, kernel_size=3, - padding=1, stride=1, bias=True) - self.bn = nn.BatchNorm2d(self.in_channels) - self.relu = nn.ReLU() - - current_channels = self.in_channels - self.layer1 = self.make_layer(ResidualBlock, current_channels, layers[0], stride=2) - current_channels *= 2 - self.layer2 = self.make_layer(ResidualBlock, current_channels, layers[1], stride=2) - current_channels *= 2 - self.layer3 = self.make_layer(ResidualBlock, current_channels, layers[2], stride=2) - current_channels *= 2 - self.layer4 = self.make_layer(ResidualBlock, current_channels, layers[3], stride=2) - - self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) - self.fc = nn.Linear(current_channels, output_classes) - - def make_layer(self, block, 
out_channels, blocks, stride=1): - downsample = None - if (stride != 1) or self.in_channels != out_channels: - downsample = nn.Sequential( - nn.Conv2d(self.in_channels, out_channels, kernel_size=3, padding=1, stride=stride, bias=True), - nn.BatchNorm2d(out_channels)) - layers = [] - layers.append(block(self.in_channels, out_channels, stride, downsample)) - self.in_channels = out_channels - for i in range(1, blocks): - layers.append(block(out_channels, out_channels)) - return nn.Sequential(*layers) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = self.conv(x) - out = self.bn(out) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = self.avgpool(out) - out = torch.flatten(out, 1) - out = self.fc(out) - return out - - -# Residual block -class ResidualBlock(nn.Module): - def __init__(self, in_channels, out_channels, stride=1, downsample=None): - super(ResidualBlock, self).__init__() - # Biases are handled by BN layers - self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, - padding=1, stride=stride, bias=True) - self.bn1 = nn.BatchNorm2d(out_channels) - self.relu = nn.ReLU() - self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, - padding=1, bias=True) - self.bn2 = nn.BatchNorm2d(out_channels) - self.downsample = downsample - - def forward(self, x): - residual = x - out = self.conv1(x) - out = self.bn1(out) - out = nn.ReLU()(out) - out = self.conv2(out) - out = self.bn2(out) - if self.downsample: - residual = self.downsample(x) - out += residual - out = nn.ReLU()(out) - return out - - -def resnet18(output_classes, input_channels=1, start_channels=12): - return ResNet2D(output_classes, input_channels, start_channels, [2, 2, 2, 2]) - - -def resnet34(output_classes, input_channels=1, start_channels=12): - return ResNet2D(output_classes, input_channels, start_channels, [3, 4, 6, 3]) - -# TODO ResNet50, which adds a bottleneck layer \ No newline at end of file diff --git a/kidney/train_classifier.py b/kidney/train_classifier.py deleted file mode 100644 index 1076b66..0000000 --- a/kidney/train_classifier.py +++ /dev/null @@ -1,267 +0,0 @@ -from config import ExperimentConfig, ModelConfig, DataConfig, TrainingConfig -from funlib.learn.torch.models import Vgg2D -import matplotlib.pyplot as plt -from noise_augment import AddGaussianNoise -import numpy as np -from pathlib import Path -from plot_confusion import plot_confusion_matrix -from resnet import ResNet2D -import timm -import tifffile -from torchvision import transforms -from torchvision.datasets import ImageFolder -from torchvision.utils import make_grid -from torch.utils.data import DataLoader -import torch -import torchmetrics -from tqdm import tqdm -import typer -import wandb -import yaml -# TODO Make a Training Class - - -def imread(path): - data = tifffile.imread(path) - if data.ndim == 2: - data = data[np.newaxis, ...] - return torch.from_numpy(data / 255.0).float() - - -def change_first_layer_channels(model, channels): - """ - Change the number of channels in the first layer of a model. 
- """ - for name, child in model.named_children(): - if isinstance(child, torch.nn.Conv2d): - kwargs = { - 'out_channels': child.out_channels, - 'kernel_size': child.kernel_size, - 'stride': child.stride, - 'padding': child.padding, - 'bias': False if child.bias == None else True - } - model._modules[name] = torch.nn.Conv2d(channels, **kwargs) - return True - else: - if(change_first_layer_channels(child, channels)): - return True - return False - - -def initialize_model(config: ModelConfig): - """ - type: str = "vgg", img_size: int = 128, num_classes: int = 2, device: str = "cuda" - """ - if config.type == "vgg": - model = Vgg2D( - (config.image_size, config.image_size), - output_classes=config.num_classes, - input_fmaps=config.input_fmaps, - fmaps=config.fmaps, - ) - elif config.type == "resnet18": - model = timm.create_model("resnet18", pretrained=False, num_classes=config.num_classes) - assert change_first_layer_channels(model, config.input_fmaps) - elif config.type == "resnet34": - model = timm.create_model("resnet34", pretrained=False, num_classes=config.num_classes) - assert change_first_layer_channels(model, config.input_fmaps) - else: - raise ValueError(f"Unknown model type: {type}") - return model - - -def initialize_dataloader(config: DataConfig): - """ - data_path: str, batch_size: int = 32, augment: bool = True - """ - if config.augment: - transform = transforms.Compose( - [ - transforms.RandomHorizontalFlip(), - transforms.RandomVerticalFlip(), - # Translations - transforms.RandomAffine(0, translate=(0.1, 0.1)), - transforms.RandomRotation(90), - # Noise - AddGaussianNoise(mean=0.0, std=0.001, clip=True), - ] - ) - else: - transform = None - dataset = ImageFolder(root=config.data_path, transform=transform, loader=imread) - sampler = None - if config.balance: - # Balance samples by inverse class frequency - _, count = np.unique(dataset.targets, return_counts=True) - sample_counts = np.array([count[i] for i in dataset.targets]) - weights = 1 / sample_counts - sampler = torch.utils.data.WeightedRandomSampler( - weights=weights, num_samples=len(dataset), replacement=True - ) - dataloader = DataLoader(dataset, batch_size=config.batch_size, sampler=sampler, drop_last=config.balance) - return dataloader - - -def save_checkpoint(checkpoint_dir, i, model, model_ema, optimizer, avg_loss, acc, val_acc=None): - checkpoint_dir = Path(checkpoint_dir) - checkpoint_dir.mkdir(exist_ok=True, parents=True) - - checkpoint = { - "epoch": i, - "model_state_dict": model.state_dict(), - "model_ema_state_dict": model_ema.module.state_dict(), - "optimizer_state_dict": optimizer.state_dict(), - "loss": avg_loss, - "accuracy": acc, - } - if val_acc is not None: - checkpoint["val_accuracy"] = val_acc - torch.save(checkpoint, checkpoint_dir / f"checkpoint_{i}.pt") - - -def train_classifier( - config: ExperimentConfig, -): - """ - data_path: str = None, - type: str = "vgg", - image_size: int = 128, - num_classes: int = 2, - batch_size: int = 32, - epochs: int = 10, - lr: float = 1e-4, - device: str = "cuda", - val_data_path: str = None, - checkpoint_dir: str = "checkpoints", - """ - - log_config = dict() - log_config.update(config.model.dict()) - log_config.update(config.training.dict()) - - run = wandb.init( - project=config.project, notes=config.notes, tags=config.tags, config=log_config - ) - - checkpoint_dir = Path(config.training.checkpoint_dir) - - dataloader = initialize_dataloader(config.data) - validation = False - if config.val_data is not None: - validation = True - val_dataloader = 
initialize_dataloader(config.val_data) - - model = initialize_model(config.model).to(config.training.device) - model_ema = timm.utils.ModelEmaV2(model) - model_ema.eval() - - loss_fn = torch.nn.CrossEntropyLoss() - optimizer = torch.optim.Adam(model.parameters(), lr=config.training.learning_rate) - - metric = torchmetrics.Accuracy( - task="multiclass", num_classes=config.model.num_classes - ).to(config.training.device) - if validation: - val_metric = torchmetrics.Accuracy( - task="multiclass", num_classes=config.model.num_classes - ).to(config.training.device) - val_confusion = torchmetrics.ConfusionMatrix( - task="multiclass", num_classes=config.model.num_classes, normalize="true" - ).to(config.training.device) - val_accuracy_macro = torchmetrics.Accuracy( - task="multiclass", num_classes=config.model.num_classes, average="macro" - ).to(config.training.device) - - for i in range(config.training.epochs): - avg_loss = 0 - for batch in tqdm( - dataloader, - total=len(dataloader), - desc=f"Epoch {i+1}/{config.training.epochs}", - ): - optimizer.zero_grad() - inputs, targets = batch - outputs = model(inputs.to(config.training.device)) - # compute loss and update model - loss = loss_fn(outputs, targets.to(config.training.device)) - loss.backward() - optimizer.step() - avg_loss += loss.item() - metric.update(outputs, targets.to(config.training.device)) - if i == 0: - model_ema.set(model) - model_ema.eval() - else: - model_ema.update(model) - acc = metric.compute() - avg_loss /= len(dataloader) - # Log metrics - # print(f"Epoch {i+1}/{config.training.epochs} - avg loss: {avg_loss}") - # print(f"Epoch {i+1}/{config.training.epochs} - accuracy: {acc}") - metric.reset() - - # Check number of channels - sample = inputs[: config.log_images].cpu() - if sample.shape[1] != 1: - sample = sample[:, 0:1, ...] 
-        sample = make_grid(sample)
-
-        # Log to wandb
-        metrics = {
-            "avg_loss": avg_loss,
-            "accuracy": acc,
-            "sample": wandb.Image(sample),
-        }
-
-        # Validation
-        if validation:
-            with torch.no_grad():
-                for batch in tqdm(
-                    val_dataloader,
-                    total=len(val_dataloader),
-                    desc=f"Epoch {i+1}/{config.training.epochs} - validation",
-                ):
-                    inputs, targets = batch
-                    outputs = model_ema.module(inputs.to(config.training.device))
-                    val_metric.update(outputs, targets.to(config.training.device))
-                    val_confusion.update(outputs, targets.to(config.training.device))
-                    val_accuracy_macro.update(
-                        outputs, targets.to(config.training.device)
-                    )
-            val_acc = val_metric.compute()
-            val_acc_macro = val_accuracy_macro.compute()
-            val_metric.reset()
-            val_accuracy_macro.reset()
-            # print(f"Epoch {i+1}/{config.training.epochs} - val accuracy: {val_acc}")
-            metrics["val_accuracy"] = val_acc
-            metrics["val_accuracy_macro"] = val_acc_macro
-            # Plotting the confusion matrix
-            fig, _ = plot_confusion_matrix(
-                val_confusion.compute().cpu().numpy(),
-                val_dataloader.dataset.classes,
-                title=f"Validation Confusion Matrix {i+1}/{config.training.epochs}",
-            )
-            metrics["validation_confusion"] = wandb.Image(fig)
-            val_confusion.reset()
-            plt.close(fig)
-
-        else:
-            val_acc = None
-        # Save checkpoint
-        save_checkpoint(checkpoint_dir, i, model, model_ema, optimizer, avg_loss, acc, val_acc)
-
-        # Log to wandb
-        run.log(metrics)
-    run.finish()
-
-
-def main(config: str = "vgg.yml"):
-    with open(config, "r") as fd:
-        config = yaml.safe_load(fd)
-    experiment = ExperimentConfig(**config)
-    train_classifier(experiment)
-
-
-if __name__ == "__main__":
-    typer.run(main)
diff --git a/kidney/train_classifier_v2.py b/kidney/train_classifier_v2.py
deleted file mode 100644
index 23bac9c..0000000
--- a/kidney/train_classifier_v2.py
+++ /dev/null
@@ -1,288 +0,0 @@
-"""
-This version just does a train-test split on the dataset instead of using a separate validation set.
-"""
-from config import ExperimentConfig, ModelConfig, DataConfig, TrainingConfig
-from funlib.learn.torch.models import Vgg2D
-import matplotlib.pyplot as plt
-from noise_augment import AddGaussianNoise
-import numpy as np
-from pathlib import Path
-from plot_confusion import plot_confusion_matrix
-from resnet import ResNet2D
-from sklearn.model_selection import train_test_split
-import timm
-import tifffile
-from torchvision import transforms
-from torchvision.datasets import ImageFolder
-from torchvision.utils import make_grid
-from torch.utils.data import DataLoader
-import torch
-import torchmetrics
-from tqdm import tqdm
-import typer
-import wandb
-import yaml
-# TODO Make a Training Class
-
-
-def imread(path):
-    data = tifffile.imread(path)
-    if data.ndim == 2:
-        data = data[np.newaxis, ...]
-    return torch.from_numpy(data / 255.0).float()
-
-
-def change_first_layer_channels(model, channels):
-    """
-    Change the number of channels in the first layer of a model.
- """ - for name, child in model.named_children(): - if isinstance(child, torch.nn.Conv2d): - kwargs = { - 'out_channels': child.out_channels, - 'kernel_size': child.kernel_size, - 'stride': child.stride, - 'padding': child.padding, - 'bias': False if child.bias == None else True - } - model._modules[name] = torch.nn.Conv2d(channels, **kwargs) - return True - else: - if(change_first_layer_channels(child, channels)): - return True - return False - - -def initialize_model(config: ModelConfig): - """ - type: str = "vgg", img_size: int = 128, num_classes: int = 2, device: str = "cuda" - """ - if config.type == "vgg": - model = Vgg2D( - (config.image_size, config.image_size), - output_classes=config.num_classes, - input_fmaps=config.input_fmaps, - fmaps=config.fmaps, - ) - elif config.type == "resnet18": - model = timm.create_model("resnet18", pretrained=False, num_classes=config.num_classes) - assert change_first_layer_channels(model, config.input_fmaps) - elif config.type == "resnet34": - model = timm.create_model("resnet34", pretrained=False, num_classes=config.num_classes) - assert change_first_layer_channels(model, config.input_fmaps) - else: - raise ValueError(f"Unknown model type: {type}") - return model - - -def initialize_dataloader(config: DataConfig, seed:int=42): - """ - data_path: str, batch_size: int = 32, augment: bool = True - """ - if config.augment: - train_transform = transforms.Compose( - [ - transforms.RandomHorizontalFlip(), - transforms.RandomVerticalFlip(), - # Translations - transforms.RandomAffine(0, translate=(0.1, 0.1)), - transforms.RandomRotation(90), - # Noise - AddGaussianNoise(mean=0.0, std=0.001, clip=True), - ] - ) - else: - train_transform = None - full_dataset = ImageFolder(root=config.data_path, loader=imread) - - samples = list(range(len(full_dataset))) - train_idx, val_idx = train_test_split(samples, test_size=0.5, stratify=full_dataset.targets, random_state=seed) - - # Create a subset of the dataset, which is used for training - dataset = torch.utils.data.Subset(full_dataset, train_idx) - dataset.transform = train_transform - - # Another subset of the dataset, which is used for validation - val_dataset = torch.utils.data.Subset(full_dataset, val_idx) - - sampler = None - if config.balance: - # Balance samples by inverse class frequency - train_ds_targets = np.array(full_dataset.targets)[train_idx] - _, count = np.unique(train_ds_targets, return_counts=True) - sample_counts = np.array([count[i] for i in train_ds_targets]) - weights = 1 / sample_counts - sampler = torch.utils.data.WeightedRandomSampler( - weights=weights, num_samples=len(dataset), replacement=True - ) - dataloader = DataLoader(dataset, batch_size=config.batch_size, sampler=sampler, drop_last=config.balance) - - val_dataloader = DataLoader(val_dataset, batch_size=config.batch_size, drop_last=False) - return dataloader, val_dataloader - - -def save_checkpoint(checkpoint_dir, i, model, model_ema, optimizer, avg_loss, acc, val_acc=None): - checkpoint_dir = Path(checkpoint_dir) - checkpoint_dir.mkdir(exist_ok=True, parents=True) - - checkpoint = { - "epoch": i, - "model_state_dict": model.state_dict(), - "model_ema_state_dict": model_ema.module.state_dict(), - "optimizer_state_dict": optimizer.state_dict(), - "loss": avg_loss, - "accuracy": acc, - } - if val_acc is not None: - checkpoint["val_accuracy"] = val_acc - torch.save(checkpoint, checkpoint_dir / f"checkpoint_{i}.pt") - - -def train_classifier( - config: ExperimentConfig, -): - """ - data_path: str = None, - type: str = "vgg", - 
-    image_size: int = 128,
-    num_classes: int = 2,
-    batch_size: int = 32,
-    epochs: int = 10,
-    lr: float = 1e-4,
-    device: str = "cuda",
-    val_data_path: str = None,
-    checkpoint_dir: str = "checkpoints",
-    """
-
-    log_config = dict()
-    log_config.update(config.model.dict())
-    log_config.update(config.training.dict())
-
-    config.tags.append("train_classifier_v2")
-    checkpoint_dir = Path(config.training.checkpoint_dir)
-
-    dataloader, val_dataloader = initialize_dataloader(config.data)
-    validation = True
-    # if config.val_data is not None:
-    #     validation = True
-    #     val_dataloader = initialize_dataloader(config.val_data)
-
-    model = initialize_model(config.model).to(config.training.device)
-    model_ema = timm.utils.ModelEmaV2(model)
-    model_ema.eval()
-
-    loss_fn = torch.nn.CrossEntropyLoss()
-    optimizer = torch.optim.Adam(model.parameters(), lr=config.training.learning_rate)
-
-    metric = torchmetrics.Accuracy(
-        task="multiclass", num_classes=config.model.num_classes
-    ).to(config.training.device)
-    if validation:
-        val_metric = torchmetrics.Accuracy(
-            task="multiclass", num_classes=config.model.num_classes
-        ).to(config.training.device)
-        val_confusion = torchmetrics.ConfusionMatrix(
-            task="multiclass", num_classes=config.model.num_classes, normalize="true"
-        ).to(config.training.device)
-        val_accuracy_macro = torchmetrics.Accuracy(
-            task="multiclass", num_classes=config.model.num_classes, average="macro"
-        ).to(config.training.device)
-
-    # Only create the run after everything has initialized without errors
-    run = wandb.init(
-        project=config.project, notes=config.notes, tags=config.tags, config=log_config
-    )
-
-
-    for i in range(config.training.epochs):
-        avg_loss = 0
-        for batch in tqdm(
-            dataloader,
-            total=len(dataloader),
-            desc=f"Epoch {i+1}/{config.training.epochs}",
-        ):
-            optimizer.zero_grad()
-            inputs, targets = batch
-            outputs = model(inputs.to(config.training.device))
-            # compute loss and update model
-            loss = loss_fn(outputs, targets.to(config.training.device))
-            loss.backward()
-            optimizer.step()
-            avg_loss += loss.item()
-            metric.update(outputs, targets.to(config.training.device))
-        if i == 0:
-            model_ema.set(model)
-            model_ema.eval()
-        else:
-            model_ema.update(model)
-        acc = metric.compute()
-        avg_loss /= len(dataloader)
-        # Log metrics
-        # print(f"Epoch {i+1}/{config.training.epochs} - avg loss: {avg_loss}")
-        # print(f"Epoch {i+1}/{config.training.epochs} - accuracy: {acc}")
-        metric.reset()
-
-        # Check number of channels
-        sample = inputs[: config.log_images].cpu()
-        if sample.shape[1] != 1:
-            sample = sample[:, 0:1, ...]
-        sample = make_grid(sample)
-
-        # Log to wandb
-        metrics = {
-            "avg_loss": avg_loss,
-            "accuracy": acc,
-            "sample": wandb.Image(sample),
-        }
-
-        # Validation
-        if validation:
-            with torch.no_grad():
-                for batch in tqdm(
-                    val_dataloader,
-                    total=len(val_dataloader),
-                    desc=f"Epoch {i+1}/{config.training.epochs} - validation",
-                ):
-                    inputs, targets = batch
-                    outputs = model_ema.module(inputs.to(config.training.device))
-                    val_metric.update(outputs, targets.to(config.training.device))
-                    val_confusion.update(outputs, targets.to(config.training.device))
-                    val_accuracy_macro.update(
-                        outputs, targets.to(config.training.device)
-                    )
-            val_acc = val_metric.compute()
-            val_acc_macro = val_accuracy_macro.compute()
-            val_metric.reset()
-            val_accuracy_macro.reset()
-            # print(f"Epoch {i+1}/{config.training.epochs} - val accuracy: {val_acc}")
-            metrics["val_accuracy"] = val_acc
-            metrics["val_accuracy_macro"] = val_acc_macro
-            # Plotting the confusion matrix
-            fig, _ = plot_confusion_matrix(
-                val_confusion.compute().cpu().numpy(),
-                val_dataloader.dataset.dataset.classes,
-                title=f"Validation Confusion Matrix {i+1}/{config.training.epochs}",
-            )
-            metrics["validation_confusion"] = wandb.Image(fig)
-            val_confusion.reset()
-            plt.close(fig)
-
-        else:
-            val_acc = None
-        # Save checkpoint
-        save_checkpoint(checkpoint_dir, i, model, model_ema, optimizer, avg_loss, acc, val_acc)
-
-        # Log to wandb
-        run.log(metrics)
-    run.finish()
-
-
-def main(config: str = "vgg.yml"):
-    with open(config, "r") as fd:
-        config = yaml.safe_load(fd)
-    experiment = ExperimentConfig(**config)
-    train_classifier(experiment)
-
-
-if __name__ == "__main__":
-    typer.run(main)