From 8fea1de1d26b7c2343d0af85c8a5b08c672b9f7c Mon Sep 17 00:00:00 2001 From: Bhuvanesh Prasad Date: Thu, 4 Jul 2024 12:37:43 +0530 Subject: [PATCH] added dvc pipeline --- .dvc/.gitignore | 3 ++ .dvc/config | 0 .dvcignore | 3 ++ dvc.lock | 117 ++++++++++++++++++++++++++++++++++++++++++++++++ dvc.yaml | 55 +++++++++++++++++++++++ 5 files changed, 178 insertions(+) create mode 100644 .dvc/.gitignore create mode 100644 .dvc/config create mode 100644 .dvcignore create mode 100644 dvc.lock diff --git a/.dvc/.gitignore b/.dvc/.gitignore new file mode 100644 index 0000000..528f30c --- /dev/null +++ b/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/.dvc/config b/.dvc/config new file mode 100644 index 0000000..e69de29 diff --git a/.dvcignore b/.dvcignore new file mode 100644 index 0000000..5197305 --- /dev/null +++ b/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/dvc.lock b/dvc.lock new file mode 100644 index 0000000..0b5acdc --- /dev/null +++ b/dvc.lock @@ -0,0 +1,117 @@ +schema: '2.0' +stages: + data_ingestion: + cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py + deps: + - path: config/config.yaml + hash: md5 + md5: a9764e833ce576f6efcba23087928d77 + size: 658 + - path: src/cnnClassifier/pipeline/stage_01_data_ingestion.py + hash: md5 + md5: c01c6884a5df3d9054df342d7d577896 + size: 918 + outs: + - path: artifacts/data_ingestion/seti-sub + hash: md5 + md5: ef32db442a5e77a761e24f1e86ae9fca.dir + size: 324670039 + nfiles: 630 + prepare_base_model: + cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py + deps: + - path: config/config.yaml + hash: md5 + md5: a9764e833ce576f6efcba23087928d77 + size: 658 + - path: src/cnnClassifier/pipeline/stage_02_prepare_base_model.py + hash: md5 + md5: a2214c8ac045002603afaf56b9bdd677 + size: 1011 + params: + params.yaml: + AUGMENTATION: true + BATCH_SIZE: 32 + CLASSES: 7 + IMAGE_SIZE: + - 256 + - 256 + - 3 + INCLUDE_TOP: false + LEARNING_RATE: 0.01 + WEIGHTS: imagenet + outs: + - path: artifacts/prepare_base_model + hash: md5 + md5: 74f41e6b2e27a81060ebb71b71f3c6b9.dir + size: 238292708 + nfiles: 2 + model_training: + cmd: python src/cnnClassifier/pipeline/stage_03_model_training.py + deps: + - path: artifacts/data_ingestion/seti-sub + hash: md5 + md5: ef32db442a5e77a761e24f1e86ae9fca.dir + size: 324670039 + nfiles: 630 + - path: artifacts/prepare_base_model + hash: md5 + md5: 74f41e6b2e27a81060ebb71b71f3c6b9.dir + size: 238292708 + nfiles: 2 + - path: config/config.yaml + hash: md5 + md5: a9764e833ce576f6efcba23087928d77 + size: 658 + - path: src/cnnClassifier/pipeline/stage_03_model_training.py + hash: md5 + md5: aefe6cbf1d4a67f9cbb7eb51f2944389 + size: 940 + params: + params.yaml: + AUGMENTATION: true + BATCH_SIZE: 32 + CLASSES: 7 + EPOCHS: 1 + IMAGE_SIZE: + - 256 + - 256 + - 3 + outs: + - path: artifacts/model_training/model.keras + hash: md5 + md5: 420a56dff3e05daf6f3fdae79c2a745c + size: 447669227 + model_evaluation: + cmd: python src/cnnClassifier/pipeline/stage_04_model_evaluation.py + deps: + - path: artifacts/data_ingestion/seti-sub + hash: md5 + md5: ef32db442a5e77a761e24f1e86ae9fca.dir + size: 324670039 + nfiles: 630 + - path: artifacts/model_training/model.keras + hash: md5 + md5: 420a56dff3e05daf6f3fdae79c2a745c + size: 447669227 + - path: config/config.yaml + hash: md5 + md5: a9764e833ce576f6efcba23087928d77 + size: 658 + - path: src/cnnClassifier/pipeline/stage_04_model_evaluation.py + hash: md5 + md5: a13afbdfb7c097aeaa50af416a9e1eee + size: 1336 + params: + params.yaml: + BATCH_SIZE: 32 + CLASSES: 7 + IMAGE_SIZE: + - 256 + - 256 + - 3 + outs: + - path: scores.json + hash: md5 + md5: ce6fc4eecb123b98930a6d8dd974c51d + size: 77 diff --git a/dvc.yaml b/dvc.yaml index e69de29..5037883 100644 --- a/dvc.yaml +++ b/dvc.yaml @@ -0,0 +1,55 @@ +stages: + data_ingestion: + cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py + deps: + - src/cnnClassifier/pipeline/stage_01_data_ingestion.py + - config/config.yaml + outs: + - artifacts/data_ingestion/seti-sub + + prepare_base_model: + cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py + deps: + - src/cnnClassifier/pipeline/stage_02_prepare_base_model.py + - config/config.yaml + params: + - IMAGE_SIZE + - INCLUDE_TOP + - CLASSES + - WEIGHTS + - LEARNING_RATE + - AUGMENTATION + - BATCH_SIZE + outs: + - artifacts/prepare_base_model + + model_training: + cmd: python src/cnnClassifier/pipeline/stage_03_model_training.py + deps: + - src/cnnClassifier/pipeline/stage_03_model_training.py + - config/config.yaml + - artifacts/data_ingestion/seti-sub + - artifacts/prepare_base_model + params: + - IMAGE_SIZE + - EPOCHS + - BATCH_SIZE + - AUGMENTATION + - CLASSES + outs: + - artifacts/model_training/model.keras + + model_evaluation: + cmd: python src/cnnClassifier/pipeline/stage_04_model_evaluation.py + deps: + - src/cnnClassifier/pipeline/stage_04_model_evaluation.py + - config/config.yaml + - artifacts/data_ingestion/seti-sub + - artifacts/model_training/model.keras + params: + - IMAGE_SIZE + - BATCH_SIZE + - CLASSES + metrics: + - scores.json: + cache: false \ No newline at end of file