From ed7ed887d8f8707965d3390ea1122c8278f79874 Mon Sep 17 00:00:00 2001 From: Chih-Yu Chan <alex870521@gmail.com> Date: Tue, 20 Aug 2024 09:57:37 +0800 Subject: [PATCH] release: version 0.1.11 BREAKING CHANGE: Complete overhaul from v0.1.9.0 to v0.1.11 Major Changes: - Complete overhaul of RawDataReader module with improved architecture - Migration from setup.py to pyproject.toml for modern Python packaging - Removal of deprecated AeroViz.process directory Features & Improvements: 1. RawDataReader Enhancements: - Improved file handling with proper resource management - Added robust error handling for missing data - Optimized data processing performance - Enhanced logging system with proper file closure - Updated data validation logic - Added support for new data formats - Set SMPS default size range (11.8, 593.5) - Added kwargs for customizable size range 2. Project Infrastructure: - Migrated to pyproject.toml for package configuration - Enhanced test coverage configuration and reporting - Optimized CI/CD pipeline - Updated GitHub Actions workflows for PyPI publishing - Centralized dependencies management - Improved package data handling 3. Visualization & Logging: - Enhanced progress bar visualization - Optimized ANSI support for Windows - Added new HYSPLIT plotting method - Improved VOC processing 4. Documentation: - Updated project documentation - Improved code syntax and structure - Added comprehensive test coverage documentation This release represents a significant upgrade focusing on performance, reliability, and maintainability of the AeroViz package. --- .github/workflows/cleanup.yml | 103 +++ .github/workflows/publish.yml | 135 +++ .github/workflows/pytest.yml | 49 + .gitignore | 7 +- AeroViz/__init__.py | 13 +- AeroViz/{config => data}/DEFAULT_DATA.csv | 2 +- .../{config => data}/DEFAULT_PNSD_DATA.csv | 0 AeroViz/data/hysplit_example_data.txt | 101 +++ AeroViz/dataProcess/Chemistry/__init__.py | 85 +- AeroViz/dataProcess/Chemistry/_calculate.py | 30 +- AeroViz/dataProcess/Chemistry/_isoropia.py | 137 +-- AeroViz/dataProcess/Chemistry/_mass_volume.py | 318 ++++--- AeroViz/dataProcess/Chemistry/_ocec.py | 256 +++--- AeroViz/dataProcess/Chemistry/_partition.py | 37 +- AeroViz/dataProcess/Chemistry/_teom.py | 20 +- AeroViz/dataProcess/Chemistry/isrpia2.exe | Bin .../dataProcess/Optical/Angstrom_exponent.py | 20 + .../dataProcess/Optical/PyMieScatt_update.py | 560 ++++++++++++ AeroViz/dataProcess/Optical/_IMPROVE.py | 82 +- AeroViz/dataProcess/Optical/__init__.py | 73 +- AeroViz/dataProcess/Optical/_absorption.py | 68 +- AeroViz/dataProcess/Optical/_extinction.py | 56 +- AeroViz/dataProcess/Optical/_mie.py | 12 +- AeroViz/dataProcess/Optical/_mie_sd.py | 179 ++-- AeroViz/dataProcess/Optical/_scattering.py | 39 +- AeroViz/dataProcess/Optical/mie_theory.py | 260 ++++++ AeroViz/dataProcess/SizeDistr/__init__.py | 82 +- AeroViz/dataProcess/SizeDistr/__merge.py | 317 +++---- AeroViz/dataProcess/SizeDistr/_merge.py | 309 +++---- AeroViz/dataProcess/SizeDistr/_merge_v1.py | 323 +++---- AeroViz/dataProcess/SizeDistr/_merge_v2.py | 305 +++---- AeroViz/dataProcess/SizeDistr/_merge_v3.py | 654 +++++++------- AeroViz/dataProcess/SizeDistr/_merge_v4.py | 548 ++++++----- AeroViz/dataProcess/SizeDistr/_size_distr.py | 106 +-- AeroViz/dataProcess/SizeDistr/prop.py | 62 ++ AeroViz/dataProcess/VOC/__init__.py | 21 +- AeroViz/dataProcess/VOC/_potential_par.py | 142 +-- .../VOC/{voc_par.json => support_voc.json} | 660 +++++++------- AeroViz/dataProcess/VOC/voc_par.pkl | Bin 4446 -> 0 bytes 
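Note on the RawDataReader items listed above: the snippet below is a minimal usage sketch of the reworked reader with the new size-range keyword. The argument names (start, end, size_range) and the instrument key are assumptions inferred from this changelog and the new docs/guide/RawDataReader.md, not a verbatim copy of the released API.

from datetime import datetime
from pathlib import Path

from AeroViz import RawDataReader

# Hypothetical call: read one month of SMPS data, overriding the default
# size range of (11.8, 593.5) nm mentioned in the release notes.
smps = RawDataReader(
    'SMPS',                         # instrument key (assumed)
    Path('/data/campaign/SMPS'),    # folder of raw files (assumed layout)
    start=datetime(2024, 2, 1),
    end=datetime(2024, 2, 29),
    size_range=(11.8, 593.5),       # kwarg added in this release (per changelog)
)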
AeroViz/dataProcess/__init__.py | 34 +- AeroViz/dataProcess/core/__init__.py | 124 ++- AeroViz/plot/__init__.py | 10 +- AeroViz/plot/bar.py | 126 +++ AeroViz/plot/box.py | 69 ++ AeroViz/plot/distribution/distribution.py | 848 +++++++++--------- AeroViz/plot/improve/__init__.py | 1 - AeroViz/plot/improve/improve.py | 240 ----- AeroViz/plot/meteorology/CBPF.py | 295 ++++++ AeroViz/plot/meteorology/__init__.py | 4 +- AeroViz/plot/meteorology/hysplit.py | 90 ++ AeroViz/plot/meteorology/meteorology.py | 317 ------- AeroViz/plot/meteorology/wind_rose.py | 77 ++ AeroViz/plot/optical/__init__.py | 1 - AeroViz/plot/optical/aethalometer.py | 77 -- AeroViz/plot/optical/optical.py | 600 ++++++------- AeroViz/plot/pie.py | 210 +++++ AeroViz/plot/radar.py | 184 ++++ AeroViz/plot/regression.py | 196 ++++ AeroViz/plot/scatter.py | 174 ++++ AeroViz/plot/templates/__init__.py | 6 +- AeroViz/plot/templates/ammonium_rich.py | 34 + AeroViz/plot/templates/contour.py | 50 +- AeroViz/plot/templates/corr_matrix.py | 179 ++-- AeroViz/plot/templates/diurnal_pattern.py | 85 +- AeroViz/plot/templates/event_evolution.py | 65 -- AeroViz/plot/templates/koschmieder.py | 185 ++-- AeroViz/plot/templates/metal_heatmap.py | 172 +++- AeroViz/plot/templates/regression.py | 256 ------ AeroViz/plot/templates/scatter.py | 130 --- AeroViz/plot/templates/templates.py | 398 -------- AeroViz/plot/timeseries/__init__.py | 1 + AeroViz/plot/timeseries/template.py | 47 + AeroViz/plot/timeseries/timeseries.py | 628 +++++++------ AeroViz/plot/utils/__init__.py | 3 +- AeroViz/plot/utils/_color.py | 114 +-- AeroViz/plot/utils/_decorator.py | 74 -- AeroViz/plot/utils/_unit.py | 96 +- AeroViz/plot/utils/plt_utils.py | 92 ++ AeroViz/plot/utils/sklearn_utils.py | 49 + AeroViz/plot/utils/units.json | 5 + AeroViz/plot/violin.py | 80 ++ AeroViz/process/__init__.py | 31 - AeroViz/process/core/DataProc.py | 19 - AeroViz/process/core/SizeDist.py | 90 -- AeroViz/process/core/__init__.py | 4 - AeroViz/process/method/PyMieScatt_update.py | 567 ------------ AeroViz/process/method/__init__.py | 2 - AeroViz/process/method/mie_theory.py | 258 ------ AeroViz/process/method/prop.py | 62 -- AeroViz/process/script/AbstractDistCalc.py | 143 --- AeroViz/process/script/Chemical.py | 176 ---- AeroViz/process/script/IMPACT.py | 49 - AeroViz/process/script/IMPROVE.py | 161 ---- AeroViz/process/script/Others.py | 65 -- AeroViz/process/script/PSD.py | 103 --- AeroViz/process/script/PSD_dry.py | 94 -- AeroViz/process/script/__init__.py | 5 - AeroViz/process/script/retrieve_RI.py | 70 -- AeroViz/rawDataReader/__init__.py | 171 ++-- AeroViz/rawDataReader/config/__init__.py | 0 .../config/supported_instruments.py | 170 ++++ AeroViz/rawDataReader/core/__init__.py | 639 ++++++------- AeroViz/rawDataReader/core/logger.py | 100 +++ AeroViz/rawDataReader/core/qc.py | 184 ++++ AeroViz/rawDataReader/script/AE33.py | 42 +- AeroViz/rawDataReader/script/AE43.py | 44 +- AeroViz/rawDataReader/script/APS_3321.py | 62 +- AeroViz/rawDataReader/script/Aurora.py | 54 +- AeroViz/rawDataReader/script/BC1054.py | 83 +- AeroViz/rawDataReader/script/EPA.py | 41 + AeroViz/rawDataReader/script/EPA_vertical.py | 18 - AeroViz/rawDataReader/script/GRIMM.py | 39 +- AeroViz/rawDataReader/script/IGAC.py | 75 ++ AeroViz/rawDataReader/script/IGAC_TH.py | 104 --- AeroViz/rawDataReader/script/IGAC_ZM.py | 90 -- AeroViz/rawDataReader/script/MA350.py | 81 +- AeroViz/rawDataReader/script/Minion.py | 215 +++++ AeroViz/rawDataReader/script/NEPH.py | 150 ++-- AeroViz/rawDataReader/script/OCEC.py | 92 ++ 
AeroViz/rawDataReader/script/OCEC_LCRES.py | 34 - AeroViz/rawDataReader/script/OCEC_RES.py | 28 - AeroViz/rawDataReader/script/SMPS.py | 83 ++ AeroViz/rawDataReader/script/SMPS_TH.py | 41 - AeroViz/rawDataReader/script/SMPS_aim11.py | 51 -- AeroViz/rawDataReader/script/SMPS_genr.py | 51 -- AeroViz/rawDataReader/script/TEOM.py | 64 +- AeroViz/rawDataReader/script/Table.py | 28 - AeroViz/rawDataReader/script/VOC.py | 33 + AeroViz/rawDataReader/script/VOC_TH.py | 30 - AeroViz/rawDataReader/script/VOC_ZM.py | 37 - AeroViz/rawDataReader/script/XRF.py | 11 + AeroViz/rawDataReader/script/__init__.py | 36 +- AeroViz/rawDataReader/utils/config.py | 169 ---- AeroViz/tools/__init__.py | 1 - AeroViz/tools/database.py | 130 ++- AeroViz/tools/dataclassifier.py | 212 ++--- AeroViz/tools/dataprinter.py | 102 +-- AeroViz/tools/datareader.py | 66 -- README.md | 114 ++- asserts/diagram.py | 36 - {asserts => assets}/figure/IMPROVE_MLR.png | Bin {asserts => assets}/figure/IMPROVE_donut.png | Bin {asserts => assets}/figure/IMPROVE_donuts.png | Bin {asserts => assets}/figure/IMPROVE_pie.png | Bin {asserts => assets}/figure/Mie_MEE.png | Bin {asserts => assets}/figure/Mie_Q.png | Bin {asserts => assets}/figure/OverPSD.png | Bin {asserts => assets}/figure/corr_matrix.png | Bin {asserts => assets}/figure/psd_3D.png | Bin {asserts => assets}/figure/scatter.png | Bin {asserts => assets}/figure/windrose_CBPF.png | Bin .../media/logo-social-discord.png | Bin .../media/logo-social-github.png | Bin .../media/logo-social-instagram.png | Bin .../media/logo-social-linkedin.png | Bin .../media/logo-social-medium.png | Bin .../media/logo-social-threads.png | Bin .../media/logo-social-tiktok.png | Bin .../media/logo-social-twitter.png | Bin .../media/logo-social-youtube.png | Bin .../media/logo-transparent.png | Bin docs/CHANGELOG.md | 12 + docs/api/RawDataReader.md | 203 +++++ docs/example/scatter_examples.py | 49 + docs/guide/RawDataReader.md | 170 ++++ docs/{user_guide.md => guide/plot.md} | 41 +- docs/guide/support_voc.md | 125 +++ docs/index.md | 133 +++ install_mac_linux.sh | 52 -- install_windows.bat | 71 -- pyproject.toml | 82 ++ requirements.txt | 8 - requirements/requirements-dev.txt | 1 + requirements/requirements-docs.txt | 1 + requirements/requirements.txt | 1 + scripts/install_unix.sh | 53 ++ scripts/install_windows.bat | 73 ++ setup.py | 35 +- tests/test_RawDataReader.py | 121 +++ tests/test_import.py | 19 + 180 files changed, 10138 insertions(+), 9419 deletions(-) create mode 100644 .github/workflows/cleanup.yml create mode 100644 .github/workflows/publish.yml create mode 100644 .github/workflows/pytest.yml rename AeroViz/{config => data}/DEFAULT_DATA.csv (99%) rename AeroViz/{config => data}/DEFAULT_PNSD_DATA.csv (100%) create mode 100644 AeroViz/data/hysplit_example_data.txt mode change 100644 => 100755 AeroViz/dataProcess/Chemistry/isrpia2.exe create mode 100644 AeroViz/dataProcess/Optical/Angstrom_exponent.py create mode 100644 AeroViz/dataProcess/Optical/PyMieScatt_update.py create mode 100644 AeroViz/dataProcess/Optical/mie_theory.py create mode 100644 AeroViz/dataProcess/SizeDistr/prop.py rename AeroViz/dataProcess/VOC/{voc_par.json => support_voc.json} (92%) delete mode 100644 AeroViz/dataProcess/VOC/voc_par.pkl create mode 100644 AeroViz/plot/bar.py create mode 100644 AeroViz/plot/box.py delete mode 100644 AeroViz/plot/improve/__init__.py delete mode 100644 AeroViz/plot/improve/improve.py create mode 100644 AeroViz/plot/meteorology/CBPF.py create mode 100644 AeroViz/plot/meteorology/hysplit.py delete 
mode 100644 AeroViz/plot/meteorology/meteorology.py create mode 100644 AeroViz/plot/meteorology/wind_rose.py delete mode 100644 AeroViz/plot/optical/aethalometer.py create mode 100644 AeroViz/plot/pie.py create mode 100644 AeroViz/plot/radar.py create mode 100644 AeroViz/plot/regression.py create mode 100644 AeroViz/plot/scatter.py create mode 100644 AeroViz/plot/templates/ammonium_rich.py delete mode 100644 AeroViz/plot/templates/event_evolution.py delete mode 100644 AeroViz/plot/templates/regression.py delete mode 100644 AeroViz/plot/templates/scatter.py delete mode 100644 AeroViz/plot/templates/templates.py create mode 100644 AeroViz/plot/timeseries/template.py delete mode 100644 AeroViz/plot/utils/_decorator.py create mode 100644 AeroViz/plot/utils/plt_utils.py create mode 100644 AeroViz/plot/utils/sklearn_utils.py create mode 100644 AeroViz/plot/violin.py delete mode 100644 AeroViz/process/__init__.py delete mode 100644 AeroViz/process/core/DataProc.py delete mode 100644 AeroViz/process/core/SizeDist.py delete mode 100644 AeroViz/process/core/__init__.py delete mode 100644 AeroViz/process/method/PyMieScatt_update.py delete mode 100644 AeroViz/process/method/__init__.py delete mode 100644 AeroViz/process/method/mie_theory.py delete mode 100644 AeroViz/process/method/prop.py delete mode 100644 AeroViz/process/script/AbstractDistCalc.py delete mode 100644 AeroViz/process/script/Chemical.py delete mode 100644 AeroViz/process/script/IMPACT.py delete mode 100644 AeroViz/process/script/IMPROVE.py delete mode 100644 AeroViz/process/script/Others.py delete mode 100644 AeroViz/process/script/PSD.py delete mode 100644 AeroViz/process/script/PSD_dry.py delete mode 100644 AeroViz/process/script/__init__.py delete mode 100644 AeroViz/process/script/retrieve_RI.py create mode 100644 AeroViz/rawDataReader/config/__init__.py create mode 100644 AeroViz/rawDataReader/config/supported_instruments.py create mode 100644 AeroViz/rawDataReader/core/logger.py create mode 100644 AeroViz/rawDataReader/core/qc.py create mode 100644 AeroViz/rawDataReader/script/EPA.py delete mode 100644 AeroViz/rawDataReader/script/EPA_vertical.py create mode 100644 AeroViz/rawDataReader/script/IGAC.py delete mode 100644 AeroViz/rawDataReader/script/IGAC_TH.py delete mode 100644 AeroViz/rawDataReader/script/IGAC_ZM.py create mode 100644 AeroViz/rawDataReader/script/Minion.py create mode 100644 AeroViz/rawDataReader/script/OCEC.py delete mode 100644 AeroViz/rawDataReader/script/OCEC_LCRES.py delete mode 100644 AeroViz/rawDataReader/script/OCEC_RES.py create mode 100644 AeroViz/rawDataReader/script/SMPS.py delete mode 100644 AeroViz/rawDataReader/script/SMPS_TH.py delete mode 100644 AeroViz/rawDataReader/script/SMPS_aim11.py delete mode 100644 AeroViz/rawDataReader/script/SMPS_genr.py delete mode 100644 AeroViz/rawDataReader/script/Table.py create mode 100644 AeroViz/rawDataReader/script/VOC.py delete mode 100644 AeroViz/rawDataReader/script/VOC_TH.py delete mode 100644 AeroViz/rawDataReader/script/VOC_ZM.py create mode 100644 AeroViz/rawDataReader/script/XRF.py delete mode 100644 AeroViz/rawDataReader/utils/config.py delete mode 100644 AeroViz/tools/datareader.py delete mode 100644 asserts/diagram.py rename {asserts => assets}/figure/IMPROVE_MLR.png (100%) rename {asserts => assets}/figure/IMPROVE_donut.png (100%) rename {asserts => assets}/figure/IMPROVE_donuts.png (100%) rename {asserts => assets}/figure/IMPROVE_pie.png (100%) rename {asserts => assets}/figure/Mie_MEE.png (100%) rename {asserts => assets}/figure/Mie_Q.png (100%) 
rename {asserts => assets}/figure/OverPSD.png (100%) rename {asserts => assets}/figure/corr_matrix.png (100%) rename {asserts => assets}/figure/psd_3D.png (100%) rename {asserts => assets}/figure/scatter.png (100%) rename {asserts => assets}/figure/windrose_CBPF.png (100%) rename {asserts => assets}/media/logo-social-discord.png (100%) rename {asserts => assets}/media/logo-social-github.png (100%) rename {asserts => assets}/media/logo-social-instagram.png (100%) rename {asserts => assets}/media/logo-social-linkedin.png (100%) rename {asserts => assets}/media/logo-social-medium.png (100%) rename {asserts => assets}/media/logo-social-threads.png (100%) rename {asserts => assets}/media/logo-social-tiktok.png (100%) rename {asserts => assets}/media/logo-social-twitter.png (100%) rename {asserts => assets}/media/logo-social-youtube.png (100%) rename {asserts => assets}/media/logo-transparent.png (100%) create mode 100644 docs/CHANGELOG.md create mode 100644 docs/api/RawDataReader.md create mode 100644 docs/example/scatter_examples.py create mode 100644 docs/guide/RawDataReader.md rename docs/{user_guide.md => guide/plot.md} (80%) create mode 100644 docs/guide/support_voc.md create mode 100644 docs/index.md delete mode 100755 install_mac_linux.sh delete mode 100644 install_windows.bat create mode 100644 pyproject.toml delete mode 100644 requirements.txt create mode 100644 requirements/requirements-dev.txt create mode 100644 requirements/requirements-docs.txt create mode 100644 requirements/requirements.txt create mode 100755 scripts/install_unix.sh create mode 100644 scripts/install_windows.bat create mode 100644 tests/test_RawDataReader.py create mode 100644 tests/test_import.py diff --git a/.github/workflows/cleanup.yml b/.github/workflows/cleanup.yml new file mode 100644 index 0000000..8b17896 --- /dev/null +++ b/.github/workflows/cleanup.yml @@ -0,0 +1,103 @@ +# .github/workflows/cleanup.yml +name: Repository Cleanup + +on: + workflow_dispatch: + inputs: + action_type: + description: '選擇要執行的操作' + required: true + type: choice + options: + - 'Cleanup Workflow' + - 'Cleanup Deployments' + workflow_status: + description: '要清理的工作流程狀態 (僅在選擇 Cleanup Workflow 時需要)' + required: false + type: choice + options: + - 'disabled' # 已停用的工作流程 + - 'active' # 活躍的工作流程 + - 'all' # 所有工作流程 + environment: + description: '要清理的部署環境 (僅在選擇 Cleanup Deployments 時需要)' + required: false + type: choice + options: + - 'all' + - 'github-pages' + - 'pypi' + +jobs: + cleanup-workflows: + if: ${{ github.event.inputs.action_type == 'Cleanup Workflow' }} + runs-on: ubuntu-latest + permissions: + actions: write + steps: + - name: Cleanup workflows + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const status = '${{ github.event.inputs.workflow_status }}'; + console.log(`Cleaning up workflows with status: ${status}`); + + // 獲取所有工作流程 + const workflows = await github.rest.actions.listRepoWorkflows({ + owner: context.repo.owner, + repo: context.repo.repo + }); + + for (const workflow of workflows.data.workflows) { + // 根據選擇的狀態過濾工作流程 + if (status === 'all' || + (status === 'disabled' && !workflow.state === 'active') || + (status === 'active' && workflow.state === 'active')) { + + console.log(`Processing workflow: ${workflow.name} (${workflow.state})`); + + // 獲取此工作流程的所有運行 + const runs = await github.rest.actions.listWorkflowRuns({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: workflow.id, + }); + + // 刪除運行 + console.log(`Found ${runs.data.total_count} runs to 
delete`); + for (const run of runs.data.workflow_runs) { + console.log(`Deleting run #${run.run_number} of ${workflow.name}`); + await github.rest.actions.deleteWorkflowRun({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: run.id + }); + } + } + } + console.log('Cleanup completed'); + + cleanup-deployments: + if: ${{ github.event.inputs.action_type == 'Cleanup Deployments' }} + runs-on: ubuntu-latest + permissions: + deployments: write + actions: write + contents: write + steps: + - name: Delete github-pages deployments + if: ${{ github.event.inputs.environment == 'github-pages' || github.event.inputs.environment == 'all' }} + uses: strumwolf/delete-deployment-environment@v3 + with: + token: ${{ secrets.GITHUB_TOKEN }} + environment: github-pages + onlyRemoveDeployments: true + + - name: Delete pypi deployments + if: ${{ github.event.inputs.environment == 'pypi' || github.event.inputs.environment == 'all' }} + uses: strumwolf/delete-deployment-environment@v3 + with: + token: ${{ secrets.GITHUB_TOKEN }} + environment: pypi + onlyRemoveDeployments: true \ No newline at end of file diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..99602c0 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,135 @@ +name: Publish AeroViz + +on: + push: + tags: + - 'v*' + +jobs: + build-and-test: + strategy: + matrix: + python-version: [ "3.11", "3.12" ] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel build + pip install -e . 
+ pip install -e ".[test]" + + - name: Run tests + run: | + pytest tests/ -m "not requires_data" + + - name: Verify package version matches tag + run: | + TAG_VERSION=${GITHUB_REF#refs/tags/v} + PACKAGE_VERSION=$(python setup.py --version) + + if [ "$PACKAGE_VERSION" != "$TAG_VERSION" ]; then + echo "Version mismatch:" + echo " - Tag version: $TAG_VERSION" + echo " - Package version: $PACKAGE_VERSION" + exit 1 + else + echo "Version match: $TAG_VERSION" + fi + + - name: Build package + run: python -m build + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions-${{ matrix.python-version }} + path: dist/ + + publish-test: + needs: build-and-test + runs-on: ubuntu-latest + environment: + name: test-pypi + url: https://test.pypi.org/p/AeroViz + permissions: + id-token: write + + steps: + # Download artifacts from Python 3.12 build only + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: python-package-distributions-3.12 + path: dist/ + + - name: Publish to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + + publish-prod: + needs: publish-test + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/AeroViz + permissions: + id-token: write + + steps: + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: python-package-distributions-3.12 + path: dist/ + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + + github-release: + name: Create GitHub Release + needs: publish-prod + runs-on: ubuntu-latest + permissions: + contents: write + id-token: write + + steps: + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: python-package-distributions-3.12 + path: dist/ + + - name: Sign the dists with Sigstore + uses: sigstore/gh-action-sigstore-python@v2.1.1 + with: + inputs: >- + ./dist/*.tar.gz + ./dist/*.whl + + - name: Create GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + run: >- + gh release create + '${{ github.ref_name }}' + --repo '${{ github.repository }}' + --notes "Release ${{ github.ref_name }}" + + - name: Upload artifacts to GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + run: >- + gh release upload + '${{ github.ref_name }}' dist/** + --repo '${{ github.repository }}' \ No newline at end of file diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml new file mode 100644 index 0000000..99829c3 --- /dev/null +++ b/.github/workflows/pytest.yml @@ -0,0 +1,49 @@ +name: Python Tests + +on: + push: + branches: [ main, master ] + pull_request: + branches: [ main, master ] + +jobs: + test: + strategy: + matrix: + python-version: [ "3.11", "3.12" ] + os: [ ubuntu-latest ] + + fail-fast: false + + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python 3.XX + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' # 啟用 pip 緩存加速安裝 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e . 
+ pip install -e ".[test]" + + - name: Run tests with coverage + run: | + pytest tests/ -m "not requires_data" \ + --cov=AeroViz \ + --cov-report=term-missing \ + --cov-report=xml \ + -v + + - name: Upload coverage reports + uses: actions/upload-artifact@v4 + with: + name: coverage-report-${{ matrix.python-version }}-${{ github.sha }} + path: coverage.xml + if-no-files-found: error diff --git a/.gitignore b/.gitignore index f2646fd..face19f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,7 @@ -# # MacOX product .DS_store -tests/ temp/ -example/ # Byte-compiled / optimized / DLL files __pycache__/ @@ -35,7 +32,7 @@ share/python-wheels/ MANIFEST # PyInstaller -# Usually these files are written by a python script from a templates +# Usually these files are written by a python script from a data # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec @@ -161,7 +158,7 @@ dmypy.json cython_debug/ # PyCharm -# JetBrains specific templates is maintained in a separate JetBrains.gitignore that can +# JetBrains specific data is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. diff --git a/AeroViz/__init__.py b/AeroViz/__init__.py index baa411e..8063006 100644 --- a/AeroViz/__init__.py +++ b/AeroViz/__init__.py @@ -1,12 +1,13 @@ # This file is used to import all the modules in the AeroViz package from AeroViz import plot -from AeroViz.dataProcess import Optical, SizeDistr, Chemistry, VOC +from AeroViz.dataProcess import DataProcess from AeroViz.rawDataReader import RawDataReader -from AeroViz.tools import DataBase, DataReader, DataClassifier +from AeroViz.tools import DataBase, DataClassifier __all__ = [ - 'plot', - 'RawDataReader', - 'Optical', 'SizeDistr', 'Chemistry', 'VOC', - 'DataBase', 'DataReader', 'DataClassifier' + 'plot', + 'RawDataReader', + 'DataProcess', + 'DataBase', + 'DataClassifier' ] diff --git a/AeroViz/config/DEFAULT_DATA.csv b/AeroViz/data/DEFAULT_DATA.csv similarity index 99% rename from AeroViz/config/DEFAULT_DATA.csv rename to AeroViz/data/DEFAULT_DATA.csv index d8efbe4..3305954 100644 --- a/AeroViz/config/DEFAULT_DATA.csv +++ b/AeroViz/data/DEFAULT_DATA.csv @@ -1,4 +1,4 @@ 
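For reference, the reorganized top-level API in the AeroViz/__init__.py hunk above can be exercised with a short import check. This is only an illustration of the names now exported via __all__, not an excerpt from the new test suite.

# Smoke test of the public surface shown in the __init__.py hunk: the old
# per-module imports (Optical, SizeDistr, Chemistry, VOC, DataReader) are gone,
# replaced by DataProcess, and the deprecated AeroViz.process package no longer exists.
from AeroViz import plot, RawDataReader, DataProcess, DataBase, DataClassifier

for obj in (plot, RawDataReader, DataProcess, DataBase, DataClassifier):
    print(obj)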
-Time,SO2,NO,NOx,NO2,CO,O3,THC,CH4,NMHC,PM10,PM25,WS,WD,AT,RH,RT,Benzene,Toluene,EthylBenzene,m/p-Xylene,o-Xylene,Si,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Ga,As,Se,Br,Sr,Ag,Cd,Sn,Sb,Ba,Hg,Tl,Pb,NH3,HF,HCl,HNO2,HNO3,G-SO2,Na+,NH4+,K+,Mg2+,Ca2+,F-,Cl-,NO2-,NO3-,PO43-,SO42-,Extinction,Scattering,Absorption,MEE,MSE,MAE,SSA,SAE450700,AAE370880,Vis_Naked,Vis_LPV,BC,VC,PBLH,T_OC,T_EC,O_OC,O_EC,POC,SOC,NOR,SOR,PM1,ALWC,pH,NH4_status,AS,AN,OM,Soil,SS,EC,SIA,total_mass,unknown_mass,AS_ratio,AN_ratio,OM_ratio,Soil_ratio,SS_ratio,EC_ratio,SIA_ratio,unknown_mass_ratio,AS_volume,AN_volume,OM_volume,Soil_volume,SS_volume,EC_volume,total_volume,AS_volume_ratio,AN_volume_ratio,OM_volume_ratio,Soil_volume_ratio,SS_volume_ratio,EC_volume_ratio,density,ALWC_volume_ratio,gRH,k_amb,k_dry,kappa_chem,kappa_vam,n_amb,n_dry,AS_ext_dry,AN_ext_dry,OM_ext_dry,Soil_ext_dry,SS_ext_dry,EC_ext_dry,total_ext_dry,AS_ext,AN_ext,OM_ext,Soil_ext,SS_ext,EC_ext,total_ext,ALWC_AS_ext,ALWC_AN_ext,ALWC_SS_ext,ALWC_ext,fRH_IMPR,ScatteringByGas,AbsorptionByGas,ExtinctionByGas,Number,GMDn,GSDn,mode_n,ultra_n,accum_n,coarse_n,Surface,GMDs,GSDs,mode_s,ultra_s,accum_s,coarse_s,Volume,GMDv,GSDv,mode_v,ultra_v,accum_v,coarse_v,Bext_internal,GMDext_in,GSDext_in,mode_ext_in,ultra_ext_in,accum_ext_in,coarse_ext_in,Bsca_internal,Babs_internal,Bext_external,GMDext_ex,GSDext_ex,mode_ext_ex,ultra_ext_ex,accum_ext_ex,coarse_ext_ex,Bsca_external,Babs_external,Bext_Fixed_PNSD,Bext_Fixed_RI,PG,MAC,Ox,N2O5_tracer,Vis_cal,OCEC_ratio,PM1/PM25,MEE_PNSD +Time,SO2,NO,NOx,NO2,CO,O3,THC,CH4,NMHC,PM10,PM2.5,WS,WD,AT,RH,RT,Benzene,Toluene,EthylBenzene,m/p-Xylene,o-Xylene,Si,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Ga,As,Se,Br,Sr,Ag,Cd,Sn,Sb,Ba,Hg,Tl,Pb,NH3,HF,HCl,HNO2,HNO3,G-SO2,Na+,NH4+,K+,Mg2+,Ca2+,F-,Cl-,NO2-,NO3-,PO43-,SO42-,Extinction,Scattering,Absorption,MEE,MSE,MAE,SSA,SAE450700,AAE370880,Vis_Naked,Vis_LPV,BC,VC,PBLH,T_OC,T_EC,O_OC,O_EC,POC,SOC,NOR,SOR,PM1,ALWC,pH,NH4_status,AS,AN,OM,Soil,SS,EC,SIA,total_mass,unknown_mass,AS_ratio,AN_ratio,OM_ratio,Soil_ratio,SS_ratio,EC_ratio,SIA_ratio,unknown_mass_ratio,AS_volume,AN_volume,OM_volume,Soil_volume,SS_volume,EC_volume,total_volume,AS_volume_ratio,AN_volume_ratio,OM_volume_ratio,Soil_volume_ratio,SS_volume_ratio,EC_volume_ratio,density,ALWC_volume_ratio,gRH,k_amb,k_dry,kappa_chem,kappa_vam,n_amb,n_dry,AS_ext_dry,AN_ext_dry,OM_ext_dry,Soil_ext_dry,SS_ext_dry,EC_ext_dry,total_ext_dry,AS_ext,AN_ext,OM_ext,Soil_ext,SS_ext,EC_ext,total_ext,ALWC_AS_ext,ALWC_AN_ext,ALWC_SS_ext,ALWC_ext,fRH_IMPR,ScatteringByGas,AbsorptionByGas,ExtinctionByGas,Number,GMDn,GSDn,mode_n,ultra_n,accum_n,coarse_n,Surface,GMDs,GSDs,mode_s,ultra_s,accum_s,coarse_s,Volume,GMDv,GSDv,mode_v,ultra_v,accum_v,coarse_v,Bext_internal,GMDext_in,GSDext_in,mode_ext_in,ultra_ext_in,accum_ext_in,coarse_ext_in,Bsca_internal,Babs_internal,Bext_external,GMDext_ex,GSDext_ex,mode_ext_ex,ultra_ext_ex,accum_ext_ex,coarse_ext_ex,Bsca_external,Babs_external,Bext_Fixed_PNSD,Bext_Fixed_RI,PG,MAC,Ox,N2O5_tracer,Vis_cal,OCEC_ratio,PM1/PM25,MEE_PNSD 2021-02-01 
00:00:00,2.5,10.4,53.0,42.6,1.3,14.6,2.2,2.0,0.2,84.0,56.0,1.3,306.0,20.5,76.8,24.4,0.99,2.67,0.19,0.68,0.21,,,,,,,,,,,,,,,,,,,,,,,,12.5774,,0.5693,0.4759,,0.0714,0.4765,11.6625,0.0743,0.2798,0.2885,,0.1486,0.5551,6.4869,,2.9681,179.879,129.306,50.573,3.212125969,2.309035714,0.903090254,0.718849677,1.624,1.356,,2.4,3593.466667,48.54,37.339,0.540278143,0.169467395,,,7.108993122,2.413756878,0.056,0.229,35.56,15.537361,3.88663594,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.3805792163543,14.058,25.4385792163544,65073.8724853036,26.1370756001682,2.27387008559594,11.8,0.9,0.1,0.0,1056999978.60697,202.561522810954,2.46787275826095,175.1255164,0.17,0.8,0.03,51861790000.2695,421.511551165032,2.3298013391531,378.4899973,0.03,0.8,0.16,,,,,,,,,,,,,,,,,,,,,205.317579216354,298.423186359831,57.2,621.96,6.09298472862313,,0.635, 2021-02-01 01:00:00,2.1,1.8,36.7,34.8,0.9,21.1,2.2,2.1,0.1,73.0,49.0,1.2,291.0,19.7,80.5,24.4,1.14,1.84,0.12,0.43,0.11,,,,,,,,,,,,,,,,,,,,,,,,12.0403,,0.5965,0.3095,,0.0355,0.4456,11.057,0.0568,0.284,0.2534,,0.1092,0.2621,5.8583,,2.8003,162.183,120.322,41.861,3.309852291,2.45555102,0.854301271,0.741891421,1.668,1.285,10.0,2.4,3008.316667,50.13,41.775,0.466460746,0.148629793,,,6.036647232,1.923627768,0.061,0.25,32.2,18.378917,3.919787846,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.4116843184148,11.484,22.8956843184148,42275.3347651561,32.3417554250119,2.43890896537368,11.8,0.85,0.15,0.0,979132241.788556,213.495369564376,2.34216600590469,192.8357499,0.16,0.81,0.03,49066097131.0516,420.683242663998,2.27756054854188,378.4899973,0.03,0.81,0.15,,,,,,,,,,,,,,,,,,,,,185.078684318415,281.646089623498,55.9,734.28,6.75779828958646,,0.657142857142857, 2021-02-01 02:00:00,2.9,9.8,61.0,51.1,0.99,10.7,2.4,2.1,0.3,94.0,70.0,1.3,299.0,19.4,82.8,24.4,1.08,1.98,0.14,0.14,0.12,0.479322,0.013841,0.001037,0.002118,0.026962,0.49815,,,0.039141,0.140642,,0.008099,0.003098,0.023387,,,,0.018278,0.011566,0.005437,,,0.009238,12.0026,,0.3118,0.2484,,0.0514,0.424,12.8777,0.0656,0.2885,0.2404,,0.1137,0.4371,8.3928,,2.7932,208.59,158.844,49.746,2.979859428,2.2692,0.710659428,0.761512432,1.75,1.31,,2.55,3570.75,58.135,44.719,0.624357056,0.20177276,,,7.06735645,2.12126855,0.06,0.194,34.93,24.048226,3.879511152,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.4233926128591,16.863,28.2863926128591,62303.8712944327,30.3238605222433,2.49086526854534,11.8,0.85,0.15,0.0,1391459557.50536,211.541054105552,2.30001297386085,186.741787,0.15,0.82,0.02,67916033048.9499,402.359688194295,2.22606883392695,366.5290201,0.03,0.84,0.13,,,,,,,,,,,,,,,,,,,,,236.876392612859,246.544677289442,61.8,546.77,5.25432666954312,,0.499, diff --git a/AeroViz/config/DEFAULT_PNSD_DATA.csv b/AeroViz/data/DEFAULT_PNSD_DATA.csv similarity index 100% rename from AeroViz/config/DEFAULT_PNSD_DATA.csv rename to AeroViz/data/DEFAULT_PNSD_DATA.csv diff --git a/AeroViz/data/hysplit_example_data.txt b/AeroViz/data/hysplit_example_data.txt new file mode 100644 index 0000000..e888558 --- /dev/null +++ b/AeroViz/data/hysplit_example_data.txt @@ -0,0 +1,101 @@ + 1 1 + AWRF 24 1 25 0 0 + 4 BACKWARD OMEGA + 24 2 27 16 22.630 120.346 10.0 + 24 2 27 16 22.630 120.346 100.0 + 24 2 27 16 22.630 120.346 500.0 + 24 2 27 16 22.630 120.346 1000.0 + 1 PRESSURE + 1 1 24 2 27 16 0 808 0.0 22.630 120.346 10.0 1018.1 + 2 1 24 2 27 16 0 808 0.0 22.630 120.346 100.0 1007.1 + 3 1 24 2 27 16 0 808 0.0 22.630 120.346 500.0 961.2 + 4 1 24 2 27 16 0 808 0.0 22.630 120.346 1000.0 906.2 + 1 1 24 2 27 15 0 807 -1.0 22.700 120.337 13.4 
1016.4 + 2 1 24 2 27 15 0 807 -1.0 22.779 120.325 115.8 1004.4 + 3 1 24 2 27 15 0 807 -1.0 22.615 120.313 471.0 964.8 + 4 1 24 2 27 15 0 807 -1.0 22.535 120.469 1035.2 902.9 + 1 1 24 2 27 14 0 806 -2.0 22.795 120.323 24.3 1015.4 + 2 1 24 2 27 14 0 806 -2.0 23.026 120.322 128.8 1001.6 + 3 1 24 2 27 14 0 806 -2.0 22.606 120.315 471.5 964.7 + 4 1 24 2 27 14 0 806 -2.0 22.484 120.626 1025.3 893.1 + 1 1 24 2 27 13 0 805 -3.0 22.936 120.308 56.6 1010.4 + 2 1 24 2 27 13 0 805 -3.0 23.275 120.355 142.2 1000.7 + 3 1 24 2 27 13 0 805 -3.0 22.612 120.341 492.9 961.7 + 4 1 24 2 27 13 0 805 -3.0 22.490 120.729 641.9 860.0 + 1 1 24 2 27 12 0 804 -4.0 23.133 120.283 74.9 1010.2 + 2 1 24 2 27 12 0 804 -4.0 23.491 120.413 187.7 994.2 + 3 1 24 2 27 12 0 804 -4.0 22.670 120.382 482.0 960.7 + 4 1 24 2 27 12 0 804 -4.0 22.538 120.774 830.3 801.8 + 1 1 24 2 27 11 0 803 -5.0 23.363 120.276 94.7 1007.0 + 2 1 24 2 27 11 0 803 -5.0 23.702 120.464 202.2 992.8 + 3 1 24 2 27 11 0 803 -5.0 22.708 120.455 327.6 977.5 + 4 1 24 2 27 11 0 803 -5.0 22.576 120.736 629.4 831.5 + 1 1 24 2 27 10 0 802 -6.0 23.611 120.281 123.4 1003.6 + 2 1 24 2 27 10 0 802 -6.0 23.924 120.507 235.0 988.9 + 3 1 24 2 27 10 0 802 -6.0 22.691 120.521 95.4 1004.1 + 4 1 24 2 27 10 0 802 -6.0 22.631 120.727 385.5 834.4 + 1 1 24 2 27 9 0 801 -7.0 23.910 120.338 185.0 996.9 + 2 1 24 2 27 9 0 801 -7.0 24.136 120.570 314.7 974.2 + 3 1 24 2 27 9 0 801 -7.0 22.702 120.585 125.1 999.7 + 4 1 24 2 27 9 0 801 -7.0 22.669 120.749 200.9 817.0 + 1 1 24 2 27 8 0 800 -8.0 24.287 120.521 321.6 982.5 + 2 1 24 2 27 8 0 800 -8.0 24.195 120.654 531.7 942.2 + 3 1 24 2 27 8 0 800 -8.0 22.707 120.608 170.4 993.9 + 4 1 24 2 27 8 0 800 -8.0 22.689 120.756 313.0 822.5 + 1 1 24 2 27 7 0 799 -9.0 24.612 120.810 428.8 969.2 + 2 1 24 2 27 7 0 799 -9.0 24.193 120.736 697.7 917.4 + 3 1 24 2 27 7 0 799 -9.0 22.719 120.610 229.7 986.5 + 4 1 24 2 27 7 0 799 -9.0 22.689 120.751 248.2 833.6 + 1 1 24 2 27 6 0 798 -10.0 24.859 121.146 538.3 932.4 + 2 1 24 2 27 6 0 798 -10.0 24.145 120.757 664.1 919.1 + 3 1 24 2 27 6 0 798 -10.0 22.738 120.605 305.9 978.9 + 4 1 24 2 27 6 0 798 -10.0 22.711 120.761 0.0 888.3 + 1 1 24 2 27 5 0 797 -11.0 24.984 121.430 392.9 975.2 + 2 1 24 2 27 5 0 797 -11.0 24.074 120.749 654.4 923.5 + 3 1 24 2 27 5 0 797 -11.0 22.731 120.603 495.6 958.3 + 4 1 24 2 27 5 0 797 -11.0 22.700 120.785 175.9 829.1 + 1 1 24 2 27 4 0 796 -12.0 25.158 121.638 503.0 954.6 + 2 1 24 2 27 4 0 796 -12.0 23.989 120.737 641.7 927.8 + 3 1 24 2 27 4 0 796 -12.0 22.693 120.592 648.1 943.6 + 4 1 24 2 27 4 0 796 -12.0 22.685 120.810 145.4 853.0 + 1 1 24 2 27 3 0 795 -13.0 25.451 121.825 386.1 978.5 + 2 1 24 2 27 3 0 795 -13.0 23.906 120.739 645.3 929.6 + 3 1 24 2 27 3 0 795 -13.0 22.657 120.583 682.1 940.8 + 4 1 24 2 27 3 0 795 -13.0 22.655 120.810 8.7 903.0 + 1 1 24 2 27 2 0 794 -14.0 25.789 121.989 450.1 971.0 + 2 1 24 2 27 2 0 794 -14.0 23.854 120.722 640.7 924.2 + 3 1 24 2 27 2 0 794 -14.0 22.618 120.552 686.5 941.2 + 4 1 24 2 27 2 0 794 -14.0 22.628 120.817 147.7 890.5 + 1 1 24 2 27 1 0 793 -15.0 26.127 122.136 554.7 959.4 + 2 1 24 2 27 1 0 793 -15.0 23.815 120.684 526.0 936.4 + 3 1 24 2 27 1 0 793 -15.0 22.537 120.538 763.0 933.1 + 4 1 24 2 27 1 0 793 -15.0 22.630 120.825 156.0 898.7 + 1 1 24 2 27 0 0 792 -16.0 26.425 122.289 711.1 940.7 + 2 1 24 2 27 0 0 792 -16.0 23.793 120.665 527.9 942.8 + 3 1 24 2 27 0 0 792 -16.0 22.498 120.642 901.2 897.5 + 4 1 24 2 27 0 0 792 -16.0 22.633 120.822 130.8 898.3 + 1 1 24 2 26 23 0 791 -17.0 26.705 122.454 872.6 921.4 + 2 1 24 2 26 23 0 791 -17.0 23.798 
120.671 524.1 940.3 + 3 1 24 2 26 23 0 791 -17.0 22.514 120.689 734.8 863.9 + 4 1 24 2 26 23 0 791 -17.0 22.628 120.825 80.9 904.3 + 2 1 24 2 26 22 0 790 -18.0 23.816 120.688 493.1 939.0 + 3 1 24 2 26 22 0 790 -18.0 22.559 120.685 771.9 865.4 + 4 1 24 2 26 22 0 790 -18.0 22.623 120.829 56.0 907.8 + 2 1 24 2 26 21 0 789 -19.0 23.836 120.711 427.5 946.0 + 3 1 24 2 26 21 0 789 -19.0 22.557 120.666 660.4 899.0 + 4 1 24 2 26 21 0 789 -19.0 22.623 120.833 82.4 907.7 + 2 1 24 2 26 20 0 788 -20.0 23.865 120.751 489.8 941.5 + 3 1 24 2 26 20 0 788 -20.0 22.525 120.624 588.1 942.1 + 4 1 24 2 26 20 0 788 -20.0 22.636 120.848 98.3 914.3 + 2 1 24 2 26 19 0 787 -21.0 23.874 120.791 693.5 909.2 + 3 1 24 2 26 19 0 787 -21.0 22.492 120.608 624.9 943.0 + 4 1 24 2 26 19 0 787 -21.0 22.667 120.865 193.7 891.2 + 2 1 24 2 26 18 0 786 -22.0 23.874 120.793 743.4 902.1 + 3 1 24 2 26 18 0 786 -22.0 22.482 120.601 703.4 938.1 + 4 1 24 2 26 18 0 786 -22.0 22.747 120.984 287.0 906.5 + 2 1 24 2 26 17 0 785 -23.0 23.885 120.793 766.4 902.4 + 3 1 24 2 26 17 0 785 -23.0 22.471 120.582 842.0 922.5 + 4 1 24 2 26 17 0 785 -23.0 22.867 121.070 462.0 920.8 + 2 1 24 2 26 16 0 784 -24.0 23.895 120.778 859.2 898.6 + 3 1 24 2 26 16 0 784 -24.0 22.446 120.618 1090.1 893.0 + 4 1 24 2 26 16 0 784 -24.0 22.973 121.135 790.6 906.9 diff --git a/AeroViz/dataProcess/Chemistry/__init__.py b/AeroViz/dataProcess/Chemistry/__init__.py index 73657b1..30296d6 100644 --- a/AeroViz/dataProcess/Chemistry/__init__.py +++ b/AeroViz/dataProcess/Chemistry/__init__.py @@ -1,63 +1,64 @@ -from ..core import _writter, _run_process +from ..core import Writer, run_process -__all__ = [ +__all__ = ['Chemistry'] - 'Chemistry', -] +class Chemistry(Writer): + # Reconstruction + @run_process('Chemistry - reconstruction basic', 'reconstrc_basic') + def ReConstrc_basic(self, *df_chem, df_ref=None, df_water=None, df_density=None, nam_lst=None): + from ._mass_volume import _basic + if nam_lst is None: + nam_lst = ['NH4+', 'SO42-', 'NO3-', 'Fe', 'Na+', 'OC', 'EC'] -class Chemistry(_writter): + out = _basic(df_chem, df_ref, df_water, df_density, nam_lst=nam_lst) - ## Reconstruction - @_run_process('Chemistry - reconstruction basic', 'reconstrc_basic') - def ReConstrc_basic(self, *df_chem, df_ref=None, df_water=None, df_density=None, - nam_lst=['NH4+', 'SO42-', 'NO3-', 'Fe', 'Na+', 'OC', 'EC']): - from ._mass_volume import _basic + return self, out - out = _basic(df_chem, df_ref, df_water, df_density, nam_lst=nam_lst) + # Partition + @run_process('Chemistry - Partition', 'partition') + def Partition(self, *df_chem, nam_lst=None): + from ._partition import _basic - return self, out + if nam_lst is None: + nam_lst = ['NH4+', 'SO42-', 'NO3-', 'Cl-', 'NO2', 'HNO3', 'SO2', 'NH3', 'HCl', 'temp'] - ## Partition - @_run_process('Chemistry - Partition', 'partition') - def Partition(self, *df_chem, nam_lst=['NH4+', 'SO42-', 'NO3-', 'Cl-', 'NO2', 'HNO3', 'SO2', 'NH3', 'HCl', 'temp']): - from ._partition import _basic + out = _basic(df_chem, nam_lst=nam_lst) - out = _basic(df_chem, nam_lst=nam_lst) + return self, out - return self, out + # ISOROPIA + @run_process('Chemistry - ISOROPIA', 'isoropia') + def ISOROPIA(self, *df_chem, nam_lst=None): + from ._isoropia import _basic - ## ISOROPIA - @_run_process('Chemistry - ISOROPIA', 'isoropia') - def ISOROPIA(self, *df_chem, - nam_lst=['Na+', 'SO42-', 'NH4+', 'NO3-', 'Cl-', 'Ca2+', 'K+', 'Mg2+', 'NH3', 'HNO3', 'HCl', 'RH', - 'temp']): - from ._isoropia import _basic + if nam_lst is None: + nam_lst = ['Na+', 'SO42-', 'NH4+', 'NO3-', 
'Cl-', 'Ca2+', + 'K+', 'Mg2+', 'NH3', 'HNO3', 'HCl', 'RH', 'temp'] - if self.path_out is None: - raise ValueError('Please Input "path_out" !!') + if self.path_out is None: + raise ValueError('Please Input "path_out" !!') - out = _basic(df_chem, self.path_out, nam_lst=nam_lst) + out = _basic(df_chem, self.path_out, nam_lst=nam_lst) - return self, out + return self, out - ## OCEC - @_run_process('Chemistry - OC/EC basic', 'ocec_basic') - def OCEC_basic(self, df_lcres, df_res, df_mass=None, ocec_ratio=None, ocec_ratio_month=1, hr_lim=200, - least_square_range=(0.1, 2.5, 0.1), WISOC_OC_range=(0.2, 0.7, 0.01), ): - from ._ocec import _basic + # OCEC + @run_process('Chemistry - OC/EC basic', 'ocec_basic') + def OCEC_basic(self, df_lcres, df_mass=None, ocec_ratio=None, ocec_ratio_month=1, hr_lim=200, + least_square_range=(0.1, 2.5, 0.1), WISOC_OC_range=(0.2, 0.7, 0.01), ): + from ._ocec import _basic - out = _basic(df_lcres, df_res, df_mass, ocec_ratio, ocec_ratio_month, hr_lim, least_square_range, - WISOC_OC_range) + out = _basic(df_lcres, df_mass, ocec_ratio, ocec_ratio_month, hr_lim, least_square_range, WISOC_OC_range) - return self, out + return self, out - ## TEOM - @_run_process('Chemistry - TEOM basic', 'teom_basic') - def TEOM_basic(self, df_teom, df_check=None): - from ._teom import _basic + # TEOM + @run_process('Chemistry - TEOM basic', 'teom_basic') + def TEOM_basic(self, df_teom, df_check=None): + from ._teom import _basic - out = _basic(df_teom, df_check) + out = _basic(df_teom, df_check) - return self, out + return self, out diff --git a/AeroViz/dataProcess/Chemistry/_calculate.py b/AeroViz/dataProcess/Chemistry/_calculate.py index 232ff57..f8e7c1b 100644 --- a/AeroViz/dataProcess/Chemistry/_calculate.py +++ b/AeroViz/dataProcess/Chemistry/_calculate.py @@ -1,27 +1,27 @@ -from pandas import concat, DataFrame +from pandas import concat # parameter _mol_wg = { - 'SO42-': 96.06, - 'NO3-': 62.00, - 'Cl-': 35.4, + 'SO42-': 96.06, + 'NO3-': 62.00, + 'Cl-': 35.4, - 'Ca2+': 40.078, - 'K+': 39.098, - 'Mg2+': 24.305, - 'Na+': 22.99, - 'NH4+': 18.04, + 'Ca2+': 40.078, + 'K+': 39.098, + 'Mg2+': 24.305, + 'Na+': 22.99, + 'NH4+': 18.04, } # ug -> umol def _ug2umol(_df): - _pt_ky = list(set(_df.keys()) & set(_mol_wg.keys())) - _gas_ky = list(set(_df.keys()) - set(_mol_wg.keys()) - set(['temp', 'RH'])) + _pt_ky = list(set(_df.keys()) & set(_mol_wg.keys())) + _gas_ky = list(set(_df.keys()) - set(_mol_wg.keys()) - set(['temp', 'RH'])) - _par = (_df['temp'].to_frame() + 273.15) * .082 + _par = (_df['temp'].to_frame() + 273.15) * .082 - _df_pt = concat([(_df[_ky] / _mol_wg[_ky]).copy() for _ky in _pt_ky], axis=1) - _df_gas = _df[_gas_ky] / _par.values + _df_pt = concat([(_df[_ky] / _mol_wg[_ky]).copy() for _ky in _pt_ky], axis=1) + _df_gas = _df[_gas_ky] / _par.values - return concat([_df_pt, _df_gas], axis=1) + return concat([_df_pt, _df_gas], axis=1) diff --git a/AeroViz/dataProcess/Chemistry/_isoropia.py b/AeroViz/dataProcess/Chemistry/_isoropia.py index d2c12ef..04d495d 100644 --- a/AeroViz/dataProcess/Chemistry/_isoropia.py +++ b/AeroViz/dataProcess/Chemistry/_isoropia.py @@ -1,99 +1,100 @@ +from pathlib import Path from subprocess import Popen, PIPE -from pandas import date_range, concat, DataFrame, to_numeric, read_csv -from ._calculate import _ug2umol + import numpy as np +from pandas import concat, DataFrame, to_numeric, read_csv -from pathlib import Path +from ._calculate import _ug2umol def _basic(df_che, path_out, nam_lst): - # parameter - df_all = concat(df_che, axis=1) - index = 
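# --- illustrative note on _ug2umol above, not part of the patch ---------------
# Particle species are converted from ug/m3 to umol/m3 by dividing by molar mass;
# gas species are divided by 0.082 * (T + 273.15), the ideal-gas molar volume in
# L/mol at 1 atm, which turns a ppb mixing ratio into umol/m3 (the ppb unit is an
# assumption consistent with the formula, not stated explicitly in the diff).
so4_umol = 9.6 / 96.06                      # ~0.10 umol/m3 from 9.6 ug/m3 SO42-
nh3_umol = 2.0 / (0.082 * (25 + 273.15))    # ~0.08 umol/m3 from 2.0 ppb NH3 at 25 degC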
df_all.index.copy() - df_all.columns = nam_lst + # parameter + df_all = concat(df_che, axis=1) + index = df_all.index.copy() + df_all.columns = nam_lst - df_umol = _ug2umol(df_all) + df_umol = _ug2umol(df_all) - ## output - ## Na, SO4, NH3, NO3, Cl, Ca, K, Mg, RH, TEMP - df_input = DataFrame(index=index) - df_out = DataFrame(index=index) + # output + # Na, SO4, NH3, NO3, Cl, Ca, K, Mg, RH, TEMP + df_input = DataFrame(index=index) + df_out = DataFrame(index=index) - pth_input = path_out / '_temp_input.txt' - pth_output = path_out / '_temp_input.dat' + pth_input = path_out / '_temp_input.txt' + pth_output = path_out / '_temp_input.dat' - pth_input.unlink(missing_ok=True) - pth_output.unlink(missing_ok=True) + pth_input.unlink(missing_ok=True) + pth_output.unlink(missing_ok=True) - ## header - _header = 'Input units (0=umol/m3, 1=ug/m3)\n' + '0\n\n' + \ - 'Problem type (0=forward, 1=reverse); Phase state (0=solid+liquid, 1=metastable)\n' + '0, 1\n\n' + \ - 'NH4-SO4 system case\n' + # header + _header = 'Input units (0=umol/m3, 1=ug/m3)\n' + '0\n\n' + \ + 'Problem type (0=forward, 1=reverse); Phase state (0=solid+liquid, 1=metastable)\n' + '0, 1\n\n' + \ + 'NH4-SO4 system case\n' - ## software - path_iso = Path(__file__).parent / 'isrpia2.exe' + # software + path_iso = Path(__file__).parent / 'isrpia2.exe' - # make input file and output temp input (without index) - ## NH3 - df_input['NH3'] = df_umol['NH4+'].fillna(0).copy() + df_umol['NH3'] + # make input file and output temp input (without index) + # NH3 + df_input['NH3'] = df_umol['NH4+'].fillna(0).copy() + df_umol['NH3'] - ## NO3 - df_input['NO3'] = df_umol['HNO3'].fillna(0).copy() + df_umol['NO3-'] + # NO3 + df_input['NO3'] = df_umol['HNO3'].fillna(0).copy() + df_umol['NO3-'] - ## Cl - df_input['Cl'] = df_umol['HCl'].fillna(0).copy() + df_umol['Cl-'] + # Cl + df_input['Cl'] = df_umol['HCl'].fillna(0).copy() + df_umol['Cl-'] - ## temp, RH - df_input['RH'] = df_all['RH'] / 100 - df_input['TEMP'] = df_all['temp'] + 273.15 + # temp, RH + df_input['RH'] = df_all['RH'] / 100 + df_input['TEMP'] = df_all['temp'] + 273.15 - df_input[['Na', 'SO4', 'Ca', 'K', 'Mg']] = df_umol[['Na+', 'SO42-', 'Ca2+', 'K+', 'Mg2+']].copy() + df_input[['Na', 'SO4', 'Ca', 'K', 'Mg']] = df_umol[['Na+', 'SO42-', 'Ca2+', 'K+', 'Mg2+']].copy() - df_input = df_input[['Na', 'SO4', 'NH3', 'NO3', 'Cl', 'Ca', 'K', 'Mg', 'RH', 'TEMP']].fillna('-').copy() + df_input = df_input[['Na', 'SO4', 'NH3', 'NO3', 'Cl', 'Ca', 'K', 'Mg', 'RH', 'TEMP']].fillna('-').copy() - ## output the input data - df_input.to_csv(pth_input, index=False) - with (pth_input).open('r+', encoding='utf-8', errors='ignore') as _f: - _cont = _f.read() - _f.seek(0) + # output the input data + df_input.to_csv(pth_input, index=False) + with (pth_input).open('r+', encoding='utf-8', errors='ignore') as _f: + _cont = _f.read() + _f.seek(0) - _f.write(_header) - _f.write(_cont) + _f.write(_header) + _f.write(_cont) - # use ISOROPIA2 - run = Popen([path_iso], stdin=PIPE, stdout=PIPE, stderr=PIPE) - scrn_res, run_res = run.communicate(input=str(pth_input.resolve()).encode()) + # use ISOROPIA2 + run = Popen([path_iso], stdin=PIPE, stdout=PIPE, stderr=PIPE) + scrn_res, run_res = run.communicate(input=str(pth_input.resolve()).encode()) - # read dat file and transform to the normal name - cond_idx = df_all[['SO42-', 'NH4+', 'NO3-']].dropna().index + # read dat file and transform to the normal name + cond_idx = df_all[['SO42-', 'NH4+', 'NO3-']].dropna().index - with (pth_output).open('r', encoding='utf-8', errors='ignore') as 
f: - df_res = read_csv(f, delimiter='\s+').apply(to_numeric, errors='coerce').set_index(index) + with pth_output.open('r', encoding='utf-8', errors='ignore') as f: + df_res = read_csv(f, delimiter=r'\s+').apply(to_numeric, errors='coerce').set_index(index) - df_out['H'] = df_res['HLIQ'] / (df_res['WATER'] / 1000) + df_out['H'] = df_res['HLIQ'] / (df_res['WATER'] / 1000) - df_out.loc[cond_idx, 'pH'] = -np.log10(df_out['H'].loc[cond_idx]) - df_out['pH'] = df_out['pH'].where((df_all['RH'] <= 95) & (df_all['RH'] >= 20)) + df_out.loc[cond_idx, 'pH'] = -np.log10(df_out['H'].loc[cond_idx]) + df_out['pH'] = df_out['pH'].where((df_all['RH'] <= 95) & (df_all['RH'] >= 20)) - cond_idx = df_out['pH'].dropna().index - df_out.loc[cond_idx, 'ALWC'] = df_res['WATER'].loc[cond_idx] + cond_idx = df_out['pH'].dropna().index + df_out.loc[cond_idx, 'ALWC'] = df_res['WATER'].loc[cond_idx] - df_out[['NH3', 'HNO3', 'HCl', 'NH4+', 'NO3-', 'Cl-']] = df_res[ - ['GNH3', 'GHNO3', 'GHCL', 'NH4AER', 'NO3AER', 'CLAER']] + df_out[['NH3', 'HNO3', 'HCl', 'NH4+', 'NO3-', 'Cl-']] = df_res[ + ['GNH3', 'GHNO3', 'GHCL', 'NH4AER', 'NO3AER', 'CLAER']] - # calculate partition - # df_out['epls_NO3-'] = df_umol['NO3-'] / (df_umol['NO3-'] + df_umol['HNO3']) - # df_out['epls_NH4+'] = df_umol['NH4+'] / (df_umol['NH4+'] + df_umol['NH3']) - # df_out['epls_Cl-'] = df_umol['Cl-'] / (df_umol['Cl-'] + df_umol['HCl']) + # calculate partition + # df_out['epls_NO3-'] = df_umol['NO3-'] / (df_umol['NO3-'] + df_umol['HNO3']) + # df_out['epls_NH4+'] = df_umol['NH4+'] / (df_umol['NH4+'] + df_umol['NH3']) + # df_out['epls_Cl-'] = df_umol['Cl-'] / (df_umol['Cl-'] + df_umol['HCl']) - # remove _temp file (input and output) - pth_input.unlink(missing_ok=True) - pth_output.unlink(missing_ok=True) + # remove _temp file (input and output) + pth_input.unlink(missing_ok=True) + pth_output.unlink(missing_ok=True) - # output input and output - out = { - 'input': df_input, - 'output': df_out, - } + # output input and output + out = { + 'input': df_input, + 'output': df_out, + } - return out + return out diff --git a/AeroViz/dataProcess/Chemistry/_mass_volume.py b/AeroViz/dataProcess/Chemistry/_mass_volume.py index db295b2..0d9e785 100644 --- a/AeroViz/dataProcess/Chemistry/_mass_volume.py +++ b/AeroViz/dataProcess/Chemistry/_mass_volume.py @@ -1,175 +1,173 @@ -from pandas import date_range, concat, DataFrame, to_numeric +from pandas import concat, DataFrame def _basic(df_che, df_ref, df_water, df_density, nam_lst): - df_all = concat(df_che, axis=1) - index = df_all.index.copy() - df_all.columns = nam_lst - - ## parameter - mol_A, mol_S, mol_N = df_all['NH4+'] / 18, df_all['SO42-'] / 96, df_all['NO3-'] / 62 - df_all['status'] = (mol_A) / (2 * mol_S + mol_N) - - convert_nam = {'AS': 'SO42-', - 'AN': 'NO3-', - 'OM': 'OC', - 'Soil': 'Fe', - 'SS': 'Na+', - 'EC': 'EC', - } - - mass_coe = {'AS': 1.375, - 'AN': 1.29, - 'OM': 1.8, - 'Soil': 28.57, - 'SS': 2.54, - 'EC': 1, - } - - vol_coe = {'AS': 1.76, - 'AN': 1.73, - 'OM': 1.4, - 'Soil': 2.6, - 'SS': 2.16, - 'EC': 1.5, - } - - RI_coe = {'550': {'ALWC': 1.333 + 0j, - 'AS': 1.53 + 0j, - 'AN': 1.55 + 0j, - 'OM': 1.55 + 0.0163j, - 'Soil': 1.56 + 0.006j, - 'SS': 1.54 + 0j, - 'EC': 1.80 + 0.72j, - }, - - ## m + kj -> m value is same as 550 current - '450': {'ALWC': 1.333 + 0j, - 'AS': 1.57 + 0j, - 'AN': 1.57 + 0j, - 'OM': 1.58 + 0.056, - 'Soil': 1.56 + 0.009j, - 'SS': 1.54 + 0j, - 'EC': 1.80 + 0.79j, - }, - } - - ## mass - ## NH4 Enough - df_mass = DataFrame() - df_enough = df_all.where(df_all['status'] >= 1).dropna().copy() - 
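# --- illustrative note on the pH step in _isoropia.py above, not part of the patch ---
# df_out['H'] = HLIQ / (WATER / 1000) reads naturally if HLIQ is umol of H+ per m3 of
# air and WATER (aerosol liquid water) is ug/m3: umol divided by mg of water gives
# mol per kg of water, i.e. roughly molarity, hence pH = -log10(H). These units are
# assumptions consistent with typical ISORROPIA-II output, not stated in the diff.
import numpy as np
hliq, water = 5e-6, 50.0                   # example: 5e-6 umol/m3 H+, 50 ug/m3 ALWC
pH = -np.log10(hliq / (water / 1000))      # = 4.0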
- for _mass_nam, _coe in mass_coe.items(): - df_mass[_mass_nam] = df_all[convert_nam[_mass_nam]] * _coe - - ## NH4 Deficiency - defic_idx = df_all['status'] < 1 - - if defic_idx.any(): - residual = mol_A - 2 * mol_S - - ## residual > 0 - _status = residual > 0 - if _status.any(): - _cond = _status & (residual <= mol_N) - df_mass.loc[_cond, 'AN'] = residual.loc[_cond] * 80 - - _cond = _status & (residual > mol_N) - df_mass.loc[_cond, 'AN'] = mol_N.loc[_cond] * 80 - - ## residual < 0 - _status = residual <= 0 - if _status.any(): - df_mass.loc[_status, 'AN'] = 0 - - _cond = _status & (mol_A <= 2 * mol_S) - df_mass.loc[_cond, 'AS'] = mol_A.loc[_cond] / 2 * 132 - - _cond = _status & (mol_A > 2 * mol_S) - df_mass.loc[_cond, 'AS'] = mol_S.loc[_cond] * 132 - - df_mass_cal = df_mass.dropna().copy() - df_mass['total'] = df_mass.sum(axis=1, min_count=6) - - qc_ratio = df_mass['total'] / df_ref - qc_cond = (qc_ratio >= 0.7) & (qc_ratio <= 1.3) - - ## volume - df_vol = DataFrame() - for _vol_nam, _coe in vol_coe.items(): - df_vol[_vol_nam] = df_mass_cal[_vol_nam] / _coe - - if df_water is not None: - df_vol['ALWC'] = df_water - df_vol = df_vol.dropna() - df_vol['total_wet'] = df_vol.sum(axis=1, min_count=6) - - df_vol['total_dry'] = df_vol[vol_coe.keys()].sum(axis=1, min_count=6) - - ## density - df_vol_cal = DataFrame() - df_den_rec = df_mass['total'] / df_vol['total_dry'] - if df_density is not None: - df_den_all = concat([df_all[['SO42-', 'NO3-', 'NH4+', 'EC']], df_density, df_mass['OM']], axis=1).dropna() - - df_vol_cal = (df_den_all[['SO42-', 'NO3-', 'NH4+']].sum(axis=1) / 1.75) + \ - df_den_all['Cl-'] / 1.52 + \ - df_den_all['OM'] / 1.4 + df_den_all['EC'] / 1.77 - - df_den = df_den_all.sum(axis=1, min_count=6) / df_vol_cal - # df_den = df_den_all.sum(axis=1) / df_vol_cal - # df_den = df_mass['total'].loc[df_den_all.index] / df_vol_cal - - ## refractive index - ri_dic = {} - for _lambda, _coe in RI_coe.items(): - df_RI = DataFrame() + df_all = concat(df_che, axis=1) + index = df_all.index.copy() + df_all.columns = nam_lst + + # parameter + mol_A, mol_S, mol_N = df_all['NH4+'] / 18, df_all['SO42-'] / 96, df_all['NO3-'] / 62 + df_all['status'] = mol_A / (2 * mol_S + mol_N) + + convert_nam = {'AS': 'SO42-', + 'AN': 'NO3-', + 'OM': 'OC', + 'Soil': 'Fe', + 'SS': 'Na+', + 'EC': 'EC', + } + + mass_coe = {'AS': 1.375, + 'AN': 1.29, + 'OM': 1.8, + 'Soil': 28.57, + 'SS': 2.54, + 'EC': 1, + } + + vol_coe = {'AS': 1.76, + 'AN': 1.73, + 'OM': 1.4, + 'Soil': 2.6, + 'SS': 2.16, + 'EC': 1.5, + } + + RI_coe = {'550': {'ALWC': 1.333 + 0j, + 'AS': 1.53 + 0j, + 'AN': 1.55 + 0j, + 'OM': 1.55 + 0.0163j, + 'Soil': 1.56 + 0.006j, + 'SS': 1.54 + 0j, + 'EC': 1.80 + 0.72j, + }, + + # m + kj -> m value is same as 550 current + '450': {'ALWC': 1.333 + 0j, + 'AS': 1.57 + 0j, + 'AN': 1.57 + 0j, + 'OM': 1.58 + 0.056, + 'Soil': 1.56 + 0.009j, + 'SS': 1.54 + 0j, + 'EC': 1.80 + 0.79j, + }, + } + + # mass + # NH4 Enough + df_mass = DataFrame() + df_enough = df_all.where(df_all['status'] >= 1).dropna().copy() + + for _mass_nam, _coe in mass_coe.items(): + df_mass[_mass_nam] = df_all[convert_nam[_mass_nam]] * _coe + + # NH4 Deficiency + defic_idx = df_all['status'] < 1 + + if defic_idx.any(): + residual = mol_A - 2 * mol_S + + # residual > 0 + _status = residual > 0 + if _status.any(): + _cond = _status & (residual <= mol_N) + df_mass.loc[_cond, 'AN'] = residual.loc[_cond] * 80 + + _cond = _status & (residual > mol_N) + df_mass.loc[_cond, 'AN'] = mol_N.loc[_cond] * 80 + + # residual < 0 + _status = residual <= 0 + if _status.any(): + 
df_mass.loc[_status, 'AN'] = 0 + + _cond = _status & (mol_A <= 2 * mol_S) + df_mass.loc[_cond, 'AS'] = mol_A.loc[_cond] / 2 * 132 + + _cond = _status & (mol_A > 2 * mol_S) + df_mass.loc[_cond, 'AS'] = mol_S.loc[_cond] * 132 + + df_mass_cal = df_mass.dropna().copy() + df_mass['total'] = df_mass.sum(axis=1, min_count=6) + + qc_ratio = df_mass['total'] / df_ref + qc_cond = (qc_ratio >= 0.5) & (qc_ratio <= 1.5) + + # volume + df_vol = DataFrame() + for _vol_nam, _coe in vol_coe.items(): + df_vol[_vol_nam] = df_mass_cal[_vol_nam] / _coe + + if df_water is not None: + df_vol['ALWC'] = df_water.copy() + df_vol = df_vol.dropna() + df_vol['total_wet'] = df_vol.sum(axis=1, min_count=6) + + df_vol['total_dry'] = df_vol[vol_coe.keys()].sum(axis=1, min_count=6) + + # density + df_vol_cal = DataFrame() + df_den_rec = df_mass['total'] / df_vol['total_dry'] + if df_density is not None: + df_den_all = concat([df_all[['SO42-', 'NO3-', 'NH4+', 'EC']], df_density, df_mass['OM']], axis=1).dropna() + + df_vol_cal = (df_den_all[['SO42-', 'NO3-', 'NH4+']].sum(axis=1) / 1.75) + \ + df_den_all['Cl-'] / 1.52 + \ + df_den_all['OM'] / 1.4 + df_den_all['EC'] / 1.77 + + df_den = df_den_all.sum(axis=1, min_count=6) / df_vol_cal + else: + df_den = df_den_rec + + # refractive index + ri_dic = {} + for _lambda, _coe in RI_coe.items(): + + df_RI = DataFrame() + + for _ky, _df in df_vol.items(): + if 'total' in _ky: continue + df_RI[_ky] = (_df * _coe[_ky]) + + df_RI['RI_wet'] = None + if df_water is not None: + df_RI['RI_wet'] = (df_RI / df_vol['total_wet'].to_frame().values).sum(axis=1) - for _ky, _df in df_vol.items(): - if 'total' in _ky: continue - df_RI[_ky] = (_df * _coe[_ky]) + df_RI['RI_dry'] = (df_RI[vol_coe.keys()] / df_vol['total_dry'].to_frame().values).sum(axis=1) - df_RI['RI_wet'] = None - if df_water is not None: - df_RI['RI_wet'] = (df_RI / df_vol['total_wet'].to_frame().values).sum(axis=1) + ri_dic[f'RI_{_lambda}'] = df_RI[['RI_dry', 'RI_wet']] - df_RI['RI_dry'] = (df_RI[vol_coe.keys()] / df_vol['total_dry'].to_frame().values).sum(axis=1) + # mole and equivalent + df_eq = concat((mol_A, mol_S, mol_N, mol_A * 1, mol_S * 2, mol_N * 1), axis=1) + df_eq.columns = ['mol_NH4', 'mol_SO4', 'mol_NO3', 'eq_NH4', 'eq_SO4', 'eq_NO3', ] - ri_dic[f'RI_{_lambda}'] = df_RI[['RI_dry', 'RI_wet']] + # out + out = {'mass': df_mass, + 'volume': df_vol, + 'vol_cal': df_vol_cal, + 'eq': df_eq, + 'density_mat': df_den, + 'density_rec': df_den_rec, + } + out.update(ri_dic) - ## mole and equivalent - df_eq = concat((mol_A, mol_S, mol_N, mol_A * 1, mol_S * 2, mol_N * 1), axis=1) - df_eq.columns = ['mol_NH4', 'mol_SO4', 'mol_NO3', 'eq_NH4', 'eq_SO4', 'eq_NO3', ] + for _ky, _df in out.items(): + out[_ky] = _df.reindex(index) - ## out - out = {'mass': df_mass, - 'volume': df_vol, - 'vol_cal': df_vol_cal, - 'eq': df_eq, - 'density_mat': df_den, - 'density_rec': df_den_rec, - } - out.update(ri_dic) - - for _ky, _df in out.items(): - out[_ky] = _df.reindex(index).where(qc_cond) - - return out - - -# ''' + return out def mass_ratio(_df): - if _df['PM25'] >= _df['total_mass']: - _df['others'] = _df['PM25'] - _df['total_mass'] - for _val, _species in zip(_df.values, _df.index): - _df[f'{_species}_ratio'] = _val / _df['PM25'].__round__(3) + if _df['PM25'] >= _df['total_mass']: + _df['others'] = _df['PM25'] - _df['total_mass'] + for _val, _species in zip(_df.values, _df.index): + _df[f'{_species}_ratio'] = _val / _df['PM25'].__round__(3) - if _df['PM25'] < _df['total_mass']: - _df['others'] = 0 - for _val, _species in zip(_df.values, _df.index): 
- _df[f'{_species}_ratio'] = _val / _df['PM25'].__round__(3) + if _df['PM25'] < _df['total_mass']: + _df['others'] = 0 + for _val, _species in zip(_df.values, _df.index): + _df[f'{_species}_ratio'] = _val / _df['PM25'].__round__(3) - return _df['others':].drop(labels=['PM25_ratio', 'total_mass_ratio']) + return _df['others':].drop(labels=['PM25_ratio', 'total_mass_ratio']) diff --git a/AeroViz/dataProcess/Chemistry/_ocec.py b/AeroViz/dataProcess/Chemistry/_ocec.py index 80bb398..139c7ab 100644 --- a/AeroViz/dataProcess/Chemistry/_ocec.py +++ b/AeroViz/dataProcess/Chemistry/_ocec.py @@ -1,184 +1,172 @@ -from AeroViz.dataProcess.core import _union_index +import warnings -from pandas import date_range, concat, DataFrame, to_numeric -from scipy.optimize import curve_fit import numpy as np +from pandas import concat, DataFrame +from scipy.optimize import curve_fit, least_squares, OptimizeWarning + +from AeroViz.dataProcess.core import union_index __all__ = [ - '_basic', - # '_ocec_ratio_cal', + '_basic', + # '_ocec_ratio_cal', ] def _min_Rsq(_oc, _ec, _rng): - _val_mesh, _oc_mesh = np.meshgrid(_rng, _oc) - _val_mesh, _ec_mesh = np.meshgrid(_rng, _ec) - - _out_table = DataFrame(_oc_mesh - _val_mesh * _ec_mesh, index=_oc.index, columns=_rng) - - ## calculate R2 - _r2_dic = {} - _func = lambda _x, _sl, _inte: _sl * _x + _inte - for _ocec, _out in _out_table.items(): - _df = DataFrame([_out.values, _ec.values]).T.dropna() - - _x, _y = _df[0], _df[1] - _opt, _ = curve_fit(_func, _x, _y) - - _tss = np.sum((_y - _y.mean()) ** 2.) - _rss = np.sum((_y - _func(_x, *_opt)) ** 2.) + _val_mesh, _oc_mesh = np.meshgrid(_rng, _oc) + _val_mesh, _ec_mesh = np.meshgrid(_rng, _ec) - _r2_dic[round(_ocec, 3)] = 1. - _rss / _tss + _out_table = DataFrame(_oc_mesh - _val_mesh * _ec_mesh, index=_oc.index, columns=_rng) - ## get the min R2 - _ratio = DataFrame(_r2_dic, index=[0]).idxmin(axis=1).values[0] + # calculate R2 + _r2_dic = {} + _func = lambda _x, _sl, _inte: _sl * _x + _inte + for _ocec, _out in _out_table.items(): + _df = DataFrame([_out.values, _ec.values]).T.dropna() - return _ratio, _out_table[_ratio] + _x, _y = _df[0].values, _df[1].values + # 初始參數估計 + slope_guess = (_y[-1] - _y[0]) / (_x[-1] - _x[0]) + intercept_guess = _y[0] - slope_guess * _x[0] -def _ocec_ratio_cal(_nam, _lcres_splt, _hr_lim, _range_, _wisoc_range_): - ## parameter - _out = DataFrame(index=_lcres_splt.index) - (_, _oc), (_, _ec) = _lcres_splt.items() - # _oc, _ec = _lcres_splt['Thermal_OC'], _lcres_splt['Thermal_EC'] - - ## real data OC/EC - _ocec_ratio_real = (_oc / _ec).quantile(.5) - - _out[f'OC/EC_real_{_nam}'] = _ocec_ratio_real - _out[f'POC_real_{_nam}'] = _ocec_ratio_real * _ec - _out[f'SOC_real_{_nam}'] = _oc - _out[f'POC_real_{_nam}'] + try: + with warnings.catch_warnings(): + warnings.filterwarnings('error') + _opt, _ = curve_fit(_func, _x, _y, p0=[slope_guess, intercept_guess], maxfev=5000) + except (RuntimeWarning, OptimizeWarning): + # 如果 curve_fit 失敗,嘗試使用 least_squares + residuals = lambda p: _func(_x, *p) - _y + _opt = least_squares(residuals, [slope_guess, intercept_guess]).x - ## the least R2 method - ## estimated OC/EC - if (len(_lcres_splt) <= _hr_lim): - print(f"\t\t{_lcres_splt.index[0].strftime('%Y-%m-%d %X')} to {_lcres_splt.index[-1].strftime('%Y-%m-%d %X')}") - print('\t\tPlease Modify the Values of "hour_limit" or Input Sufficient Amount of Data !!') + _tss = np.sum((_y - np.mean(_y)) ** 2) + _rss = np.sum((_y - _func(_x, *_opt)) ** 2) - _out[[f'OC/EC_{_nam}', f'POC_{_nam}', f'SOC_{_nam}', f'WISOC/OC_{_nam}', 
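# --- illustrative note on the reconstruction in _mass_volume.py above, not part of the patch ---
# The 'status' column is the molar ratio NH4+ / (2*SO42- + NO3-): values >= 1 mean
# enough ammonium to neutralize sulfate and nitrate, values < 1 trigger the
# ammonium-deficiency branches. Worked example with concentrations in ug/m3:
nh4, so4, no3 = 3.6, 9.6, 6.2
mol_a, mol_s, mol_n = nh4 / 18, so4 / 96, no3 / 62   # 0.20, 0.10, 0.10 umol/m3
status = mol_a / (2 * mol_s + mol_n)                 # 0.20 / 0.30 ~= 0.67 -> deficient case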
f'WSOC_{_nam}', - f'WISOC_{_nam}']] = np.nan + _r2_dic[round(_ocec, 3)] = 1 - _rss / _tss - return _out + _ratio = DataFrame(_r2_dic, index=[0]).idxmin(axis=1).values[0] - if (len(_lcres_splt.dropna()) == 0): - _out[[f'OC/EC_{_nam}', f'POC_{_nam}', f'SOC_{_nam}', f'WISOC/OC_{_nam}', f'WSOC_{_nam}', - f'WISOC_{_nam}']] = np.nan + return _ratio, _out_table[_ratio] - return _out - ## OC/EC - _ocec_ratio = False - _st, _ed, _stp = _range_ - - for _ in range(2): - if _ocec_ratio: - _ocec_rng = np.arange(_ocec_ratio - _stp / 2, _ocec_ratio + _stp / 2, .01).round(3) - else: - _ocec_rng = np.arange(_st, _ed + _stp, _stp).round(3) +def _ocec_ratio_cal(_nam, _lcres_splt, _hr_lim, _range_, _wisoc_range_): + # parameter + _out = DataFrame(index=_lcres_splt.index) + (_, _oc), (_, _ec) = _lcres_splt.items() + # _oc, _ec = _lcres_splt['Thermal_OC'], _lcres_splt['Thermal_EC'] - _ocec_ratio, _soc = _min_Rsq(_oc, _ec, _ocec_rng) + # real data OC/EC + _ocec_ratio_real = (_oc / _ec).quantile(.5) - ## WISOC - _st, _ed, _stp = _wisoc_range_ - _wisoc_rng = (np.arange(_st, _ed + _stp, _stp) * _ocec_ratio).round(5) - _wisoc_ratio, _wsoc = _min_Rsq(_oc, _ec, _wisoc_rng) + _out[f'OC/EC_real_{_nam}'] = _ocec_ratio_real + _out[f'POC_real_{_nam}'] = _ocec_ratio_real * _ec + _out[f'SOC_real_{_nam}'] = _oc - _out[f'POC_real_{_nam}'] - ## out - _out[f'OC/EC_{_nam}'] = _ocec_ratio - _out[f'SOC_{_nam}'] = _soc - _out[f'POC_{_nam}'] = _oc - _out[f'SOC_{_nam}'] - _out[f'WISOC/OC_{_nam}'] = _wisoc_ratio - _out[f'WSOC_{_nam}'] = _wsoc - _out[f'WISOC_{_nam}'] = _oc - _out[f'WSOC_{_nam}'] + # the least R2 method + # estimated OC/EC + if len(_lcres_splt) <= _hr_lim: + print(f"\t\t{_lcres_splt.index[0].strftime('%Y-%m-%d %X')} to {_lcres_splt.index[-1].strftime('%Y-%m-%d %X')}") + print('\t\tPlease Modify the Values of "hour_limit" or Input Sufficient Amount of Data !!') - return _out[[f'OC/EC_{_nam}', f'POC_{_nam}', f'SOC_{_nam}', f'WISOC/OC_{_nam}', f'WSOC_{_nam}', f'WISOC_{_nam}', - f'OC/EC_real_{_nam}', f'POC_real_{_nam}', f'SOC_real_{_nam}']] + _out[[f'OC/EC_{_nam}', f'POC_{_nam}', f'SOC_{_nam}', f'WISOC/OC_{_nam}', f'WSOC_{_nam}', + f'WISOC_{_nam}']] = np.nan + return _out -def _basic(_lcres, _res, _mass, _ocec_ratio, _ocec_ratio_month, _hr_lim, _range, _wisoc_range): - _lcres, _res, _mass = _union_index(_lcres, _res, _mass) + if len(_lcres_splt.dropna()) == 0: + _out[[f'OC/EC_{_nam}', f'POC_{_nam}', f'SOC_{_nam}', f'WISOC/OC_{_nam}', f'WSOC_{_nam}', + f'WISOC_{_nam}']] = np.nan - _out = {} + return _out - ## OC1, OC2, OC3, OC4, PC - _df_bsc = _res / _lcres['Sample_Volume'].to_frame().values.copy() + # OC/EC + _ocec_ratio = False + _st, _ed, _stp = _range_ - ## SOC, POC, OC/EC - if _ocec_ratio is not None: - try: - iter(_ocec_ratio) - except TypeError: - raise TypeError('"ocec_ratio" Only Accept a Single Value !!') + for _ in range(2): + if _ocec_ratio: + _ocec_rng = np.arange(_ocec_ratio - _stp / 2, _ocec_ratio + _stp / 2, .01).round(3) + else: + _ocec_rng = np.arange(_st, _ed + _stp, _stp).round(3) - _prcs_df = DataFrame(index=_df_bsc.index) - _prcs_df['OC/EC'] = _ocec_ratio - _prcs_df['POC'] = _ocec_ratio * _lcres['Thermal_EC'] - _prcs_df['SOC'] = _lcres['Thermal_OC'] - _prcs_df['POC'] + _ocec_ratio, _soc = _min_Rsq(_oc, _ec, _ocec_rng) - else: - _df_lst = [] - for _, _df in _lcres.resample(f'{_ocec_ratio_month}MS', closed='left'): - _thm_cal = _ocec_ratio_cal('thm', _df[['Thermal_OC', 'Thermal_EC']], _hr_lim, _range, _wisoc_range) - _opt_cal = _ocec_ratio_cal('opt', _df[['Optical_OC', 'Optical_EC']], _hr_lim, _range, 
_wisoc_range) - _df_lst.append(concat([_thm_cal, _opt_cal], axis=1)) + # WISOC + _st, _ed, _stp = _wisoc_range_ + _wisoc_rng = (np.arange(_st, _ed + _stp, _stp) * _ocec_ratio).round(5) + _wisoc_ratio, _wsoc = _min_Rsq(_oc, _ec, _wisoc_rng) - _prcs_df = concat(_df_lst) + # out + _out[f'OC/EC_{_nam}'] = _ocec_ratio + _out[f'SOC_{_nam}'] = _soc + _out[f'POC_{_nam}'] = _oc - _out[f'SOC_{_nam}'] + _out[f'WISOC/OC_{_nam}'] = _wisoc_ratio + _out[f'WSOC_{_nam}'] = _wsoc + _out[f'WISOC_{_nam}'] = _oc - _out[f'WSOC_{_nam}'] - _df_bsc = concat((_df_bsc.copy(), _prcs_df), axis=1) + return _out[[f'OC/EC_{_nam}', f'POC_{_nam}', f'SOC_{_nam}', f'WISOC/OC_{_nam}', f'WSOC_{_nam}', f'WISOC_{_nam}', + f'OC/EC_real_{_nam}', f'POC_real_{_nam}', f'SOC_real_{_nam}']] - ## ratio - _df_ratio = DataFrame(index=_df_bsc.index) - for _ky, _val in _df_bsc.items(): - if 'OC/EC' in _ky: continue - _df_ratio[f'{_ky}/Thermal_OC'] = _val / _lcres['Thermal_OC'] - _df_ratio[f'{_ky}/Optical_OC'] = _val / _lcres['Optical_OC'] +def _basic(_lcres, _mass, _ocec_ratio, _ocec_ratio_month, _hr_lim, _range, _wisoc_range): + _lcres, _mass = union_index(_lcres, _mass) - if _mass is not None: - for _ky, _val in _df_bsc.items(): - _df_ratio[f'{_ky}/PM'] = _val / _mass + _out = {} - _df_ratio[f'Thermal_OC/PM'] = _lcres['Thermal_OC'] / _mass - _df_ratio[f'Thermal_EC/PM'] = _lcres['Thermal_EC'] / _mass + # OC1, OC2, OC3, OC4, PC + _df_bsc = _lcres[['OC1', 'OC2', 'OC3', 'OC4', 'PC']].copy() - _df_ratio[f'Optical_OC/PM'] = _lcres['Optical_OC'] / _mass - _df_ratio[f'Optical_EC/PM'] = _lcres['Optical_EC'] / _mass + # SOC, POC, OC/EC + if _ocec_ratio is not None: + try: + iter(_ocec_ratio) + except TypeError: + raise TypeError('"ocec_ratio" Only Accept a Single Value !!') - ## ratio status - _df_bsc = concat((_lcres, _df_bsc.copy()), axis=1) + _prcs_df = DataFrame(index=_df_bsc.index) + _prcs_df['OC/EC'] = _ocec_ratio + _prcs_df['POC'] = _ocec_ratio * _lcres['Thermal_EC'] + _prcs_df['SOC'] = _lcres['Thermal_OC'] - _prcs_df['POC'] - for _ky, _df in _df_ratio.items(): - _df_bsc[f'{_ky}_status'] = 'Normal' - _df_bsc[f'{_ky}_status'] = _df_bsc[f'{_ky}_status'].mask(_df > 1, 'Warning') + else: + _df_lst = [] + for _, _df in _lcres.resample(f'{_ocec_ratio_month}MS', closed='left'): + _thm_cal = _ocec_ratio_cal('thm', _df[['Thermal_OC', 'Thermal_EC']], _hr_lim, _range, _wisoc_range) + _opt_cal = _ocec_ratio_cal('opt', _df[['Optical_OC', 'Optical_EC']], _hr_lim, _range, _wisoc_range) + _df_lst.append(concat([_thm_cal, _opt_cal], axis=1)) - ## out - _out['ratio'] = _df_ratio - _out['basic'] = _df_bsc + _prcs_df = concat(_df_lst) - return _out + _df_bsc = concat((_df_bsc.copy(), _prcs_df), axis=1) + # ratio + _df_ratio = DataFrame(index=_df_bsc.index) -''' -_ocec_mesh, _oc_mesh = n.meshgrid(_ocec_rng, _oc) -_ocec_mesh, _ec_mesh = n.meshgrid(_ocec_rng, _ec) + for _ky, _val in _df_bsc.items(): + if 'OC/EC' in _ky: + continue + _df_ratio[f'{_ky}/Thermal_OC'] = _val / _lcres['Thermal_OC'] + _df_ratio[f'{_ky}/Optical_OC'] = _val / _lcres['Optical_OC'] -_soc_table = DataFrame(_oc_mesh-_ocec_mesh*_ec_mesh, index=_oc.index, columns=_ocec_rng) + if _mass is not None: + for _ky, _val in _df_bsc.items(): + _df_ratio[f'{_ky}/PM'] = _val / _mass -## calculate R2 -_r2_dic = {} -_func = lambda _x, _sl, _inte : _sl*_x+_inte -for _ocec, _soc in _soc_table.items(): + _df_ratio[f'Thermal_OC/PM'] = _lcres['Thermal_OC'] / _mass + _df_ratio[f'Thermal_EC/PM'] = _lcres['Thermal_EC'] / _mass - _df = DataFrame([_soc.values, _ec.values]).T.dropna() - _x, _y = _df[0], _df[1] + 
_df_ratio[f'Optical_OC/PM'] = _lcres['Optical_OC'] / _mass + _df_ratio[f'Optical_EC/PM'] = _lcres['Optical_EC'] / _mass - _opt, _ = curve_fit(_func, _x, _y) + # ratio status + _df_bsc = concat((_lcres.loc[:, :'Sample_Volume'], _df_bsc.copy()), axis=1) - _tss = n.sum((_y - _y.mean())**2.) - _rss = n.sum((_y - _func(_x, *_opt))**2.) + for _ky, _df in _df_ratio.items(): + _df_bsc[f'{_ky}_status'] = 'Normal' + _df_bsc[f'{_ky}_status'] = _df_bsc[f'{_ky}_status'].mask(_df > 1, 'Warning') - _r2_dic[round(_ocec,2)] = 1. - _rss / _tss + # out + _out['basic'] = _df_bsc + _out['ratio'] = _df_ratio -## get the min R2 -_ocec_ratio = DataFrame(_r2_dic, index=[0]).idxmin(axis=1).values[0] -# ''' + return _out diff --git a/AeroViz/dataProcess/Chemistry/_partition.py b/AeroViz/dataProcess/Chemistry/_partition.py index 14d9a25..1471cb0 100644 --- a/AeroViz/dataProcess/Chemistry/_partition.py +++ b/AeroViz/dataProcess/Chemistry/_partition.py @@ -1,29 +1,30 @@ -from pandas import date_range, concat, DataFrame, to_numeric +from pandas import concat, DataFrame + from ._calculate import _ug2umol def _basic(df_che, nam_lst): - # parameter - df_all = concat(df_che, axis=1) - index = df_all.index.copy() - df_all.columns = nam_lst + # parameter + df_all = concat(df_che, axis=1) + index = df_all.index.copy() + df_all.columns = nam_lst - df_umol = _ug2umol(df_all) + df_umol = _ug2umol(df_all) - # calculate - df_out = DataFrame(index=df_umol.index) + # calculate + df_out = DataFrame(index=df_umol.index) - # df_out['NTR'] = df_umol['NH4+'] / (df_umol['NH4+'] + df_all['NH3'] / 22.4) - df_out['NTR+'] = df_umol['NH4+'] / (df_umol['NH4+'] + df_umol['NH3']) + # df_out['NTR'] = df_umol['NH4+'] / (df_umol['NH4+'] + df_all['NH3'] / 22.4) + df_out['NTR+'] = df_umol['NH4+'] / (df_umol['NH4+'] + df_umol['NH3']) - df_out['NOR'] = df_umol['NO3-'] / (df_umol['NO3-'] + df_umol['NO2']) - df_out['NOR_2'] = (df_umol['NO3-'] + df_umol['HNO3']) / (df_umol['NO3-'] + df_umol['NO2'] + df_umol['HNO3']) + df_out['NOR'] = df_umol['NO3-'] / (df_umol['NO3-'] + df_umol['NO2']) + df_out['NOR_2'] = (df_umol['NO3-'] + df_umol['HNO3']) / (df_umol['NO3-'] + df_umol['NO2'] + df_umol['HNO3']) - df_out['SOR'] = df_umol['SO42-'] / (df_umol['SO42-'] + df_umol['SO2']) + df_out['SOR'] = df_umol['SO42-'] / (df_umol['SO42-'] + df_umol['SO2']) - df_out['epls_NO3-'] = df_umol['NO3-'] / (df_umol['NO3-'] + df_umol['HNO3']) - df_out['epls_NH4+'] = df_umol['NH4+'] / (df_umol['NH4+'] + df_umol['NH3']) - df_out['epls_SO42-'] = df_out['SOR'] - df_out['epls_Cl-'] = df_umol['Cl-'] / (df_umol['Cl-'] + df_umol['HCl']) + df_out['epls_NO3-'] = df_umol['NO3-'] / (df_umol['NO3-'] + df_umol['HNO3']) + df_out['epls_NH4+'] = df_umol['NH4+'] / (df_umol['NH4+'] + df_umol['NH3']) + df_out['epls_SO42-'] = df_out['SOR'] + df_out['epls_Cl-'] = df_umol['Cl-'] / (df_umol['Cl-'] + df_umol['HCl']) - return df_out + return df_out diff --git a/AeroViz/dataProcess/Chemistry/_teom.py b/AeroViz/dataProcess/Chemistry/_teom.py index 64d4130..d834db1 100644 --- a/AeroViz/dataProcess/Chemistry/_teom.py +++ b/AeroViz/dataProcess/Chemistry/_teom.py @@ -1,16 +1,14 @@ -import numpy as np - - def _basic(_teom, _check): - _teom['Volatile_Fraction'] = (_teom['PM_Total'] - _teom['PM_NV']) / _teom['PM_Total'] + import numpy as np + _teom['Volatile_Fraction'] = (_teom['PM_Total'] - _teom['PM_NV']) / _teom['PM_Total'] - _teom.loc[(_teom['Volatile_Fraction'] < 0) | (_teom['Volatile_Fraction'] > 1)] = n.nan + _teom.loc[(_teom['Volatile_Fraction'] < 0) | (_teom['Volatile_Fraction'] > 1)] = np.nan - if _check is 
not None: - _ratio = _teom['PM_NV'] / _check - _teom['PM_Check'] = _check + if _check is not None: + _ratio = _teom['PM_NV'] / _check + _teom['PM_Check'] = _check - _teom.loc[_teom.dropna().index, 'status'] = 'Warning' - _teom.loc[(_ratio > 0) & (_ratio < 1), 'status'] = 'Normal' + _teom.loc[_teom.dropna().index, 'status'] = 'Warning' + _teom.loc[(_ratio > 0) & (_ratio < 1), 'status'] = 'Normal' - return _teom + return _teom diff --git a/AeroViz/dataProcess/Chemistry/isrpia2.exe b/AeroViz/dataProcess/Chemistry/isrpia2.exe old mode 100644 new mode 100755 diff --git a/AeroViz/dataProcess/Optical/Angstrom_exponent.py b/AeroViz/dataProcess/Optical/Angstrom_exponent.py new file mode 100644 index 0000000..30b5882 --- /dev/null +++ b/AeroViz/dataProcess/Optical/Angstrom_exponent.py @@ -0,0 +1,20 @@ +import numpy as np +import pandas as pd +from scipy.optimize import curve_fit + + +def get_species_wavelength(df, specified_band): + func = lambda wavelength, _sl, _int: _sl * wavelength + _int + popt, pcov = curve_fit(func, specified_band, df.values) + + return func(np.array(specified_band), *popt) + + +def get_Angstrom_exponent(df, band): + if (df <= 0).any(): + return pd.Series([np.nan, np.nan], index=['slope', 'intercept'])  # return a Series of NaN to keep the DataFrame structure + + func = lambda wavelength, _sl, _int: _sl * wavelength + _int + popt, _ = curve_fit(func, np.log(band), np.log(df)) + + return pd.Series(popt, index=['slope', 'intercept'])  # return a Series with a labeled index diff --git a/AeroViz/dataProcess/Optical/PyMieScatt_update.py b/AeroViz/dataProcess/Optical/PyMieScatt_update.py new file mode 100644 index 0000000..d5e6a43 --- /dev/null +++ b/AeroViz/dataProcess/Optical/PyMieScatt_update.py @@ -0,0 +1,560 @@ +# -*- coding: utf-8 -*- +# http://pymiescatt.readthedocs.io/en/latest/forward.html +import warnings + +import numpy as np +from scipy.special import jv, yv + + +def MieQ(m, wavelength, diameter, nMedium=1.0, asDict=False, asCrossSection=False): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#MieQ + nMedium = nMedium.real + m /= nMedium + wavelength /= nMedium + x = np.pi * diameter / wavelength + if x == 0: + return 0, 0, 0, 1.5, 0, 0, 0 + elif x <= 0.05: + return RayleighMieQ(m, wavelength, diameter, nMedium, asDict) + elif x > 0.05: + nmax = np.round(2 + x + 4 * (x ** (1 / 3))) + n = np.arange(1, nmax + 1) + n1 = 2 * n + 1 + n2 = n * (n + 2) / (n + 1) + n3 = n1 / (n * (n + 1)) + x2 = x ** 2 + + an, bn = Mie_ab(m, x) + + qext = (2 / x2) * np.sum(n1 * (an.real + bn.real)) + qsca = (2 / x2) * np.sum(n1 * (an.real ** 2 + an.imag ** 2 + bn.real ** 2 + bn.imag ** 2)) + qabs = qext - qsca + + g1 = [an.real[1:int(nmax)], + an.imag[1:int(nmax)], + bn.real[1:int(nmax)], + bn.imag[1:int(nmax)]] + g1 = [np.append(x, 0.0) for x in g1] + g = (4 / (qsca * x2)) * np.sum( + (n2 * (an.real * g1[0] + an.imag * g1[1] + bn.real * g1[2] + bn.imag * g1[3])) + ( + n3 * (an.real * bn.real + an.imag * bn.imag))) + + qpr = qext - qsca * g + qback = (1 / x2) * (np.abs(np.sum(n1 * ((-1) ** n) * (an - bn))) ** 2) + qratio = qback / qsca + if asCrossSection: + css = np.pi * (diameter / 2) ** 2 + cext = css * qext + csca = css * qsca + cabs = css * qabs + cpr = css * qpr + cback = css * qback + cratio = css * qratio + if asDict: + return dict(Cext=cext, Csca=csca, Cabs=cabs, g=g, Cpr=cpr, Cback=cback, Cratio=cratio) + else: + return cext, csca, cabs, g, cpr, cback, cratio + else: + if asDict: + return dict(Qext=qext, Qsca=qsca, Qabs=qabs, g=g, Qpr=qpr, Qback=qback, Qratio=qratio) + else: + return qext, qsca, qabs, g, qpr, qback,
qratio + + +def Mie_ab(m, x): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#Mie_ab + mx = m * x + nmax = np.round(2 + x + 4 * (x ** (1 / 3))) + nmx = np.round(max(nmax, np.abs(mx)) + 16) + n = np.arange(1, nmax + 1) # + nu = n + 0.5 # + + sx = np.sqrt(0.5 * np.pi * x) + + px = sx * jv(nu, x) # + p1x = np.append(np.sin(x), px[0:int(nmax) - 1]) # + + chx = -sx * yv(nu, x) # + ch1x = np.append(np.cos(x), chx[0:int(nmax) - 1]) # + + gsx = px - (0 + 1j) * chx # + gs1x = p1x - (0 + 1j) * ch1x # + + # B&H Equation 4.89 + Dn = np.zeros(int(nmx), dtype=complex) + for i in range(int(nmx) - 1, 1, -1): + Dn[i - 1] = (i / mx) - (1 / (Dn[i] + i / mx)) + + D = Dn[1:int(nmax) + 1] # Dn(mx), drop terms beyond nMax + da = D / m + n / x + db = m * D + n / x + + an = (da * px - p1x) / (da * gsx - gs1x) + bn = (db * px - p1x) / (db * gsx - gs1x) + + return an, bn + + +def Mie_cd(m, x): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#Mie_cd + mx = m * x + nmax = np.round(2 + x + 4 * (x ** (1 / 3))) + nmx = np.round(max(nmax, np.abs(mx)) + 16) + n = np.arange(1, int(nmax) + 1) + nu = n + 0.5 + + cnx = np.zeros(int(nmx), dtype=complex) + + for j in np.arange(nmx, 1, -1): + cnx[int(j) - 2] = j - mx * mx / (cnx[int(j) - 1] + j) + + cnn = np.array([cnx[b] for b in range(0, len(n))]) + + jnx = np.sqrt(np.pi / (2 * x)) * jv(nu, x) + jnmx = np.sqrt((2 * mx) / np.pi) / jv(nu, mx) + + yx = np.sqrt(np.pi / (2 * x)) * yv(nu, x) + hx = jnx + (1.0j) * yx + + b1x = np.append(np.sin(x) / x, jnx[0:int(nmax) - 1]) + y1x = np.append(-np.cos(x) / x, yx[0:int(nmax) - 1]) + + hn1x = b1x + (1.0j) * y1x + ax = x * b1x - n * jnx + ahx = x * hn1x - n * hx + + numerator = jnx * ahx - hx * ax + c_denominator = ahx - hx * cnn + d_denominator = m * m * ahx - hx * cnn + + cn = jnmx * numerator / c_denominator + dn = jnmx * m * numerator / d_denominator + + return cn, dn + + +def RayleighMieQ(m, wavelength, diameter, nMedium=1.0, asDict=False, asCrossSection=False): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#RayleighMieQ + nMedium = nMedium.real + m /= nMedium + wavelength /= nMedium + x = np.pi * diameter / wavelength + if x == 0: + return 0, 0, 0, 1.5, 0, 0, 0 + elif x > 0: + LL = (m ** 2 - 1) / (m ** 2 + 2) # Lorentz-Lorenz term + LLabsSq = np.abs(LL) ** 2 + qsca = 8 * LLabsSq * (x ** 4) / 3 # B&H eq 5.8 + qabs = 4 * x * LL.imag # B&H eq. 5.11 + qext = qsca + qabs + qback = 1.5 * qsca # B&H eq. 
5.9 + qratio = 1.5 + g = 0 + qpr = qext + if asCrossSection: + css = np.pi * (diameter / 2) ** 2 + cext = css * qext + csca = css * qsca + cabs = css * qabs + cpr = css * qpr + cback = css * qback + cratio = css * qratio + if asDict: + return dict(Cext=cext, Csca=csca, Cabs=cabs, g=g, Cpr=cpr, Cback=cback, Cratio=cratio) + else: + return cext, csca, cabs, g, cpr, cback, cratio + else: + if asDict: + return dict(Qext=qext, Qsca=qsca, Qabs=qabs, g=g, Qpr=qpr, Qback=qback, Qratio=qratio) + else: + return qext, qsca, qabs, g, qpr, qback, qratio + + +def AutoMieQ(m, wavelength, diameter, nMedium=1.0, crossover=0.01, asDict=False, asCrossSection=False): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#AutoMieQ + nMedium = nMedium.real + m_eff = m / nMedium + wavelength_eff = wavelength / nMedium + x_eff = np.pi * diameter / wavelength_eff + if x_eff == 0: + return 0, 0, 0, 1.5, 0, 0, 0 + elif x_eff < crossover: + return RayleighMieQ(m, wavelength, diameter, nMedium, asDict=asDict, asCrossSection=asCrossSection) + else: + return MieQ(m, wavelength, diameter, nMedium, asDict=asDict, asCrossSection=asCrossSection) + + +def LowFrequencyMieQ(m, wavelength, diameter, nMedium=1.0, asDict=False, asCrossSection=False): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#LowFrequencyMieQ + nMedium = nMedium.real + m /= nMedium + wavelength /= nMedium + x = np.pi * diameter / wavelength + if x == 0: + return 0, 0, 0, 1.5, 0, 0, 0 + elif x > 0: + n = np.arange(1, 3) + n1 = 2 * n + 1 + n2 = n * (n + 2) / (n + 1) + n3 = n1 / (n * (n + 1)) + x2 = x ** 2 + + an, bn = LowFrequencyMie_ab(m, x) + + qext = (2 / x2) * np.sum(n1 * (an.real + bn.real)) + qsca = (2 / x2) * np.sum(n1 * (an.real ** 2 + an.imag ** 2 + bn.real ** 2 + bn.imag ** 2)) + qabs = qext - qsca + + g1 = [an.real[1:2], an.imag[1:2], bn.real[1:2], bn.imag[1:2]] + g1 = [np.append(x, 0.0) for x in g1] + g = (4 / (qsca * x2)) * np.sum( + (n2 * (an.real * g1[0] + an.imag * g1[1] + bn.real * g1[2] + bn.imag * g1[3])) + ( + n3 * (an.real * bn.real + an.imag * bn.imag))) + + qpr = qext - qsca * g + qback = (1 / x2) * (np.abs(np.sum(n1 * ((-1) ** n) * (an - bn))) ** 2) + qratio = qback / qsca + + if asCrossSection: + css = np.pi * (diameter / 2) ** 2 + cext = css * qext + csca = css * qsca + cabs = css * qabs + cpr = css * qpr + cback = css * qback + cratio = css * qratio + if asDict: + return dict(Cext=cext, Csca=csca, Cabs=cabs, g=g, Cpr=cpr, Cback=cback, Cratio=cratio) + else: + return cext, csca, cabs, g, cpr, cback, cratio + else: + if asDict: + return dict(Qext=qext, Qsca=qsca, Qabs=qabs, g=g, Qpr=qpr, Qback=qback, Qratio=qratio) + else: + return qext, qsca, qabs, g, qpr, qback, qratio + + +def LowFrequencyMie_ab(m, x): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#LowFrequencyMie_ab + # B&H page 131 + m2 = m ** 2 + LL = (m ** 2 - 1) / (m ** 2 + 2) + x3 = x ** 3 + x5 = x ** 5 + x6 = x ** 6 + + a1 = (-2j * x3 / 3) * LL - (2j * x5 / 5) * LL * (m2 - 2) / (m2 + 2) + (4 * x6 / 9) * (LL ** 2) + a2 = (-1j * x5 / 15) * (m2 - 1) / (2 * m2 + 3) + b1 = (-1j * x5 / 45) * (m2 - 1) + b2 = 0 + 0j + an = np.append(a1, a2) + bn = np.append(b1, b2) + return an, bn + + +def AutoMie_ab(m, x): + if x < 0.5: + return LowFrequencyMie_ab(m, x) + else: + return Mie_ab(m, x) + + +def Mie_SD(m, wavelength, dp, ndp, nMedium=1.0, SMPS=True, interpolate=False, asDict=False): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#Mie_SD + nMedium = nMedium.real + m /= nMedium + wavelength /= nMedium + dp = np.array(dp) + ndp = np.array(ndp) + 
_length = np.size(dp) + Q_ext = np.zeros(_length) + Q_sca = np.zeros(_length) + Q_abs = np.zeros(_length) + Q_pr = np.zeros(_length) + Q_back = np.zeros(_length) + Q_ratio = np.zeros(_length) + g = np.zeros(_length) + + # scaling of 1e-6 to cast in units of inverse megameters - see docs + aSDn = np.pi * ((dp / 2) ** 2) * ndp * (1e-6) + # _logdp = np.log10(dp) + + for i in range(_length): + Q_ext[i], Q_sca[i], Q_abs[i], g[i], Q_pr[i], Q_back[i], Q_ratio[i] = AutoMieQ(m, wavelength, dp[i], nMedium) + + if SMPS: + Bext = np.sum(Q_ext * aSDn) + Bsca = np.sum(Q_sca * aSDn) + Babs = Bext - Bsca + Bback = np.sum(Q_back * aSDn) + Bratio = np.sum(Q_ratio * aSDn) + bigG = np.sum(g * Q_sca * aSDn) / np.sum(Q_sca * aSDn) + Bpr = Bext - bigG * Bsca + else: + Bext = np.trapz(Q_ext * aSDn, dp) + Bsca = np.trapz(Q_sca * aSDn, dp) + Babs = Bext - Bsca + Bback = np.trapz(Q_back * aSDn, dp) + Bratio = np.trapz(Q_ratio * aSDn, dp) + bigG = np.trapz(g * Q_sca * aSDn, dp) / np.trapz(Q_sca * aSDn, dp) + Bpr = Bext - bigG * Bsca + + if asDict: + return dict(Bext=Bext, Bsca=Bsca, Babs=Babs, G=bigG, Bpr=Bpr, Bback=Bback, Bratio=Bratio) + else: + return Bext, Bsca, Babs, bigG, Bpr, Bback, Bratio + + +def ScatteringFunction(m, wavelength, diameter, nMedium=1.0, minAngle=0, maxAngle=180, angularResolution=0.5, + space='theta', angleMeasure='radians', normalization=None): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#ScatteringFunction + nMedium = nMedium.real + m /= nMedium + wavelength /= nMedium + x = np.pi * diameter / wavelength + + _steps = int(1 + (maxAngle - minAngle) / angularResolution) # default 361 + + if angleMeasure in ['radians', 'RADIANS', 'rad', 'RAD']: + adjust = np.pi / 180 + elif angleMeasure in ['gradians', 'GRADIANS', 'grad', 'GRAD']: + adjust = 1 / 200 + else: + adjust = 1 + + if space in ['q', 'qspace', 'QSPACE', 'qSpace']: + # _steps *= 10 + _steps += 1 + if minAngle == 0: + minAngle = 1e-5 + # measure = np.logspace(np.log10(minAngle),np.log10(maxAngle),_steps)*np.pi/180 + measure = np.linspace(minAngle, maxAngle, _steps) * np.pi / 180 + _q = True + else: + measure = np.linspace(minAngle, maxAngle, _steps) * adjust + _q = False + if x == 0: + return measure, 0, 0, 0 + _measure = np.linspace(minAngle, maxAngle, _steps) * np.pi / 180 + SL = np.zeros(_steps) + SR = np.zeros(_steps) + SU = np.zeros(_steps) + for j in range(_steps): + u = np.cos(_measure[j]) + S1, S2 = MieS1S2(m, x, u) + SL[j] = (np.sum(np.conjugate(S1) * S1)).real + SR[j] = (np.sum(np.conjugate(S2) * S2)).real + SU[j] = (SR[j] + SL[j]) / 2 + if normalization in ['m', 'M', 'max', 'MAX']: + SL /= np.max(SL) + SR /= np.max(SR) + SU /= np.max(SU) + elif normalization in ['t', 'T', 'total', 'TOTAL']: + SL /= np.trapz(SL, measure) + SR /= np.trapz(SR, measure) + SU /= np.trapz(SU, measure) + if _q: + measure = (4 * np.pi / wavelength) * np.sin(measure / 2) * (diameter / 2) + return measure, SL, SR, SU + + +def SF_SD(m, wavelength, dp, ndp, nMedium=1.0, minAngle=0, maxAngle=180, angularResolution=0.5, space='theta', + angleMeasure='radians', normalization=None): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#SF_SD + nMedium = nMedium.real + m /= nMedium + wavelength /= nMedium + + _steps = int(1 + (maxAngle - minAngle) / angularResolution) + ndp = np.array(ndp) + dp = np.array(dp) + SL = np.zeros(_steps) + SR = np.zeros(_steps) + SU = np.zeros(_steps) + kwargs = {'minAngle': minAngle, + 'maxAngle': maxAngle, + 'angularResolution': angularResolution, + 'space': space, + 'normalization': None} + for n, d in 
zip(ndp, dp): + measure, l, r, u = ScatteringFunction(m, wavelength, d, **kwargs) + SL += l * n + SR += r * n + SU += u * n + if normalization in ['n', 'N', 'number', 'particles']: + _n = np.trapz(ndp, dp) + SL /= _n + SR /= _n + SU /= _n + elif normalization in ['m', 'M', 'max', 'MAX']: + SL /= np.max(SL) + SR /= np.max(SR) + SU /= np.max(SU) + elif normalization in ['t', 'T', 'total', 'TOTAL']: + SL /= np.trapz(SL, measure) + SR /= np.trapz(SR, measure) + SU /= np.trapz(SU, measure) + return measure, SL, SR, SU + + +def MieS1S2(m, x, mu): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#MieS1S2 + nmax = np.round(2 + x + 4 * np.power(x, 1 / 3)) + an, bn = AutoMie_ab(m, x) + pin, taun = MiePiTau(mu, nmax) + n = np.arange(1, int(nmax) + 1) + n2 = (2 * n + 1) / (n * (n + 1)) + S1 = np.sum(n2[0:len(an)] * (an * pin[0:len(an)] + bn * taun[0:len(bn)])) + S2 = np.sum(n2[0:len(an)] * (an * taun[0:len(an)] + bn * pin[0:len(bn)])) + return S1, S2 + + +def MiePiTau(mu, nmax): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#MiePiTau + p = np.zeros(int(nmax)) + t = np.zeros(int(nmax)) + p[0] = 1 + p[1] = 3 * mu + t[0] = mu + t[1] = 3.0 * np.cos(2 * np.arccos(mu)) + for n in range(2, int(nmax)): + p[n] = ((2 * n + 1) * (mu * p[n - 1]) - (n + 1) * p[n - 2]) / n + t[n] = (n + 1) * mu * p[n] - (n + 2) * p[n - 1] + return p, t + + +def MatrixElements(m, wavelength, diameter, mu, nMedium=1.0): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#MatrixElements + nMedium = nMedium.real + m /= nMedium + wavelength /= nMedium + x = np.pi * diameter / wavelength + # B&H eqs. 4.77, where mu=cos(theta) + S1, S2 = MieS1S2(m, x, mu) + S11 = 0.5 * (np.abs(S2) ** 2 + np.abs(S1) ** 2) + S12 = 0.5 * (np.abs(S2) ** 2 - np.abs(S1) ** 2) + S33 = 0.5 * (np.conjugate(S2) * S1 + S2 * np.conjugate(S1)) + S34 = 0.5j * (S1 * np.conjugate(S2) - S2 * np.conjugate(S1)) + return S11, S12, S33, S34 + + +def MieQ_withDiameterRange(m, wavelength, nMedium=1.0, diameterRange=(10, 1000), nd=1000, logD=False): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#MieQ_withDiameterRange + nMedium = nMedium.real + m /= nMedium + wavelength /= nMedium + if logD: + diameters = np.logspace(np.log10(diameterRange[0]), np.log10(diameterRange[1]), nd) + else: + diameters = np.linspace(diameterRange[0], diameterRange[1], nd) + _qD = [AutoMieQ(m, wavelength, diameter) for diameter in diameters] + qext = np.array([q[0] for q in _qD]) + qsca = np.array([q[1] for q in _qD]) + qabs = np.array([q[2] for q in _qD]) + g = np.array([q[3] for q in _qD]) + qpr = np.array([q[4] for q in _qD]) + qback = np.array([q[5] for q in _qD]) + qratio = np.array([q[6] for q in _qD]) + return diameters, qext, qsca, qabs, g, qpr, qback, qratio + + +def MieQ_withWavelengthRange(m, diameter, nMedium=1.0, wavelengthRange=(100, 1600), nw=1000, logW=False): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#MieQ_withWavelengthRange + nMedium = nMedium.real + _m = m / nMedium + _wavelengthRange = tuple([x / nMedium for x in wavelengthRange]) + if type(_m) == complex and len(_wavelengthRange) == 2: + if logW: + wavelengths = np.logspace(np.log10(_wavelengthRange[0]), np.log10(_wavelengthRange[1]), nw) + else: + wavelengths = np.linspace(_wavelengthRange[0], _wavelengthRange[1], nw) + _qD = [AutoMieQ(_m, wavelength, diameter) for wavelength in wavelengths] + elif type(_m) in [np.ndarray, list, tuple] and len(_wavelengthRange) == len(_m): + wavelengths = _wavelengthRange + _qD = [MieQ(emm, wavelength, diameter) for emm, wavelength in 
zip(_m, wavelengths)] + else: + warnings.warn("Error: the size of the input data is mismatched. Please examine your inputs and try again.") + return + + qext = np.array([q[0] for q in _qD]) + qsca = np.array([q[1] for q in _qD]) + qabs = np.array([q[2] for q in _qD]) + g = np.array([q[3] for q in _qD]) + qpr = np.array([q[4] for q in _qD]) + qback = np.array([q[5] for q in _qD]) + qratio = np.array([q[6] for q in _qD]) + return wavelengths, qext, qsca, qabs, g, qpr, qback, qratio + + +def MieQ_withSizeParameterRange(m, nMedium=1.0, xRange=(1, 10), nx=1000, logX=False): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#MieQ_withSizeParameterRange + nMedium = nMedium.real + m /= nMedium + _xRange = tuple([x * nMedium for x in xRange]) # I think + if logX: + xValues = list(np.logspace(np.log10(_xRange[0]), np.log10(_xRange[1]), nx)) + else: + xValues = list(np.linspace(_xRange[0], _xRange[1], nx)) + dValues = [1000 * x / np.pi for x in xValues] + _qD = [AutoMieQ(m, 1000, d) for d in dValues] + qext = np.array([q[0] for q in _qD]) + qsca = np.array([q[1] for q in _qD]) + qabs = np.array([q[2] for q in _qD]) + g = np.array([q[3] for q in _qD]) + qpr = np.array([q[4] for q in _qD]) + qback = np.array([q[5] for q in _qD]) + qratio = np.array([q[6] for q in _qD]) + return xValues, qext, qsca, qabs, g, qpr, qback, qratio + + +def Mie_Lognormal(m, wavelength, geoStdDev, geoMean, numberOfParticles, nMedium=1.0, numberOfBins=10000, lower=1, + upper=1000, gamma=[1], returnDistribution=False, decomposeMultimodal=False, asDict=False): + # http://pymiescatt.readthedocs.io/en/latest/forward.html#Mie_Lognormal + nMedium = nMedium.real + m /= nMedium + wavelength /= nMedium + ithPart = lambda gammai, dp, dpgi, sigmagi: (gammai / (np.sqrt(2 * np.pi) * np.log(sigmagi) * dp)) * np.exp( + -(np.log(dp) - np.log(dpgi)) ** 2 / (2 * np.log(sigmagi) ** 2)) + dp = np.logspace(np.log10(lower), np.log10(upper), numberOfBins) + if all([type(x) in [list, tuple, np.ndarray] for x in [geoStdDev, geoMean]]): + # multimodal + if len(gamma) == 1 and (len(geoStdDev) == len(geoMean) > 1): + # gamma is distributed equally among modes + gamma = [1 for x in geoStdDev] + gamma = [float(x / np.sum(gamma)) for x in gamma] + ndpi = [numberOfParticles * ithPart(g, dp, dpg, sg) for g, dpg, sg in zip(gamma, geoMean, geoStdDev)] + ndp = np.sum(ndpi, axis=0) + elif len(gamma) == len(geoStdDev) == len(geoMean): + # gamma is fully specified for each mode + gamma = [float(x / np.sum(gamma)) for x in gamma] + ndpi = [numberOfParticles * ithPart(g, dp, dpg, sg) for g, dpg, sg in zip(gamma, geoMean, geoStdDev)] + ndp = np.sum(ndpi, axis=0) + else: + # user fucked up + warnings.warn("Not enough parameters to fully specify each mode.") + return None + else: + # unimodal + decomposeMultimodal = False + ndp = numberOfParticles * ithPart(1, dp, geoMean, geoStdDev) + if ndp[-1] > np.max(ndp) / 100 or ndp[0] > np.max(ndp) / 100: + warnings.warn( + "Warning: distribution may not be compact on the specified interval. 
Consider using a higher upper bound.") + Bext, Bsca, Babs, bigG, Bpr, Bback, Bratio = Mie_SD(m, wavelength, dp, ndp, SMPS=False) + if returnDistribution: + if decomposeMultimodal: + if asDict == True: + return dict(Bext=Bext, Bsca=Bsca, Babs=Babs, bigG=bigG, Bpr=Bpr, Bback=Bback, + Bratio=Bratio), dp, ndp, ndpi + else: + return Bext, Bsca, Babs, bigG, Bpr, Bback, Bratio, dp, ndp, ndpi + else: + if asDict == True: + return dict(Bext=Bext, Bsca=Bsca, Babs=Babs, bigG=bigG, Bpr=Bpr, Bback=Bback, Bratio=Bratio), dp, ndp + else: + return Bext, Bsca, Babs, bigG, Bpr, Bback, Bratio, dp, ndp + else: + if asDict == True: + return dict(Bext=Bext, Bsca=Bsca, Babs=Babs, bigG=bigG, Bpr=Bpr, Bback=Bback, Bratio=Bratio) + else: + return Bext, Bsca, Babs, bigG, Bpr, Bback, Bratio diff --git a/AeroViz/dataProcess/Optical/_IMPROVE.py b/AeroViz/dataProcess/Optical/_IMPROVE.py index 1aff147..6710f4a 100644 --- a/AeroViz/dataProcess/Optical/_IMPROVE.py +++ b/AeroViz/dataProcess/Optical/_IMPROVE.py @@ -1,61 +1,59 @@ -import pickle as pkl from pathlib import Path + import numpy as np +from pandas import DataFrame, read_pickle -from pandas import date_range, concat, DataFrame, to_numeric -from AeroViz.dataProcess.core import _union_index +from AeroViz.dataProcess.core import union_index def _revised(_df_mass, _df_RH): - _df_mass, _df_RH = _union_index(_df_mass, _df_RH) - - ## fRH - with (Path(__file__).parent / 'fRH.pkl').open('rb') as f: - _fRH = pkl.load(f) - _fRH.loc[np.nan] = np.nan + _df_mass, _df_RH = union_index(_df_mass, _df_RH) - def fRH(_RH): - if _RH is not None: - _RH = _RH.mask(_RH > 95, 95).round(0) - return _fRH.loc[_RH].values.T + # fRH + with (Path(__file__).parent / 'fRH.pkl').open('rb') as f: + _fRH = read_pickle(f) + _fRH.loc[np.nan] = np.nan - return 1, 1, 1, 1 + def fRH(_RH): + if _RH is not None: + _RH = _RH.mask(_RH > 95, 95).round(0) + return _fRH.loc[_RH].values.T - ## different mode - ## mass < 20 : - ## large = mass**2/20 - ## small = mass-large - ## mass >= 20 : - ## large = mass - ## small = 0 - _df_mode = _df_mass[['AS', 'AN', 'OM']].copy() + return 1, 1, 1, 1 - _df_mass[['L_AS', 'L_AN', 'L_OM']] = _df_mode.mask(_df_mode < 20, _df_mode ** 2 / 20) - _df_mass[['S_AS', 'S_AN', 'S_OM']] = _df_mode.values - _df_mass[['L_AS', 'L_AN', 'L_OM']] + # different mode + # mass < 20 : + # large = mass**2/20 + # small = mass-large + # mass >= 20 : + # large = mass + # small = 0 + _df_mode = _df_mass[['AS', 'AN', 'OM']].copy() - ## apply IMPROVE ccoe. - def _ext_cal(_RH=None): + _df_mass[['L_AS', 'L_AN', 'L_OM']] = _df_mode.mask(_df_mode < 20, _df_mode ** 2 / 20) + _df_mass[['S_AS', 'S_AN', 'S_OM']] = _df_mode.values - _df_mass[['L_AS', 'L_AN', 'L_OM']] - _frh, _frhss, _frhs, _frhl = fRH(_RH) - _df = DataFrame(index=_df_mass.index) + # apply IMPROVE ccoe. 
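+    # Summary of the coefficients used in _ext_cal below (hypothetical numbers, for
+    # illustration only): each hygroscopic species is split into the small/large
+    # modes computed above, multiplied by its dry mass extinction efficiency (m2/g)
+    # and, where the code applies it, the f(RH) growth factor loaded from fRH.pkl.
+    # For example, assuming f_S(RH) = 2.0 and a small-mode AS mass of 10 ug/m3, the
+    # small-mode AS term alone would contribute 2.2 * 2.0 * 10 = 44 Mm-1.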
+ def _ext_cal(_RH=None): - _df['AS'] = 2.2 * _frhs * _df_mass['S_AS'] + 4.8 * _frhl * _df_mass['L_AS'] - _df['AN'] = 2.4 * _frhs * _df_mass['S_AN'] + 5.1 * _frhl * _df_mass['L_AN'] - _df['OM'] = 2.8 * _df_mass['S_OM'] + 6.1 * _frhl * _df_mass['L_OM'] - _df['Soil'] = _df_mass['Soil'] - _df['SS'] = 1.7 * _frhss * _df_mass['SS'] - _df['EC'] = 10 * _df_mass['EC'] + _frh, _frhss, _frhs, _frhl = fRH(_RH) + _df = DataFrame(index=_df_mass.index) - _df['total'] = _df.sum(axis=1) + _df['AS'] = 2.2 * _frhs * _df_mass['S_AS'] + 4.8 * _frhl * _df_mass['L_AS'] + _df['AN'] = 2.4 * _frhs * _df_mass['S_AN'] + 5.1 * _frhl * _df_mass['L_AN'] + _df['OM'] = 2.8 * _df_mass['S_OM'] + 6.1 * _frhl * _df_mass['L_OM'] + _df['Soil'] = _df_mass['Soil'] + _df['SS'] = 1.7 * _frhss * _df_mass['SS'] + _df['EC'] = 10 * _df_mass['EC'] - return _df.dropna().reindex(_df_mass.index) + _df['total'] = _df.sum(axis=1) - ## calculate - _out = {} + return _df.dropna().reindex(_df_mass.index) - _out['dry'] = _ext_cal() + # calculate + _out = {'dry': _ext_cal()} - if _df_RH is not None: - _out['wet'] = _ext_cal(_df_RH) + if _df_RH is not None: + _out['wet'] = _ext_cal(_df_RH) - return _out + return _out diff --git a/AeroViz/dataProcess/Optical/__init__.py b/AeroViz/dataProcess/Optical/__init__.py index 9674043..4bd59e0 100644 --- a/AeroViz/dataProcess/Optical/__init__.py +++ b/AeroViz/dataProcess/Optical/__init__.py @@ -1,62 +1,47 @@ -from ..core import _writter, _run_process +from ..core import Writer, run_process -__all__ = [ +__all__ = ['Optical'] - 'Optical', -] +class Optical(Writer): + @run_process('Optical - scaCoe', 'scaCoe') + def scaCoe(self, df_sca, instru, specified_band): + from ._scattering import _scaCoe -class Optical(_writter): + out = _scaCoe(df_sca, instru=instru, specified_band=[550] if specified_band is None else specified_band) - ## scatter - @_run_process('Optical - SAE', 'SAE') - def SAE(self, df_sca): - from ._scattering import _SAE + return self, out - out = _SAE(df_sca) + @run_process('Optical - absCoe', 'absCoe') + def absCoe(self, df_ae33, instru, specified_band): + from ._absorption import _absCoe - return self, out + out = _absCoe(df_ae33, instru=instru, specified_band=[550] if specified_band is None else specified_band) - ## absorption - @_run_process('Optical - absCoe', 'absCoe') - def absCoe(self, df_ae33, abs_band=[550]): - from ._absorption import _absCoe + return self, out - out = _absCoe(df_ae33, abs_band) + @run_process('Optical - basic', 'opt_basic') + def basic(self, df_sca, df_abs, df_mass=None, df_no2=None, df_temp=None): + from ._extinction import _basic - return self, out + out = _basic(df_sca, df_abs, df_mass, df_no2, df_temp) - @_run_process('Optical - AAE', 'AAE') - def AAE(self, df_abs): - from ._absorption import _AAE + return self, out - out = _AAE(df_abs) + @run_process('Optical - Mie', 'Mie') + def Mie(self, df_psd, df_m, wave_length=550): + from ._mie import _mie - return self, out + out = _mie(df_psd, df_m, wave_length) - ## extinction - @_run_process('Optical - basic', 'opt_basic') - def basic(self, df_abs, df_sca, df_ec=None, df_mass=None, df_no2=None): - from ._extinction import _basic + return self, out - out = _basic(df_abs, df_sca, df_ec, df_mass, df_no2) + @run_process('Optical - IMPROVE', 'IMPROVE') + def IMPROVE(self, df_mass, df_RH, method='revised'): + # _fc = __import__(f'_IMPROVE._{method}') + from ._IMPROVE import _revised - return self, out + out = _revised(df_mass, df_RH) - @_run_process('Optical - Mie', 'Mie') - def Mie(self, df_psd, df_m, wave_length=550): - 
from ._mie import _mie - - out = _mie(df_psd, df_m, wave_length) - - return self, out - - @_run_process('Optical - IMPROVE', 'IMPROVE') - def IMPROVE(self, df_mass, df_RH, method='revised'): - # _fc = __import__(f'_IMPROVE._{method}') - from ._IMPROVE import _revised - - out = _revised(df_mass, df_RH) - - return self, out + return self, out diff --git a/AeroViz/dataProcess/Optical/_absorption.py b/AeroViz/dataProcess/Optical/_absorption.py index b0c68a7..452a776 100644 --- a/AeroViz/dataProcess/Optical/_absorption.py +++ b/AeroViz/dataProcess/Optical/_absorption.py @@ -1,54 +1,28 @@ -def _absCoe(df, abs_band): - import numpy as n - from scipy.optimize import curve_fit +def _absCoe(df, instru, specified_band: list): + import numpy as np + from pandas import concat + from .Angstrom_exponent import get_Angstrom_exponent, get_species_wavelength - band = n.array([370, 470, 520, 590, 660, 880, 950]) + band_AE33 = np.array([370, 470, 520, 590, 660, 880, 950]) + band_BC1054 = np.array([370, 430, 470, 525, 565, 590, 660, 700, 880, 950]) - df_out = {} + MAE_AE33 = np.array([18.47, 14.54, 13.14, 11.58, 10.35, 7.77, 7.19]) * 1e-3 + MAE_BC1054 = np.array([18.48, 15.90, 14.55, 13.02, 12.10, 11.59, 10.36, 9.77, 7.77, 7.20]) * 1e-3 - def _get_slope(__df): - func = lambda _x, _sl, _int: _sl * _x + _int - popt, pcov = curve_fit(func, band, __df.values) + band = band_AE33 if instru == 'AE33' else band_BC1054 + MAE = MAE_AE33 if instru == 'AE33' else MAE_BC1054 + eBC = 'BC6' if instru == 'AE33' else 'BC9' - return func(n.array(abs_band), *popt) + # calculate + df_abs = (df.copy().dropna() * MAE).copy() - MAE = n.array([18.47, 14.54, 13.14, 11.58, 10.35, 7.77, 7.19]) * 1e-3 - df_abs = (df.copy() * MAE).dropna().copy() + df_out = df_abs.apply(get_species_wavelength, axis=1, result_type='expand', args=(specified_band,)) + df_out.columns = [f'abs_{_band}' for _band in specified_band] + df_out['eBC'] = df[eBC] - df_out = df_abs.apply(_get_slope, axis=1, result_type='expand').reindex(df.index) - df_out.columns = [f'abs_{_band}' for _band in abs_band] + df_AAE = df_abs.apply(get_Angstrom_exponent, axis=1, result_type='expand', args=(band,)) + df_AAE.columns = ['AAE', 'AAE_intercept'] + df_AAE = df_AAE.mask((-df_AAE['AAE'] < 0.8) | (-df_AAE['AAE'] > 2.)).copy() - df_out['eBC'] = df['BC6'] - - return df_out - - -def _AAE(df): - import numpy as n - from scipy.optimize import curve_fit - - def _AAEcalc(_df): - ## parameter - MAE = n.array([18.47, 14.54, 13.14, 11.58, 10.35, 7.77, 7.19]) * 1e-3 - band = n.array([370, 470, 520, 590, 660, 880, 950]) - _df *= MAE - - ## 7 pts fitting - ## function - def _get_slope(__df): - func = lambda _x, _sl, _int: _sl * _x + _int - popt, pcov = curve_fit(func, n.log(band), n.log(__df)) - - return popt - - ## calculate - _AAE = _df.apply(_get_slope, axis=1, result_type='expand') - _AAE.columns = ['slope', 'intercept'] - - return _AAE - - df_out = _AAEcalc(df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].dropna()) - df_out = df_out.mask((-df_out.slope < 0.8) | (-df_out.slope > 2.)).copy() - - df_out['eBC'] = df['BC6'] - return df_out.reindex(df.index) + _df = concat([df_out, df_AAE['AAE']], axis=1) + return _df.reindex(df.index) diff --git a/AeroViz/dataProcess/Optical/_extinction.py b/AeroViz/dataProcess/Optical/_extinction.py index e6d0bb8..ae6a39b 100644 --- a/AeroViz/dataProcess/Optical/_extinction.py +++ b/AeroViz/dataProcess/Optical/_extinction.py @@ -1,36 +1,42 @@ -from AeroViz.dataProcess.core import _union_index from pandas import DataFrame +from AeroViz.dataProcess.core 
import union_index -def _basic(df_abs, df_sca, df_ec, df_mass, df_no2): - df_abs, df_sca, df_ec, df_mass, df_no2 = _union_index(df_abs, df_sca, df_ec, df_mass, df_no2) - df_out = DataFrame() +def _basic(df_sca, df_abs, df_mass, df_no2, df_temp): + df_sca, df_abs, df_mass, df_no2, df_temp = union_index(df_sca, df_abs, df_mass, df_no2, df_temp) - ## abs and sca coe - df_out['abs'] = df_abs.copy() - df_out['sca'] = df_sca.copy() + df_out = DataFrame() - ## extinction coe. - df_out['ext'] = df_out['abs'] + df_out['sca'] + # abs and sca coe + df_out['abs'] = df_abs['abs_550'].copy() + df_out['sca'] = df_sca['sca_550'].copy() - ## SSA - df_out['SSA'] = df_out['sca'] / df_out['ext'] + # extinction coe. + df_out['ext'] = df_out['abs'] + df_out['sca'] - ## MAE, MSE, MEE - if df_mass is not None: - df_out['MAE'] = df_out['abs'] / df_mass - df_out['MSE'] = df_out['sca'] / df_mass - df_out['MEE'] = df_out['MSE'] + df_out['MAE'] + # SSA + df_out['SSA'] = df_out['sca'] / df_out['ext'] - ## gas absorbtion - if df_no2 is not None: - df_out['abs_gas'] = df_no2 * .33 - df_out['sca_gas'] = 10 - df_out['ext_all'] = df_out['ext'] + df_out['abs_gas'] + df_out['sca_gas'] + # SAE, AAE, eBC + df_out['SAE'] = df_sca['SAE'].copy() + df_out['AAE'] = df_abs['AAE'].copy() + df_out['eBC'] = df_abs['eBC'].copy() / 1e3 - ## other - if df_ec is not None: - df_out['eBC'] = df_ec / 1e3 + # MAE, MSE, MEE + if df_mass is not None: + df_out['MAE'] = df_out['abs'] / df_mass + df_out['MSE'] = df_out['sca'] / df_mass + df_out['MEE'] = df_out['MSE'] + df_out['MAE'] - return df_out + # gas absorbtion + if df_no2 is not None: + df_out['abs_gas'] = df_no2 * .33 + + if df_temp is not None: + df_out['sca_gas'] = (11.4 * 293 / (273 + df_temp)) + + if df_no2 is not None and df_temp is not None: + df_out['ext_all'] = df_out['ext'] + df_out['abs_gas'] + df_out['sca_gas'] + + return df_out diff --git a/AeroViz/dataProcess/Optical/_mie.py b/AeroViz/dataProcess/Optical/_mie.py index 5cf2894..c62f24e 100644 --- a/AeroViz/dataProcess/Optical/_mie.py +++ b/AeroViz/dataProcess/Optical/_mie.py @@ -1,16 +1,14 @@ # from PyMieScatt import Mie_SD -# from PyMieScatt import Mie_SD from ._mie_sd import Mie_SD -from pandas import date_range, concat, DataFrame, to_numeric def _mie(_psd_ori, _RI_ori, _wave): - _ori_idx = _psd_ori.index.copy() - _cal_idx = _psd_ori.loc[_RI_ori.dropna().index].dropna(how='all').index + _ori_idx = _psd_ori.index.copy() + _cal_idx = _psd_ori.loc[_RI_ori.dropna().index].dropna(how='all').index - _psd, _RI = _psd_ori.loc[_cal_idx], _RI_ori.loc[_cal_idx] + _psd, _RI = _psd_ori.loc[_cal_idx], _RI_ori.loc[_cal_idx] - _out = Mie_SD(_RI.values, 550, _psd) + _out = Mie_SD(_RI.values, 550, _psd) - return _out.reindex(_ori_idx) + return _out.reindex(_ori_idx) diff --git a/AeroViz/dataProcess/Optical/_mie_sd.py b/AeroViz/dataProcess/Optical/_mie_sd.py index 5346968..e59c0a4 100644 --- a/AeroViz/dataProcess/Optical/_mie_sd.py +++ b/AeroViz/dataProcess/Optical/_mie_sd.py @@ -1,143 +1,142 @@ # -*- coding: utf-8 -*- # http://pymiescatt.readthedocs.io/en/latest/forward.html import numpy as np +from pandas import concat, DataFrame from scipy.integrate import trapezoid from scipy.special import jv, yv -import warnings -from pandas import date_range, concat, DataFrame, to_numeric, to_datetime, Series def coerceDType(d): - if type(d) is not np.ndarray: - return np.array(d) - else: - return d + if type(d) is not np.ndarray: + return np.array(d) + else: + return d def Mie_ab(m, x, nmax, df_n): - nu = df_n.copy() + 0.5 - n1 = 2 * df_n.copy() + 1 + nu 
= df_n.copy() + 0.5 + n1 = 2 * df_n.copy() + 1 - sx = np.sqrt(0.5 * np.pi * x) - px = sx.reshape(-1, 1) * jv(nu, x.reshape(-1, 1)) - chx = -sx.reshape(-1, 1) * yv(nu, x.reshape(-1, 1)) + sx = np.sqrt(0.5 * np.pi * x) + px = sx.reshape(-1, 1) * jv(nu, x.reshape(-1, 1)) + chx = -sx.reshape(-1, 1) * yv(nu, x.reshape(-1, 1)) - p1x = concat([DataFrame(np.sin(x)), px.mask(df_n == nmax.reshape(-1, 1))], axis=1) - p1x.columns = np.arange(len(p1x.keys())) - p1x = p1x[df_n.keys()] + p1x = concat([DataFrame(np.sin(x)), px.mask(df_n == nmax.reshape(-1, 1))], axis=1) + p1x.columns = np.arange(len(p1x.keys())) + p1x = p1x[df_n.keys()] - ch1x = concat([DataFrame(np.cos(x)), chx.mask(df_n == nmax.reshape(-1, 1))], axis=1) - ch1x.columns = np.arange(len(ch1x.keys())) - ch1x = ch1x[df_n.keys()] + ch1x = concat([DataFrame(np.cos(x)), chx.mask(df_n == nmax.reshape(-1, 1))], axis=1) + ch1x.columns = np.arange(len(ch1x.keys())) + ch1x = ch1x[df_n.keys()] - gsx = px - (0 + 1j) * chx - gs1x = p1x - (0 + 1j) * ch1x + gsx = px - (0 + 1j) * chx + gs1x = p1x - (0 + 1j) * ch1x - mx = m.reshape(-1, 1) * x - nmx = np.round(np.max(np.hstack([[nmax] * m.size, np.abs(mx)]).reshape(m.size, 2, -1), axis=1) + 16) + mx = m.reshape(-1, 1) * x + nmx = np.round(np.max(np.hstack([[nmax] * m.size, np.abs(mx)]).reshape(m.size, 2, -1), axis=1) + 16) - df_qext = DataFrame(columns=m, index=df_n.index) - df_qsca = DataFrame(columns=m, index=df_n.index) + df_qext = DataFrame(columns=m, index=df_n.index) + df_qsca = DataFrame(columns=m, index=df_n.index) - df_n /= x.reshape(-1, 1) - for _bin_idx, (_nmx_ary, _mx, _nmax) in enumerate(zip(nmx.T, mx.T, nmax)): + df_n /= x.reshape(-1, 1) + for _bin_idx, (_nmx_ary, _mx, _nmax) in enumerate(zip(nmx.T, mx.T, nmax)): - df_D = DataFrame(np.nan, index=np.arange(m.size), columns=df_n.keys()) + df_D = DataFrame(np.nan, index=np.arange(m.size), columns=df_n.keys()) - Dn_lst = [] - for _nmx, _uni_idx in DataFrame(_nmx_ary).groupby(0).groups.items(): + Dn_lst = [] + for _nmx, _uni_idx in DataFrame(_nmx_ary).groupby(0).groups.items(): - _inv_mx = 1 / _mx[_uni_idx] + _inv_mx = 1 / _mx[_uni_idx] - Dn = np.zeros((_uni_idx.size, int(_nmx)), dtype=complex) - for _idx in range(int(_nmx) - 1, 1, -1): - Dn[:, _idx - 1] = (_idx * _inv_mx) - (1 / (Dn[:, _idx] + _idx * _inv_mx)) + Dn = np.zeros((_uni_idx.size, int(_nmx)), dtype=complex) + for _idx in range(int(_nmx) - 1, 1, -1): + Dn[:, _idx - 1] = (_idx * _inv_mx) - (1 / (Dn[:, _idx] + _idx * _inv_mx)) - Dn_lst.append(Dn[:, 1: int(_nmax) + 1]) - df_D.loc[_uni_idx, 0: int(_nmax) - 1] = Dn[:, 1: int(_nmax) + 1] + Dn_lst.append(Dn[:, 1: int(_nmax) + 1]) + df_D.loc[_uni_idx, 0: int(_nmax) - 1] = Dn[:, 1: int(_nmax) + 1] - ## other parameter - _df_n, _px, _p1x, _gsx, _gs1x, _n1 = df_n.loc[_bin_idx], px.loc[_bin_idx], p1x.loc[_bin_idx], gsx.loc[_bin_idx], \ - gs1x.loc[_bin_idx], n1.loc[_bin_idx].values + ## other parameter + _df_n, _px, _p1x, _gsx, _gs1x, _n1 = df_n.loc[_bin_idx], px.loc[_bin_idx], p1x.loc[_bin_idx], gsx.loc[_bin_idx], \ + gs1x.loc[_bin_idx], n1.loc[_bin_idx].values - _da = df_D / m.reshape(-1, 1) + _df_n - _db = df_D * m.reshape(-1, 1) + _df_n + _da = df_D / m.reshape(-1, 1) + _df_n + _db = df_D * m.reshape(-1, 1) + _df_n - _an = (_da * _px - _p1x) / (_da * _gsx - _gs1x) - _bn = (_db * _px - _p1x) / (_db * _gsx - _gs1x) + _an = (_da * _px - _p1x) / (_da * _gsx - _gs1x) + _bn = (_db * _px - _p1x) / (_db * _gsx - _gs1x) - _real_an, _real_bn = np.real(_an), np.real(_bn) - _imag_an, _imag_bn = np.imag(_an), np.imag(_bn) + _real_an, _real_bn = np.real(_an), 
np.real(_bn) + _imag_an, _imag_bn = np.imag(_an), np.imag(_bn) - _pr_qext = np.nansum(_n1 * (_real_an + _real_bn), axis=1) - _pr_qsca = np.nansum(_n1 * (_real_an ** 2 + _real_bn ** 2 + _imag_an ** 2 + _imag_bn ** 2), axis=1) + _pr_qext = np.nansum(_n1 * (_real_an + _real_bn), axis=1) + _pr_qsca = np.nansum(_n1 * (_real_an ** 2 + _real_bn ** 2 + _imag_an ** 2 + _imag_bn ** 2), axis=1) - df_qext.loc[_bin_idx] = _pr_qext - df_qsca.loc[_bin_idx] = _pr_qsca + df_qext.loc[_bin_idx] = _pr_qext + df_qsca.loc[_bin_idx] = _pr_qsca - return df_qext, df_qsca + return df_qext, df_qsca def MieQ(m_ary, wavelength, diameter): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#MieQ + # http://pymiescatt.readthedocs.io/en/latest/forward.html#MieQ - x = np.pi * diameter / wavelength + x = np.pi * diameter / wavelength - nmax = np.round(2 + x + 4 * (x ** (1 / 3))) + nmax = np.round(2 + x + 4 * (x ** (1 / 3))) - df_n = DataFrame([np.arange(1, nmax.max() + 1)] * nmax.size) - df_n = df_n.mask(df_n > nmax.reshape(-1, 1)) + df_n = DataFrame([np.arange(1, nmax.max() + 1)] * nmax.size) + df_n = df_n.mask(df_n > nmax.reshape(-1, 1)) - n1 = 2 * df_n + 1 - n2 = df_n * (df_n + 2) / (df_n + 1) - n3 = n1 / (df_n * (df_n + 1)) - x2 = x ** 2 + n1 = 2 * df_n + 1 + n2 = df_n * (df_n + 2) / (df_n + 1) + n3 = n1 / (df_n * (df_n + 1)) + x2 = x ** 2 - _qext, _qsca = Mie_ab(m_ary, x, nmax, df_n) + _qext, _qsca = Mie_ab(m_ary, x, nmax, df_n) - qext = (2 / x2).reshape(-1, 1) * _qext - qsca = (2 / x2).reshape(-1, 1) * _qsca + qext = (2 / x2).reshape(-1, 1) * _qext + qsca = (2 / x2).reshape(-1, 1) * _qsca - # return qext.astype(float).values.T, qsca.astype(float).values.T, - return qext.values.T.astype(float), qsca.values.T.astype(float) + # return qext.astype(float).values.T, qsca.astype(float).values.T, + return qext.values.T.astype(float), qsca.values.T.astype(float) def Mie_SD(m_ary, wavelength, psd, multp_m_in1psd=False, dt_chunk_size=10, q_table=False): - m_ary = coerceDType(m_ary) - if type(psd) is not DataFrame: - psd = DataFrame(psd).T + m_ary = coerceDType(m_ary) + if type(psd) is not DataFrame: + psd = DataFrame(psd).T - if (len(m_ary) != len(psd)) & ~multp_m_in1psd: - raise ValueError('"m array" size should be same as "psd" size') + if (len(m_ary) != len(psd)) & ~multp_m_in1psd: + raise ValueError('"m array" size should be same as "psd" size') - dp = psd.keys().values - ndp = psd.values - aSDn = np.pi * ((dp / 2) ** 2) * ndp * 1e-6 + dp = psd.keys().values + ndp = psd.values + aSDn = np.pi * ((dp / 2) ** 2) * ndp * 1e-6 - if q_table: - qext, qsca = q_table - else: - qext, qsca = MieQ(m_ary, wavelength, dp) + if q_table: + qext, qsca = q_table + else: + qext, qsca = MieQ(m_ary, wavelength, dp) - if multp_m_in1psd: - # print('\tcalculate ext') + if multp_m_in1psd: + # print('\tcalculate ext') - aSDn_all = np.repeat(aSDn, m_ary.size, axis=0).reshape(len(aSDn), m_ary.size, -1) + aSDn_all = np.repeat(aSDn, m_ary.size, axis=0).reshape(len(aSDn), m_ary.size, -1) - qext_all = np.repeat(qext[np.newaxis, :, :], len(aSDn), axis=0).reshape(*aSDn_all.shape) - qsca_all = np.repeat(qsca[np.newaxis, :, :], len(aSDn), axis=0).reshape(*aSDn_all.shape) + qext_all = np.repeat(qext[np.newaxis, :, :], len(aSDn), axis=0).reshape(*aSDn_all.shape) + qsca_all = np.repeat(qsca[np.newaxis, :, :], len(aSDn), axis=0).reshape(*aSDn_all.shape) - df_ext = DataFrame(trapezoid(aSDn_all * qext_all), columns=m_ary, index=psd.index).astype(float) - df_sca = DataFrame(trapezoid(aSDn_all * qsca_all), columns=m_ary, index=psd.index).astype(float) - 
df_abs = df_ext - df_sca - # print('\tdone') + df_ext = DataFrame(trapezoid(aSDn_all * qext_all), columns=m_ary, index=psd.index).astype(float) + df_sca = DataFrame(trapezoid(aSDn_all * qsca_all), columns=m_ary, index=psd.index).astype(float) + df_abs = df_ext - df_sca + # print('\tdone') - return dict(ext=df_ext, sca=df_sca, abs=df_abs) + return dict(ext=df_ext, sca=df_sca, abs=df_abs) - else: - df_out = DataFrame(index=psd.index) - df_out['ext'] = trapezoid(qext * aSDn).astype(float) - df_out['sca'] = trapezoid(qsca * aSDn).astype(float) - df_out['abs'] = df_out['ext'] - df_out['sca'] + else: + df_out = DataFrame(index=psd.index) + df_out['ext'] = trapezoid(qext * aSDn).astype(float) + df_out['sca'] = trapezoid(qsca * aSDn).astype(float) + df_out['abs'] = df_out['ext'] - df_out['sca'] - return df_out + return df_out diff --git a/AeroViz/dataProcess/Optical/_scattering.py b/AeroViz/dataProcess/Optical/_scattering.py index 907fac2..4654719 100644 --- a/AeroViz/dataProcess/Optical/_scattering.py +++ b/AeroViz/dataProcess/Optical/_scattering.py @@ -1,30 +1,29 @@ import numpy as np -from scipy.optimize import curve_fit +from pandas import concat -__all__ = [ - '_SAE', -] +__all__ = ['_scaCoe'] -def _SAE(df): - def _SAEcalc(_df): - ## parameter - band = np.array([450, 550, 700]) * 1e-3 +def _scaCoe(df, instru, specified_band: list): + from .Angstrom_exponent import get_Angstrom_exponent, get_species_wavelength + band_Neph = np.array([450, 550, 700]) + band_Aurora = np.array([450, 525, 635]) - ## 3 pts fitting - ## function - def _get_slope(__df): - func = lambda _x, _sl, _int: _sl * _x + _int - popt, pcov = curve_fit(func, np.log(band), np.log(__df)) + band = band_Neph if instru == 'Neph' else band_Aurora - return popt + df_sca = df.copy().dropna() - ## calculate - _SAE = _df.apply(_get_slope, axis=1, result_type='expand') - _SAE.columns = ['slope', 'intercept'] + if instru == 'Neph': + df_out = df_sca[['B']].copy() + df_out.columns = [f'sca_{_band}' for _band in specified_band] + else: + df_out = df_sca.apply(get_species_wavelength, axis=1, result_type='expand', args=(specified_band,)) + df_out.columns = [f'sca_{_band}' for _band in specified_band] - return _SAE + # calculate + df_SAE = df[['B', 'G', 'R']].dropna().apply(get_Angstrom_exponent, axis=1, result_type='expand', args=(band,)) + df_SAE.columns = ['SAE', 'SAE_intercept'] - df_out = _SAEcalc(df[['B', 'G', 'R']].dropna()) + _df = concat([df_out, df_SAE['SAE']], axis=1) - return df_out.reindex(df.index) + return _df.reindex(df.index) diff --git a/AeroViz/dataProcess/Optical/mie_theory.py b/AeroViz/dataProcess/Optical/mie_theory.py new file mode 100644 index 0000000..44f2c7d --- /dev/null +++ b/AeroViz/dataProcess/Optical/mie_theory.py @@ -0,0 +1,260 @@ +from typing import Sequence, Literal + +import numpy as np +import pandas as pd +from numpy import exp, log, log10, sqrt, pi + +from .PyMieScatt_update import AutoMieQ + + +def Mie_Q(m: complex, + wavelength: float, + dp: float | Sequence[float] + ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Calculate Mie scattering efficiency (Q) for given spherical particle diameter(s). + + Parameters + ---------- + m : complex + The complex refractive index of the particles. + wavelength : float + The wavelength of the incident light (in nm). + dp : float | Sequence[float] + Particle diameters (in nm), can be a single value or Sequence object. + + Returns + ------- + Q_ext : ndarray + The Mie extinction efficiency for each particle diameter. 
+ Q_sca : ndarray + The Mie scattering efficiency for each particle diameter. + Q_abs : ndarray + The Mie absorption efficiency for each particle diameter. + + Examples + -------- + >>> Q_ext, Q_sca, Q_abs = Mie_Q(m=complex(1.5, 0.02), wavelength=550, dp=[100, 200, 300, 400]) + """ + # Ensure dp is a numpy array + dp = np.atleast_1d(dp) + + # Transpose for proper unpacking + Q_ext, Q_sca, Q_abs, g, Q_pr, Q_back, Q_ratio = np.array([AutoMieQ(m, wavelength, _dp) for _dp in dp]).T + + return Q_ext, Q_sca, Q_abs + + +def Mie_MEE(m: complex, + wavelength: float, + dp: float | Sequence[float], + density: float + ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Calculate mass extinction efficiency and other parameters. + + Parameters + ---------- + m : complex + The complex refractive index of the particles. + wavelength : float + The wavelength of the incident light. + dp : float | Sequence[float] + List of particle sizes or a single value. + density : float + The density of particles. + + Returns + ------- + MEE : ndarray + The mass extinction efficiency for each particle diameter. + MSE : ndarray + The mass scattering efficiency for each particle diameter. + MAE : ndarray + The mass absorption efficiency for each particle diameter. + + Examples + -------- + >>> MEE, MSE, MAE = Mie_MEE(m=complex(1.5, 0.02), wavelength=550, dp=[100, 200, 300, 400], density=1.2) + """ + Q_ext, Q_sca, Q_abs = Mie_Q(m, wavelength, dp) + + MEE = (3 * Q_ext) / (2 * density * dp) * 1000 + MSE = (3 * Q_sca) / (2 * density * dp) * 1000 + MAE = (3 * Q_abs) / (2 * density * dp) * 1000 + + return MEE, MSE, MAE + + +def Mie_PESD(m: complex, + wavelength: float = 550, + dp: float | Sequence[float] = None, + ndp: float | Sequence[float] = None, + lognormal: bool = False, + dp_range: tuple = (1, 2500), + geoMean: float = 200, + geoStdDev: float = 2, + numberOfParticles: float = 1e6, + numberOfBins: int = 167, + ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Simultaneously calculate "extinction distribution" and "integrated results" using the Mie_Q method. + + Parameters + ---------- + m : complex + The complex refractive index of the particles. + wavelength : float + The wavelength of the incident light. + dp : float | Sequence[float] + Particle sizes. + ndp : float | Sequence[float] + Number concentration from SMPS or APS in the units of dN/dlogdp. + lognormal : bool, optional + Whether to use lognormal distribution for ndp. Default is False. + dp_range : tuple, optional + Range of particle sizes. Default is (1, 2500) nm. + geoMean : float, optional + Geometric mean of the particle size distribution. Default is 200 nm. + geoStdDev : float, optional + Geometric standard deviation of the particle size distribution. Default is 2. + numberOfParticles : float, optional + Number of particles. Default is 1e6. + numberOfBins : int, optional + Number of bins for the lognormal distribution. Default is 167. + + Returns + ------- + ext_dist : ndarray + The extinction distribution for the given data. + sca_dist : ndarray + The scattering distribution for the given data. + abs_dist : ndarray + The absorption distribution for the given data. + + Notes + ----- + return in "dext/dlogdp", please make sure input the dNdlogdp data. 
+
+    Examples
+    --------
+    >>> Ext, Sca, Abs = Mie_PESD(m=complex(1.5, 0.02), wavelength=550, dp=[100, 200, 500, 1000], ndp=[100, 50, 30, 20])
+    """
+    if lognormal:
+        dp = np.logspace(log10(dp_range[0]), log10(dp_range[1]), numberOfBins)
+
+        ndp = numberOfParticles * (1 / (log(geoStdDev) * sqrt(2 * pi)) *
+                                   exp(-(log(dp) - log(geoMean)) ** 2 / (2 * log(geoStdDev) ** 2)))
+
+    # dN / dlogdp
+    ndp = np.atleast_1d(ndp)
+    # Ensure dp is a numpy array so dp ** 2 below also works for a plain list
+    dp = np.atleast_1d(dp)
+
+    Q_ext, Q_sca, Q_abs = Mie_Q(m, wavelength, dp)
+
+    # The 1e-6 factor scales the result to Mm^-1 (1/10^6 m).
+    Ext = Q_ext * (pi / 4 * dp ** 2) * ndp * 1e-6
+    Sca = Q_sca * (pi / 4 * dp ** 2) * ndp * 1e-6
+    Abs = Q_abs * (pi / 4 * dp ** 2) * ndp * 1e-6
+
+    return Ext, Sca, Abs
+
+
+def internal(dist: pd.Series,
+             dp: float | Sequence[float],
+             wavelength: float = 550,
+             result_type: Literal['extinction', 'scattering', 'absorption'] = 'extinction'
+             ) -> np.ndarray:
+    """
+    Calculate the extinction distribution by the internal mixing model.
+
+    Parameters
+    ----------
+    dist : pd.Series
+        Particle size distribution data (one record), including the ambient refractive
+        index columns 'n_amb' and 'k_amb'.
+    dp : float | Sequence[float]
+        Diameter(s) of the particles, either a single value or a sequence.
+    wavelength : float, optional
+        Wavelength of the incident light, default is 550 nm.
+    result_type : {'extinction', 'scattering', 'absorption'}, optional
+        Type of result to calculate, defaults to 'extinction'.
+
+    Returns
+    -------
+    np.ndarray
+        Extinction distribution calculated based on the internal mixing model.
+    """
+    ext_dist, sca_dist, abs_dist = Mie_PESD(m=complex(dist['n_amb'], dist['k_amb']),
+                                            wavelength=wavelength,
+                                            dp=dp,
+                                            ndp=np.array(dist[:np.size(dp)]))
+
+    if result_type == 'extinction':
+        return ext_dist
+    elif result_type == 'scattering':
+        return sca_dist
+    else:
+        return abs_dist
+
+
+# return dict(ext=ext_dist, sca=sca_dist, abs=abs_dist)
+
+
+def external(dist: pd.Series,
+             dp: float | Sequence[float],
+             wavelength: float = 550,
+             result_type: Literal['extinction', 'scattering', 'absorption'] = 'extinction'
+             ) -> np.ndarray:
+    """
+    Calculate the extinction distribution by the external mixing model.
+
+    Parameters
+    ----------
+    dist : pd.Series
+        Particle size distribution data (one record), including the species volume-ratio columns.
+    dp : float | Sequence[float]
+        Diameter(s) of the particles, either a single value or a sequence.
+    wavelength : float, optional
+        Wavelength of the incident light, default is 550 nm.
+    result_type : {'extinction', 'scattering', 'absorption'}, optional
+        Type of result to calculate, defaults to 'extinction'.
+
+    Returns
+    -------
+    np.ndarray
+        Extinction distribution calculated based on the external mixing model.
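+
+    Examples
+    --------
+    A minimal sketch; ``row`` and ``dp_bins`` are hypothetical placeholders for a single
+    record carrying the species volume-ratio columns used below together with its size bins:
+
+    >>> sca_dist = external(row, dp=dp_bins, wavelength=550, result_type='scattering')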
+ """ + refractive_dic = {'AS_volume_ratio': complex(1.53, 0.00), + 'AN_volume_ratio': complex(1.55, 0.00), + 'OM_volume_ratio': complex(1.54, 0.00), + 'Soil_volume_ratio': complex(1.56, 0.01), + 'SS_volume_ratio': complex(1.54, 0.00), + 'EC_volume_ratio': complex(1.80, 0.54), + 'ALWC_volume_ratio': complex(1.33, 0.00)} + + ndp = np.array(dist[:np.size(dp)]) + mie_results = ( + Mie_PESD(refractive_dic[_specie], wavelength, dp, dist[_specie] / (1 + dist['ALWC_volume_ratio']) * ndp) for + _specie in refractive_dic) + + ext_dist, sca_dist, abs_dist = (np.sum([res[0] for res in mie_results], axis=0), + np.sum([res[1] for res in mie_results], axis=0), + np.sum([res[2] for res in mie_results], axis=0)) + + if result_type == 'extinction': + return ext_dist + elif result_type == 'scattering': + return sca_dist + else: + return abs_dist + + +def core_shell(): + pass + + +def sensitivity(): + pass + + +if __name__ == '__main__': + result = Mie_Q(m=complex(1.5, 0.02), wavelength=550, dp=[100., 200.]) diff --git a/AeroViz/dataProcess/SizeDistr/__init__.py b/AeroViz/dataProcess/SizeDistr/__init__.py index bcd271c..5d483b1 100644 --- a/AeroViz/dataProcess/SizeDistr/__init__.py +++ b/AeroViz/dataProcess/SizeDistr/__init__.py @@ -1,61 +1,57 @@ -from ..core import _writter, _run_process +from ..core import Writer, run_process -__all__ = [ +__all__ = ['SizeDistr'] - 'SizeDistr', -] +class SizeDistr(Writer): + # basic + @run_process('SizeDistr - basic', 'distr_basic') + def basic(self, df, hybrid_bin_start_loc=None, unit='nm', bin_range=(0, 20000), input_type='norm'): + from ._size_distr import _basic -class SizeDistr(_writter): + out = _basic(df, hybrid_bin_start_loc, unit, bin_range, input_type) - ## basic - @_run_process('SizeDistr - basic', 'distr_basic') - def basic(self, df, hybrid_bin_start_loc=None, unit='nm', bin_range=(0, 20000), input_type='norm'): - from ._size_distr import _basic + return self, out - out = _basic(df, hybrid_bin_start_loc, unit, bin_range, input_type) + # merge + @run_process('SizeDistr - merge_SMPS_APS_v4', 'distr_merge') + def merge_SMPS_APS_v4(self, df_smps, df_aps, df_pm25, aps_unit='um', + smps_overlap_lowbound=500, aps_fit_highbound=1000, dndsdv_alg=True, + times_range=(0.8, 1.25, .05)): + from ._merge_v4 import merge_SMPS_APS - return self, out + out = merge_SMPS_APS(df_smps, df_aps, df_pm25, aps_unit, smps_overlap_lowbound, aps_fit_highbound, dndsdv_alg, + times_range) - ## merge - @_run_process('SizeDistr - merge_SMPS_APS_v4', 'distr_merge') - def merge_SMPS_APS_v4(self, df_smps, df_aps, df_pm25, aps_unit='um', - smps_overlap_lowbound=500, aps_fit_highbound=1000, dndsdv_alg=True, - times_range=(0.8, 1.25, .05)): - from ._merge_v4 import merge_SMPS_APS + return self, out - out = merge_SMPS_APS(df_smps, df_aps, df_pm25, aps_unit, smps_overlap_lowbound, aps_fit_highbound, dndsdv_alg, - times_range) + # merge + @run_process('SizeDistr - merge_SMPS_APS_v3', 'distr_merge') + def merge_SMPS_APS_v3(self, df_smps, df_aps, aps_unit='um', + smps_overlap_lowbound=500, aps_fit_highbound=1000, dndsdv_alg=True): + from ._merge_v3 import merge_SMPS_APS - return self, out + out = merge_SMPS_APS(df_smps, df_aps, aps_unit, smps_overlap_lowbound, aps_fit_highbound, dndsdv_alg) - ## merge - @_run_process('SizeDistr - merge_SMPS_APS_v3', 'distr_merge') - def merge_SMPS_APS_v3(self, df_smps, df_aps, aps_unit='um', - smps_overlap_lowbound=500, aps_fit_highbound=1000, dndsdv_alg=True): - from ._merge_v3 import merge_SMPS_APS + return self, out - out = merge_SMPS_APS(df_smps, df_aps, aps_unit, 
smps_overlap_lowbound, aps_fit_highbound, dndsdv_alg) + # merge + @run_process('SizeDistr - merge_SMPS_APS_v2', 'distr_merge') + def merge_SMPS_APS_v2(self, df_smps, df_aps, aps_unit='um', + smps_overlap_lowbound=500, aps_fit_highbound=1000): + from ._merge_v2 import merge_SMPS_APS - return self, out + out = merge_SMPS_APS(df_smps, df_aps, aps_unit, smps_overlap_lowbound, aps_fit_highbound) - ## merge - @_run_process('SizeDistr - merge_SMPS_APS_v2', 'distr_merge') - def merge_SMPS_APS_v2(self, df_smps, df_aps, aps_unit='um', - smps_overlap_lowbound=500, aps_fit_highbound=1000): - from ._merge_v2 import merge_SMPS_APS + return self, out - out = merge_SMPS_APS(df_smps, df_aps, aps_unit, smps_overlap_lowbound, aps_fit_highbound) + # merge + @run_process('SizeDistr - merge_SMPS_APS_v1', 'distr_merge') + def merge_SMPS_APS(self, df_smps, df_aps, aps_unit='um', shift_mode='mobility', + smps_overlap_lowbound=523, aps_fit_highbound=800): + from ._merge_v1 import _merge_SMPS_APS - return self, out + out = _merge_SMPS_APS(df_smps, df_aps, aps_unit, shift_mode, smps_overlap_lowbound, aps_fit_highbound) - ## merge - @_run_process('SizeDistr - merge_SMPS_APS_v1', 'distr_merge') - def merge_SMPS_APS(self, df_smps, df_aps, aps_unit='um', shift_mode='mobility', - smps_overlap_lowbound=523, aps_fit_highbound=800): - from ._merge_v1 import _merge_SMPS_APS - - out = _merge_SMPS_APS(df_smps, df_aps, aps_unit, shift_mode, smps_overlap_lowbound, aps_fit_highbound) - - return self, out + return self, out diff --git a/AeroViz/dataProcess/SizeDistr/__merge.py b/AeroViz/dataProcess/SizeDistr/__merge.py index 27b55fa..a0b2e3a 100644 --- a/AeroViz/dataProcess/SizeDistr/__merge.py +++ b/AeroViz/dataProcess/SizeDistr/__merge.py @@ -1,32 +1,33 @@ from datetime import datetime as dtm + +import numpy as np from pandas import DataFrame, to_datetime # from scipy.interpolate import interp1d from scipy.interpolate import UnivariateSpline as unvpline, interp1d -import numpy as np __all__ = ['_merge_SMPS_APS'] def __test_plot(smpsx, smps, apsx, aps, mergex, merge, mergeox, mergeo, _sh): - from matplotlib.pyplot import subplots, close, show, rcParams + from matplotlib.pyplot import subplots, close, show - ## parameter - # ''' - ## plot - fig, ax = subplots() + ## parameter + # ''' + ## plot + fig, ax = subplots() - ax.plot(smpsx, smps, c='#ff794c', label='smps', marker='o', lw=2) - ax.plot(apsx, aps, c='#4c79ff', label='aps', marker='o', lw=2) - ax.plot(mergex, merge, c='#79796a', label='merge') - # ax.plot(mergeox,mergeo,c='#111111',label='mergeo',marker='o',lw=.75) + ax.plot(smpsx, smps, c='#ff794c', label='smps', marker='o', lw=2) + ax.plot(apsx, aps, c='#4c79ff', label='aps', marker='o', lw=2) + ax.plot(mergex, merge, c='#79796a', label='merge') + # ax.plot(mergeox,mergeo,c='#111111',label='mergeo',marker='o',lw=.75) - ax.set(xscale='log', yscale='log', ) + ax.set(xscale='log', yscale='log', ) - ax.legend(framealpha=0, ) - ax.set_title((_sh ** 2)[0], fontsize=13) + ax.legend(framealpha=0, ) + ax.set_title((_sh ** 2)[0], fontsize=13) - show() - close() + show() + close() # ''' @@ -36,78 +37,78 @@ def __test_plot(smpsx, smps, apsx, aps, mergex, merge, mergeox, mergeo, _sh): ## Create a fitting func. 
by smps data ## return : shift factor def _overlap_fitting(_smps_ori, _aps_ori, _smps_lb, _aps_hb): - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range fitting\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range fitting\033[0m") - ## overlap fitting - ## parmeter - _dt_indx = _smps_ori.index + ## overlap fitting + ## parmeter + _dt_indx = _smps_ori.index - ## overlap diameter data - _aps = _aps_ori[_aps_ori.keys()[_aps_ori.keys() < _aps_hb]].copy() - _smps = _smps_ori[_smps_ori.keys()[_smps_ori.keys() > _smps_lb]].copy() + ## overlap diameter data + _aps = _aps_ori[_aps_ori.keys()[_aps_ori.keys() < _aps_hb]].copy() + _smps = _smps_ori[_smps_ori.keys()[_smps_ori.keys() > _smps_lb]].copy() - ## use SMPS data apply power law fitting - ## y = Ax^B, A = e**coefa, B = coefb, x = logx, y = logy - ## ref : http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html - ## power law fit to SMPS num conc at upper bins to log curve + ## use SMPS data apply power law fitting + ## y = Ax^B, A = e**coefa, B = coefb, x = logx, y = logy + ## ref : http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html + ## power law fit to SMPS num conc at upper bins to log curve - ## coefficient A, B - _smps_qc_cond = ((_smps != 0) & np.isfinite(_smps)) - _smps_qc = _smps.where(_smps_qc_cond) + ## coefficient A, B + _smps_qc_cond = ((_smps != 0) & np.isfinite(_smps)) + _smps_qc = _smps.where(_smps_qc_cond) - _size = _smps_qc_cond.sum(axis=1) - _size = _size.where(_size != 0.).copy() + _size = _smps_qc_cond.sum(axis=1) + _size = _size.where(_size != 0.).copy() - _logx, _logy = np.log(_smps_qc.keys()._data.astype(float)), np.log(_smps_qc) - _x, _y, _xy, _xx = _logx.sum(), _logy.sum(axis=1), (_logx * _logy).sum(axis=1), (_logx ** 2).sum() + _logx, _logy = np.log(_smps_qc.keys()._data.astype(float)), np.log(_smps_qc) + _x, _y, _xy, _xx = _logx.sum(), _logy.sum(axis=1), (_logx * _logy).sum(axis=1), (_logx ** 2).sum() - _coeB = ((_size * _xy - _x * _y) / (_size * _xx - _x ** 2.)) - _coeA = np.exp((_y - _coeB * _x) / _size).values.reshape(-1, 1) - _coeB = _coeB.values.reshape(-1, 1) + _coeB = ((_size * _xy - _x * _y) / (_size * _xx - _x ** 2.)) + _coeA = np.exp((_y - _coeB * _x) / _size).values.reshape(-1, 1) + _coeB = _coeB.values.reshape(-1, 1) - ## rebuild shift smps data by coe. A, B - ## x_shift = (y_ori/A)**(1/B) - _aps_shift_x = (_aps / _coeA) ** (1 / _coeB) - _aps_shift_x = _aps_shift_x.where(np.isfinite(_aps_shift_x)) + ## rebuild shift smps data by coe. A, B + ## x_shift = (y_ori/A)**(1/B) + _aps_shift_x = (_aps / _coeA) ** (1 / _coeB) + _aps_shift_x = _aps_shift_x.where(np.isfinite(_aps_shift_x)) - ## the least squares of diameter - ## the shift factor which the cklosest to 1 - _shift_factor = (_aps_shift_x.keys()._data.astype(float) / _aps_shift_x) - _shift_factor.columns = range(len(_aps_shift_x.keys())) + ## the least squares of diameter + ## the shift factor which the cklosest to 1 + _shift_factor = (_aps_shift_x.keys()._data.astype(float) / _aps_shift_x) + _shift_factor.columns = range(len(_aps_shift_x.keys())) - _dropna_idx = _shift_factor.dropna(how='all').index.copy() + _dropna_idx = _shift_factor.dropna(how='all').index.copy() - ## use the target function to get the similar aps and smps bin - ## S2 = sum( (smps_fit_line(dia) - aps(dia*shift_factor) )**2 ) - ## assumption : the same diameter between smps and aps should get the same conc. 
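+    ## illustrative recap of the inversion above (symbols only, not code):
+    ##   y_fit   = A * x**B               fitted SMPS curve over the overlap range
+    ##   x_equiv = (y_aps / A)**(1 / B)   mobility diameter that reproduces an APS bin's conc.
+    ##   shift   = dia_aps / x_equiv      candidate shift factor for that bin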
+ ## use the target function to get the similar aps and smps bin + ## S2 = sum( (smps_fit_line(dia) - aps(dia*shift_factor) )**2 ) + ## assumption : the same diameter between smps and aps should get the same conc. - ## be sure they art in log value - _S2 = DataFrame(index=_aps_shift_x.index) - _dia_table = DataFrame(np.full(_aps_shift_x.shape, _aps_shift_x.keys()), - columns=_aps_shift_x.keys(), index=_aps_shift_x.index) - for _idx, _factor in _shift_factor.items(): - _smps_fit_df = _coeA * (_dia_table / _factor.to_frame().values) ** _coeB - _S2[_idx] = ((_smps_fit_df - _aps) ** 2).sum(axis=1) + ## be sure they art in log value + _S2 = DataFrame(index=_aps_shift_x.index) + _dia_table = DataFrame(np.full(_aps_shift_x.shape, _aps_shift_x.keys()), + columns=_aps_shift_x.keys(), index=_aps_shift_x.index) + for _idx, _factor in _shift_factor.items(): + _smps_fit_df = _coeA * (_dia_table / _factor.to_frame().values) ** _coeB + _S2[_idx] = ((_smps_fit_df - _aps) ** 2).sum(axis=1) - _least_squ_idx = _S2.idxmin(axis=1).loc[_dropna_idx] + _least_squ_idx = _S2.idxmin(axis=1).loc[_dropna_idx] - _shift_factor_out = DataFrame(_shift_factor.loc[_dropna_idx].values[range(len(_dropna_idx)), _least_squ_idx.values], - index=_dropna_idx).reindex(_dt_indx) + _shift_factor_out = DataFrame(_shift_factor.loc[_dropna_idx].values[range(len(_dropna_idx)), _least_squ_idx.values], + index=_dropna_idx).reindex(_dt_indx) - return _shift_factor_out, (DataFrame(_coeA, index=_dt_indx), DataFrame(_coeB, index=_dt_indx)) + return _shift_factor_out, (DataFrame(_coeA, index=_dt_indx), DataFrame(_coeB, index=_dt_indx)) ## Remove big shift data () ## Return : aps, smps, shift (without big shift data) def _shift_data_process(_shift): - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mshift-data quality control\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mshift-data quality control\033[0m") - _rho = _shift ** 2 - _shift = _shift.mask((~np.isfinite(_shift)) | (_rho > 2) | (_rho < 0.3)) + _rho = _shift ** 2 + _shift = _shift.mask((~np.isfinite(_shift)) | (_rho > 2) | (_rho < 0.3)) - _qc_index = _shift.mask((_rho < 0.6) | (_shift.isna())).dropna().index + _qc_index = _shift.mask((_rho < 0.6) | (_shift.isna())).dropna().index - return _qc_index, _shift + return _qc_index, _shift # return _smps.loc[~_big_shift], _aps.loc[~_big_shift], _shift[~_big_shift].reshape(-1,1) @@ -117,134 +118,134 @@ def _shift_data_process(_shift): ## shift all smps bin and remove the aps bin which smaller than the latest old smps bin ## Return : merge bins, merge data, density def _merge_data(_smps_ori, _aps_ori, _shift_ori, _shift_mode, _smps_lb, _aps_hb, _coe): - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mcreate merge data\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mcreate merge data\033[0m") - _ori_idx = _smps_ori.index - _merge_idx = _smps_ori.loc[_aps_ori.dropna(how='all').index].dropna(how='all').index + _ori_idx = _smps_ori.index + _merge_idx = _smps_ori.loc[_aps_ori.dropna(how='all').index].dropna(how='all').index - _uni_idx, _count = np.unique(np.hstack((_smps_ori.dropna(how='all').index, _aps_ori.dropna(how='all').index, - _shift_ori.dropna(how='all').index)), return_counts=True) + _uni_idx, _count = np.unique(np.hstack((_smps_ori.dropna(how='all').index, _aps_ori.dropna(how='all').index, + _shift_ori.dropna(how='all').index)), return_counts=True) - _merge_idx = to_datetime(np.unique(_uni_idx[_count == 3])) + _merge_idx = to_datetime(np.unique(_uni_idx[_count == 3])) - _smps, _aps, _shift 
= _smps_ori.loc[_merge_idx], _aps_ori.loc[_merge_idx], _shift_ori.loc[_merge_idx].values + _smps, _aps, _shift = _smps_ori.loc[_merge_idx], _aps_ori.loc[_merge_idx], _shift_ori.loc[_merge_idx].values - ## parameter - _coeA, _coeB = _coe[0].loc[_merge_idx], _coe[1].loc[_merge_idx] - _smps_key, _aps_key = _smps.keys()._data.astype(float), _aps.keys()._data.astype(float) + ## parameter + _coeA, _coeB = _coe[0].loc[_merge_idx], _coe[1].loc[_merge_idx] + _smps_key, _aps_key = _smps.keys()._data.astype(float), _aps.keys()._data.astype(float) - _test = 1000 + _test = 1000 - # _cntr = (_smps_lb+_aps_hb)/2 - _cntr = _test - _bin_lb = _smps_key[-1] + # _cntr = (_smps_lb+_aps_hb)/2 + _cntr = _test + _bin_lb = _smps_key[-1] - ## make shift bins - _smps_bin = np.full(_smps.shape, _smps_key) - _aps_bin = np.full(_aps.shape, _aps_key) - # _std_bin = _smps_key.tolist()+_aps_key[_aps_key>_smps_key[-1]].tolist() - _std_bin = np.geomspace(_smps_key[0], _aps_key[-1], 230) - _std_bin_merge = _std_bin[(_std_bin < _cntr) & (_std_bin > _bin_lb)] - _std_bin_inte1 = _std_bin[_std_bin <= _bin_lb] - _std_bin_inte2 = _std_bin[_std_bin >= _cntr] + ## make shift bins + _smps_bin = np.full(_smps.shape, _smps_key) + _aps_bin = np.full(_aps.shape, _aps_key) + # _std_bin = _smps_key.tolist()+_aps_key[_aps_key>_smps_key[-1]].tolist() + _std_bin = np.geomspace(_smps_key[0], _aps_key[-1], 230) + _std_bin_merge = _std_bin[(_std_bin < _cntr) & (_std_bin > _bin_lb)] + _std_bin_inte1 = _std_bin[_std_bin <= _bin_lb] + _std_bin_inte2 = _std_bin[_std_bin >= _cntr] - if _shift_mode == 'mobility': - _aps_bin /= _shift + if _shift_mode == 'mobility': + _aps_bin /= _shift - elif _shift_mode == 'aerodynamic': - _smps_bin *= _shift + elif _shift_mode == 'aerodynamic': + _smps_bin *= _shift - ## merge - _merge_lst = [] - for _bin_smps, _bin_aps, _dt_smps, _dt_aps, _sh in zip(_smps_bin, _aps_bin, _smps.values, _aps.values, _shift): - ## remove + ## merge + _merge_lst = [] + for _bin_smps, _bin_aps, _dt_smps, _dt_aps, _sh in zip(_smps_bin, _aps_bin, _smps.values, _aps.values, _shift): + ## remove - ## keep complete smps bins and data - ## remove the aps bin data lower than smps bin - _condi = _bin_aps >= _bin_smps[-1] + ## keep complete smps bins and data + ## remove the aps bin data lower than smps bin + _condi = _bin_aps >= _bin_smps[-1] - _merge_bin = np.hstack((_bin_smps, _bin_aps[_condi])) - _merge_dt = np.hstack((_dt_smps, _dt_aps[_condi])) + _merge_bin = np.hstack((_bin_smps, _bin_aps[_condi])) + _merge_dt = np.hstack((_dt_smps, _dt_aps[_condi])) - # _merge_fit_loc = (_merge_bin<_aps_hb)&(_merge_bin>_smps_lb) - _merge_fit_loc = (_merge_bin < 1500) & (_merge_bin > _smps_lb) + # _merge_fit_loc = (_merge_bin<_aps_hb)&(_merge_bin>_smps_lb) + _merge_fit_loc = (_merge_bin < 1500) & (_merge_bin > _smps_lb) - ## coeA and coeB - _unvpl_fc = unvpline(np.log(_merge_bin[_merge_fit_loc]), np.log(_merge_dt[_merge_fit_loc]), s=50) - # _unvpl_fc = unvpline(_merge_bin[_merge_fit_loc],_merge_dt[_merge_fit_loc],s=150) - # _inte_log_fc = interp1d(n.log10(_merge_bin[_merge_fit_loc]),n.log10(_merge_dt[_merge_fit_loc]), - # kind='linear',fill_value='extrapolate') - _inte_fc = interp1d(_merge_bin, _merge_dt, kind='linear', fill_value='extrapolate') + ## coeA and coeB + _unvpl_fc = unvpline(np.log(_merge_bin[_merge_fit_loc]), np.log(_merge_dt[_merge_fit_loc]), s=50) + # _unvpl_fc = unvpline(_merge_bin[_merge_fit_loc],_merge_dt[_merge_fit_loc],s=150) + # _inte_log_fc = interp1d(n.log10(_merge_bin[_merge_fit_loc]),n.log10(_merge_dt[_merge_fit_loc]), + # 
kind='linear',fill_value='extrapolate') + _inte_fc = interp1d(_merge_bin, _merge_dt, kind='linear', fill_value='extrapolate') - __merge = np.exp(_unvpl_fc(np.log(_std_bin_merge))) - # __merge = _unvpl_fc(_std_bin_merge) + __merge = np.exp(_unvpl_fc(np.log(_std_bin_merge))) + # __merge = _unvpl_fc(_std_bin_merge) - _merge_dt_fit = np.hstack((_inte_fc(_std_bin_inte1), __merge, _inte_fc(_std_bin_inte2))) - # _merge_dt_fit = __merge - # __test_plot(_bin_smps,_dt_smps,_bin_aps,_dt_aps,_std_bin,_merge_dt_fit,_merge_bin,_merge_dt,_sh) + _merge_dt_fit = np.hstack((_inte_fc(_std_bin_inte1), __merge, _inte_fc(_std_bin_inte2))) + # _merge_dt_fit = __merge + # __test_plot(_bin_smps,_dt_smps,_bin_aps,_dt_aps,_std_bin,_merge_dt_fit,_merge_bin,_merge_dt,_sh) - _merge_lst.append(_merge_dt_fit) + _merge_lst.append(_merge_dt_fit) - _df_merge = DataFrame(_merge_lst, columns=_std_bin, index=_merge_idx) - _df_merge = _df_merge.mask(_df_merge < 0) + _df_merge = DataFrame(_merge_lst, columns=_std_bin, index=_merge_idx) + _df_merge = _df_merge.mask(_df_merge < 0) - ## process output df - ## average, align with index - def _out_df(*_df_arg, **_df_kwarg): - _df = DataFrame(*_df_arg, **_df_kwarg).reindex(_ori_idx) - _df.index.name = 'time' - return _df + ## process output df + ## average, align with index + def _out_df(*_df_arg, **_df_kwarg): + _df = DataFrame(*_df_arg, **_df_kwarg).reindex(_ori_idx) + _df.index.name = 'time' + return _df - return _out_df(_df_merge), _out_df(_shift_ori ** 2) + return _out_df(_df_merge), _out_df(_shift_ori ** 2) ## aps_fit_highbound : the diameter I choose randomly def _merge_SMPS_APS(df_smps, df_aps, aps_unit, shift_mode, smps_overlap_lowbound, aps_fit_highbound): - # print(f'\nMerge data :') - # print(f' APS fittint higher diameter : {aps_fit_highbound:4d} nm') - # print(f' SMPS overlap lower diameter : {smps_overlap_lowbound:4d} nm') - # print(f' Average time : {self.data_freq:>4s}\n') + # print(f'\nMerge data :') + # print(f' APS fittint higher diameter : {aps_fit_highbound:4d} nm') + # print(f' SMPS overlap lower diameter : {smps_overlap_lowbound:4d} nm') + # print(f' Average time : {self.data_freq:>4s}\n') - ## get data, remove 'total' and 'mode' - ## set to the same units - smps, aps = df_smps, df_aps - smps.columns = smps.keys().to_numpy(float) - aps.columns = aps.keys().to_numpy(float) + ## get data, remove 'total' and 'mode' + ## set to the same units + smps, aps = df_smps, df_aps + smps.columns = smps.keys().to_numpy(float) + aps.columns = aps.keys().to_numpy(float) - if aps_unit == 'um': - aps.columns = aps.keys() * 1e3 + if aps_unit == 'um': + aps.columns = aps.keys() * 1e3 - ## shift infomation, calculate by powerlaw fitting - shift, coe = _overlap_fitting(smps, aps, smps_overlap_lowbound, aps_fit_highbound) + ## shift infomation, calculate by powerlaw fitting + shift, coe = _overlap_fitting(smps, aps, smps_overlap_lowbound, aps_fit_highbound) - ## process data by shift infomation, and average data - qc_cond, shift = _shift_data_process(shift) + ## process data by shift infomation, and average data + qc_cond, shift = _shift_data_process(shift) - ## merge aps and smps.. - merge_data, density = _merge_data(smps, aps, shift, shift_mode, smps_overlap_lowbound, aps_fit_highbound, coe) - density.columns = ['density'] + ## merge aps and smps.. 
+ merge_data, density = _merge_data(smps, aps, shift, shift_mode, smps_overlap_lowbound, aps_fit_highbound, coe) + density.columns = ['density'] - ## add total and mode - # merge_total = merge_data.sum(axis=1,min_count=1).copy() - # merge_mode = merge_data.idxmax(axis=1).astype(float).copy() + ## add total and mode + # merge_total = merge_data.sum(axis=1,min_count=1).copy() + # merge_mode = merge_data.idxmax(axis=1).astype(float).copy() - # merge_data['total'] = merge_total - # merge_data['mode'] = merge_mode + # merge_data['total'] = merge_total + # merge_data['mode'] = merge_mode - ## out - out_dic = { - 'data_all': merge_data, - 'data_qc': merge_data.loc[qc_cond], - 'density_all': density, - 'density_qc': density.loc[qc_cond], - } + ## out + out_dic = { + 'data_all': merge_data, + 'data_qc': merge_data.loc[qc_cond], + 'density_all': density, + 'density_qc': density.loc[qc_cond], + } - ## process data + ## process data - for _nam, _df in out_dic.items(): - out_dic[_nam] = _df.reindex(df_aps.index).copy() + for _nam, _df in out_dic.items(): + out_dic[_nam] = _df.reindex(df_aps.index).copy() - # merge_data = merge_data.reindex(df_aps.index) - # density = density.reindex(df_aps.index) + # merge_data = merge_data.reindex(df_aps.index) + # density = density.reindex(df_aps.index) - return out_dic + return out_dic diff --git a/AeroViz/dataProcess/SizeDistr/_merge.py b/AeroViz/dataProcess/SizeDistr/_merge.py index 1c73758..22d8e8b 100644 --- a/AeroViz/dataProcess/SizeDistr/_merge.py +++ b/AeroViz/dataProcess/SizeDistr/_merge.py @@ -1,34 +1,35 @@ -from AeroViz.dataProcess.core import _union_index - from datetime import datetime as dtm + +import numpy as np from pandas import DataFrame, to_datetime # from scipy.interpolate import interp1d from scipy.interpolate import UnivariateSpline as unvpline, interp1d -import numpy as np + +from AeroViz.dataProcess.core import union_index __all__ = ['merge_SMPS_APS'] def __test_plot(smpsx, smps, apsx, aps, mergex, merge, mergeox, mergeo, _sh): - from matplotlib.pyplot import subplots, close, show, rcParams + from matplotlib.pyplot import subplots, close, show - ## parameter - # ''' - ## plot - fig, ax = subplots() + ## parameter + # ''' + ## plot + fig, ax = subplots() - ax.plot(smpsx, smps, c='#ff794c', label='smps', marker='o', lw=2) - ax.plot(apsx, aps, c='#4c79ff', label='aps', marker='o', lw=2) - ax.plot(mergex, merge, c='#79796a', label='merge') - # ax.plot(mergeox,mergeo,c='#111111',label='mergeo',marker='o',lw=.75) + ax.plot(smpsx, smps, c='#ff794c', label='smps', marker='o', lw=2) + ax.plot(apsx, aps, c='#4c79ff', label='aps', marker='o', lw=2) + ax.plot(mergex, merge, c='#79796a', label='merge') + # ax.plot(mergeox,mergeo,c='#111111',label='mergeo',marker='o',lw=.75) - ax.set(xscale='log', yscale='log', ) + ax.set(xscale='log', yscale='log', ) - ax.legend(framealpha=0, ) - ax.set_title((_sh ** 2)[0], fontsize=13) + ax.legend(framealpha=0, ) + ax.set_title((_sh ** 2)[0], fontsize=13) - show() - close() + show() + close() # ''' @@ -38,78 +39,78 @@ def __test_plot(smpsx, smps, apsx, aps, mergex, merge, mergeox, mergeo, _sh): ## Create a fitting func. 
by smps data ## return : shift factor def _overlap_fitting(_smps_ori, _aps_ori, _smps_lb, _aps_hb): - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range fitting\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range fitting\033[0m") - ## overlap fitting - ## parmeter - _dt_indx = _smps_ori.index + ## overlap fitting + ## parmeter + _dt_indx = _smps_ori.index - ## overlap diameter data - _aps = _aps_ori[_aps_ori.keys()[_aps_ori.keys() < _aps_hb]].copy() - _smps = _smps_ori[_smps_ori.keys()[_smps_ori.keys() > _smps_lb]].copy() + ## overlap diameter data + _aps = _aps_ori[_aps_ori.keys()[_aps_ori.keys() < _aps_hb]].copy() + _smps = _smps_ori[_smps_ori.keys()[_smps_ori.keys() > _smps_lb]].copy() - ## use SMPS data apply power law fitting - ## y = Ax^B, A = e**coefa, B = coefb, x = logx, y = logy - ## ref : http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html - ## power law fit to SMPS num conc at upper bins to log curve + ## use SMPS data apply power law fitting + ## y = Ax^B, A = e**coefa, B = coefb, x = logx, y = logy + ## ref : http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html + ## power law fit to SMPS num conc at upper bins to log curve - ## coefficient A, B - _smps_qc_cond = ((_smps != 0) & np.isfinite(_smps)) - _smps_qc = _smps.where(_smps_qc_cond) + ## coefficient A, B + _smps_qc_cond = ((_smps != 0) & np.isfinite(_smps)) + _smps_qc = _smps.where(_smps_qc_cond) - _size = _smps_qc_cond.sum(axis=1) - _size = _size.where(_size != 0.).copy() + _size = _smps_qc_cond.sum(axis=1) + _size = _size.where(_size != 0.).copy() - _logx, _logy = np.log(_smps_qc.keys()._data.astype(float)), np.log(_smps_qc) - _x, _y, _xy, _xx = _logx.sum(), _logy.sum(axis=1), (_logx * _logy).sum(axis=1), (_logx ** 2).sum() + _logx, _logy = np.log(_smps_qc.keys()._data.astype(float)), np.log(_smps_qc) + _x, _y, _xy, _xx = _logx.sum(), _logy.sum(axis=1), (_logx * _logy).sum(axis=1), (_logx ** 2).sum() - _coeB = ((_size * _xy - _x * _y) / (_size * _xx - _x ** 2.)) - _coeA = np.exp((_y - _coeB * _x) / _size).values.reshape(-1, 1) - _coeB = _coeB.values.reshape(-1, 1) + _coeB = ((_size * _xy - _x * _y) / (_size * _xx - _x ** 2.)) + _coeA = np.exp((_y - _coeB * _x) / _size).values.reshape(-1, 1) + _coeB = _coeB.values.reshape(-1, 1) - ## rebuild shift smps data by coe. A, B - ## x_shift = (y_ori/A)**(1/B) - _aps_shift_x = (_aps / _coeA) ** (1 / _coeB) - _aps_shift_x = _aps_shift_x.where(np.isfinite(_aps_shift_x)) + ## rebuild shift smps data by coe. A, B + ## x_shift = (y_ori/A)**(1/B) + _aps_shift_x = (_aps / _coeA) ** (1 / _coeB) + _aps_shift_x = _aps_shift_x.where(np.isfinite(_aps_shift_x)) - ## the least squares of diameter - ## the shift factor which the cklosest to 1 - _shift_factor = (_aps_shift_x.keys()._data.astype(float) / _aps_shift_x) - _shift_factor.columns = range(len(_aps_shift_x.keys())) + ## the least squares of diameter + ## the shift factor which the cklosest to 1 + _shift_factor = (_aps_shift_x.keys()._data.astype(float) / _aps_shift_x) + _shift_factor.columns = range(len(_aps_shift_x.keys())) - _dropna_idx = _shift_factor.dropna(how='all').index.copy() + _dropna_idx = _shift_factor.dropna(how='all').index.copy() - ## use the target function to get the similar aps and smps bin - ## S2 = sum( (smps_fit_line(dia) - aps(dia*shift_factor) )**2 ) - ## assumption : the same diameter between smps and aps should get the same conc. 
+ ## use the target function to get the similar aps and smps bin + ## S2 = sum( (smps_fit_line(dia) - aps(dia*shift_factor) )**2 ) + ## assumption : the same diameter between smps and aps should get the same conc. - ## be sure they art in log value - _S2 = DataFrame(index=_aps_shift_x.index) - _dia_table = DataFrame(np.full(_aps_shift_x.shape, _aps_shift_x.keys()), - columns=_aps_shift_x.keys(), index=_aps_shift_x.index) - for _idx, _factor in _shift_factor.items(): - _smps_fit_df = _coeA * (_dia_table / _factor.to_frame().values) ** _coeB - _S2[_idx] = ((_smps_fit_df - _aps) ** 2).sum(axis=1) + ## be sure they art in log value + _S2 = DataFrame(index=_aps_shift_x.index) + _dia_table = DataFrame(np.full(_aps_shift_x.shape, _aps_shift_x.keys()), + columns=_aps_shift_x.keys(), index=_aps_shift_x.index) + for _idx, _factor in _shift_factor.items(): + _smps_fit_df = _coeA * (_dia_table / _factor.to_frame().values) ** _coeB + _S2[_idx] = ((_smps_fit_df - _aps) ** 2).sum(axis=1) - _least_squ_idx = _S2.idxmin(axis=1).loc[_dropna_idx] + _least_squ_idx = _S2.idxmin(axis=1).loc[_dropna_idx] - _shift_factor_out = DataFrame(_shift_factor.loc[_dropna_idx].values[range(len(_dropna_idx)), _least_squ_idx.values], - index=_dropna_idx).reindex(_dt_indx) + _shift_factor_out = DataFrame(_shift_factor.loc[_dropna_idx].values[range(len(_dropna_idx)), _least_squ_idx.values], + index=_dropna_idx).reindex(_dt_indx) - return _shift_factor_out, (DataFrame(_coeA, index=_dt_indx), DataFrame(_coeB, index=_dt_indx)) + return _shift_factor_out, (DataFrame(_coeA, index=_dt_indx), DataFrame(_coeB, index=_dt_indx)) ## Remove big shift data () ## Return : aps, smps, shift (without big shift data) def _shift_data_process(_shift): - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mshift-data quality control\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mshift-data quality control\033[0m") - _rho = _shift ** 2 - _shift = _shift.mask((~np.isfinite(_shift)) | (_rho > 2) | (_rho < 0.3)) + _rho = _shift ** 2 + _shift = _shift.mask((~np.isfinite(_shift)) | (_rho > 2) | (_rho < 0.3)) - _qc_index = _shift.mask((_rho < 0.6) | (_shift.isna())).dropna().index + _qc_index = _shift.mask((_rho < 0.6) | (_shift.isna())).dropna().index - return _qc_index, _shift + return _qc_index, _shift # return _smps.loc[~_big_shift], _aps.loc[~_big_shift], _shift[~_big_shift].reshape(-1,1) @@ -119,127 +120,127 @@ def _shift_data_process(_shift): ## shift all smps bin and remove the aps bin which smaller than the latest old smps bin ## Return : merge bins, merge data, density def _merge_data(_smps_ori, _aps_ori, _shift_ori, _smps_lb, _aps_hb, _coe, _shift_mode): - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mcreate merge data : {_shift_mode}\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mcreate merge data : {_shift_mode}\033[0m") - _ori_idx = _smps_ori.index - _merge_idx = _smps_ori.loc[_aps_ori.dropna(how='all').index].dropna(how='all').index + _ori_idx = _smps_ori.index + _merge_idx = _smps_ori.loc[_aps_ori.dropna(how='all').index].dropna(how='all').index - _corr_aps_cond = _aps_ori.keys() < 700 - _corr_aps_ky = _aps_ori.keys()[_corr_aps_cond] + _corr_aps_cond = _aps_ori.keys() < 700 + _corr_aps_ky = _aps_ori.keys()[_corr_aps_cond] - _uni_idx, _count = np.unique(np.hstack((_smps_ori.dropna(how='all').index, _aps_ori.dropna(how='all').index, - _shift_ori.dropna(how='all').index)), return_counts=True) + _uni_idx, _count = np.unique(np.hstack((_smps_ori.dropna(how='all').index, 
_aps_ori.dropna(how='all').index, + _shift_ori.dropna(how='all').index)), return_counts=True) - _merge_idx = to_datetime(np.unique(_uni_idx[_count == 3])) + _merge_idx = to_datetime(np.unique(_uni_idx[_count == 3])) - _smps, _aps, _shift = _smps_ori.loc[_merge_idx], _aps_ori.loc[_merge_idx], _shift_ori.loc[_merge_idx].values + _smps, _aps, _shift = _smps_ori.loc[_merge_idx], _aps_ori.loc[_merge_idx], _shift_ori.loc[_merge_idx].values - ## parameter - _coeA, _coeB = _coe[0].loc[_merge_idx], _coe[1].loc[_merge_idx] - _smps_key, _aps_key = _smps.keys()._data.astype(float), _aps.keys()._data.astype(float) + ## parameter + _coeA, _coeB = _coe[0].loc[_merge_idx], _coe[1].loc[_merge_idx] + _smps_key, _aps_key = _smps.keys()._data.astype(float), _aps.keys()._data.astype(float) - _cntr = 1000 - _bin_lb = _smps_key[-1] + _cntr = 1000 + _bin_lb = _smps_key[-1] - ## make shift bins - _smps_bin = np.full(_smps.shape, _smps_key) - _aps_bin = np.full(_aps.shape, _aps_key) + ## make shift bins + _smps_bin = np.full(_smps.shape, _smps_key) + _aps_bin = np.full(_aps.shape, _aps_key) - _std_bin = np.geomspace(_smps_key[0], _aps_key[-1], 230) - _std_bin_merge = _std_bin[(_std_bin < _cntr) & (_std_bin > _bin_lb)] - _std_bin_inte1 = _std_bin[_std_bin <= _bin_lb] - _std_bin_inte2 = _std_bin[_std_bin >= _cntr] + _std_bin = np.geomspace(_smps_key[0], _aps_key[-1], 230) + _std_bin_merge = _std_bin[(_std_bin < _cntr) & (_std_bin > _bin_lb)] + _std_bin_inte1 = _std_bin[_std_bin <= _bin_lb] + _std_bin_inte2 = _std_bin[_std_bin >= _cntr] - if _shift_mode == 'mobility': - _aps_bin /= _shift + if _shift_mode == 'mobility': + _aps_bin /= _shift - elif _shift_mode == 'aerodynamic': - _smps_bin *= _shift + elif _shift_mode == 'aerodynamic': + _smps_bin *= _shift - ## merge - _merge_lst, _corr_lst = [], [] - for _bin_smps, _bin_aps, _dt_smps, _dt_aps, _sh in zip(_smps_bin, _aps_bin, _smps.values, _aps.values, _shift): - ## keep complete smps bins and data - ## remove the aps bin data lower than smps bin - _condi = _bin_aps >= _bin_smps[-1] + ## merge + _merge_lst, _corr_lst = [], [] + for _bin_smps, _bin_aps, _dt_smps, _dt_aps, _sh in zip(_smps_bin, _aps_bin, _smps.values, _aps.values, _shift): + ## keep complete smps bins and data + ## remove the aps bin data lower than smps bin + _condi = _bin_aps >= _bin_smps[-1] - _merge_bin = np.hstack((_bin_smps, _bin_aps[_condi])) - _merge_dt = np.hstack((_dt_smps, _dt_aps[_condi])) + _merge_bin = np.hstack((_bin_smps, _bin_aps[_condi])) + _merge_dt = np.hstack((_dt_smps, _dt_aps[_condi])) - _merge_fit_loc = (_merge_bin < 1500) & (_merge_bin > _smps_lb) + _merge_fit_loc = (_merge_bin < 1500) & (_merge_bin > _smps_lb) - ## coeA and coeB - _unvpl_fc = unvpline(np.log(_merge_bin[_merge_fit_loc]), np.log(_merge_dt[_merge_fit_loc]), s=50) - _inte_fc = interp1d(_merge_bin, _merge_dt, kind='linear', fill_value='extrapolate') + ## coeA and coeB + _unvpl_fc = unvpline(np.log(_merge_bin[_merge_fit_loc]), np.log(_merge_dt[_merge_fit_loc]), s=50) + _inte_fc = interp1d(_merge_bin, _merge_dt, kind='linear', fill_value='extrapolate') - _merge_dt_fit = np.hstack((_inte_fc(_std_bin_inte1), np.exp(_unvpl_fc(np.log(_std_bin_merge))), - _inte_fc(_std_bin_inte2))) + _merge_dt_fit = np.hstack((_inte_fc(_std_bin_inte1), np.exp(_unvpl_fc(np.log(_std_bin_merge))), + _inte_fc(_std_bin_inte2))) - _merge_lst.append(_merge_dt_fit) - _corr_lst.append(interp1d(_std_bin, _merge_dt_fit)(_bin_aps[_corr_aps_cond])) + _merge_lst.append(_merge_dt_fit) + _corr_lst.append(interp1d(_std_bin, 
_merge_dt_fit)(_bin_aps[_corr_aps_cond])) - _df_merge = DataFrame(_merge_lst, columns=_std_bin, index=_merge_idx) - _df_merge = _df_merge.mask(_df_merge < 0) + _df_merge = DataFrame(_merge_lst, columns=_std_bin, index=_merge_idx) + _df_merge = _df_merge.mask(_df_merge < 0) - _df_corr = DataFrame(_corr_lst, columns=_corr_aps_ky, index=_merge_idx) / _aps_ori.loc[_merge_idx, _corr_aps_ky] + _df_corr = DataFrame(_corr_lst, columns=_corr_aps_ky, index=_merge_idx) / _aps_ori.loc[_merge_idx, _corr_aps_ky] - ## process output df - ## average, align with index - def _out_df(*_df_arg, **_df_kwarg): - _df = DataFrame(*_df_arg, **_df_kwarg).reindex(_ori_idx) - _df.index.name = 'time' - return _df + ## process output df + ## average, align with index + def _out_df(*_df_arg, **_df_kwarg): + _df = DataFrame(*_df_arg, **_df_kwarg).reindex(_ori_idx) + _df.index.name = 'time' + return _df - return _out_df(_df_merge), _out_df(_shift_ori ** 2), _out_df(_df_corr) + return _out_df(_df_merge), _out_df(_shift_ori ** 2), _out_df(_df_corr) def merge_SMPS_APS(df_smps, df_aps, aps_unit='um', smps_overlap_lowbound=500, aps_fit_highbound=1000): - df_smps, df_aps = _union_index(df_smps, df_aps) + df_smps, df_aps = union_index(df_smps, df_aps) - ## set to the same units - smps, aps_ori = df_smps.copy(), df_aps.copy() - smps.columns = smps.keys().to_numpy(float) - aps_ori.columns = aps_ori.keys().to_numpy(float) + ## set to the same units + smps, aps_ori = df_smps.copy(), df_aps.copy() + smps.columns = smps.keys().to_numpy(float) + aps_ori.columns = aps_ori.keys().to_numpy(float) - if aps_unit == 'um': - aps_ori.columns = aps_ori.keys() * 1e3 + if aps_unit == 'um': + aps_ori.columns = aps_ori.keys() * 1e3 - den_lst, mer_lst = [], [] - aps_input = aps_ori.loc[:, aps_ori.keys() > 700].copy() + den_lst, mer_lst = [], [] + aps_input = aps_ori.loc[:, aps_ori.keys() > 700].copy() - for _count in range(2): + for _count in range(2): - ## shift infomation, calculate by powerlaw fitting - shift, coe = _overlap_fitting(smps, aps_input, smps_overlap_lowbound, aps_fit_highbound) + ## shift infomation, calculate by powerlaw fitting + shift, coe = _overlap_fitting(smps, aps_input, smps_overlap_lowbound, aps_fit_highbound) - ## process data by shift infomation, and average data - qc_cond, shift = _shift_data_process(shift) + ## process data by shift infomation, and average data + qc_cond, shift = _shift_data_process(shift) - ## merge aps and smps - merge_arg = (smps, aps_ori, shift, smps_overlap_lowbound, aps_fit_highbound, coe) - merge_data_mob, density, _corr = _merge_data(*merge_arg, 'mobility') - merge_data_aer, density, _ = _merge_data(*merge_arg, 'aerodynamic') - density.columns = ['density'] + ## merge aps and smps + merge_arg = (smps, aps_ori, shift, smps_overlap_lowbound, aps_fit_highbound, coe) + merge_data_mob, density, _corr = _merge_data(*merge_arg, 'mobility') + merge_data_aer, density, _ = _merge_data(*merge_arg, 'aerodynamic') + density.columns = ['density'] - if _count == 0: - corr = _corr.resample('1d').mean().reindex(smps.index).ffill() - corr = corr.mask(corr < 1, 1) - aps_ori.loc[:, corr.keys()] *= corr + if _count == 0: + corr = _corr.resample('1d').mean().reindex(smps.index).ffill() + corr = corr.mask(corr < 1, 1) + aps_ori.loc[:, corr.keys()] *= corr - aps_input = aps_ori.copy() + aps_input = aps_ori.copy() - ## out - out_dic = { - 'data_all': merge_data_mob, - 'data_qc': merge_data_mob.loc[qc_cond], - 'data_all_aer': merge_data_aer, - 'data_qc_aer': merge_data_aer.loc[qc_cond], - 'density_all': density, - 
'density_qc': density.loc[qc_cond], - } + ## out + out_dic = { + 'data_all': merge_data_mob, + 'data_qc': merge_data_mob.loc[qc_cond], + 'data_all_aer': merge_data_aer, + 'data_qc_aer': merge_data_aer.loc[qc_cond], + 'density_all': density, + 'density_qc': density.loc[qc_cond], + } - ## process data - for _nam, _df in out_dic.items(): - out_dic[_nam] = _df.reindex(smps.index).copy() + ## process data + for _nam, _df in out_dic.items(): + out_dic[_nam] = _df.reindex(smps.index).copy() - return out_dic + return out_dic diff --git a/AeroViz/dataProcess/SizeDistr/_merge_v1.py b/AeroViz/dataProcess/SizeDistr/_merge_v1.py index 28ce9de..bc906f9 100644 --- a/AeroViz/dataProcess/SizeDistr/_merge_v1.py +++ b/AeroViz/dataProcess/SizeDistr/_merge_v1.py @@ -1,34 +1,35 @@ -from AeroViz.dataProcess.core import _union_index - from datetime import datetime as dtm + +import numpy as np from pandas import DataFrame, to_datetime # from scipy.interpolate import interp1d from scipy.interpolate import UnivariateSpline as unvpline, interp1d -import numpy as np + +from AeroViz.dataProcess.core import union_index __all__ = ['_merge_SMPS_APS'] def __test_plot(smpsx, smps, apsx, aps, mergex, merge, mergeox, mergeo, _sh): - from matplotlib.pyplot import subplots, close, show, rcParams + from matplotlib.pyplot import subplots, close, show - ## parameter - # ''' - ## plot - fig, ax = subplots() + ## parameter + # ''' + ## plot + fig, ax = subplots() - ax.plot(smpsx, smps, c='#ff794c', label='smps', marker='o', lw=2) - ax.plot(apsx, aps, c='#4c79ff', label='aps', marker='o', lw=2) - ax.plot(mergex, merge, c='#79796a', label='merge') - # ax.plot(mergeox,mergeo,c='#111111',label='mergeo',marker='o',lw=.75) + ax.plot(smpsx, smps, c='#ff794c', label='smps', marker='o', lw=2) + ax.plot(apsx, aps, c='#4c79ff', label='aps', marker='o', lw=2) + ax.plot(mergex, merge, c='#79796a', label='merge') + # ax.plot(mergeox,mergeo,c='#111111',label='mergeo',marker='o',lw=.75) - ax.set(xscale='log', yscale='log', ) + ax.set(xscale='log', yscale='log', ) - ax.legend(framealpha=0, ) - ax.set_title((_sh ** 2)[0], fontsize=13) + ax.legend(framealpha=0, ) + ax.set_title((_sh ** 2)[0], fontsize=13) - show() - close() + show() + close() # ''' @@ -38,78 +39,78 @@ def __test_plot(smpsx, smps, apsx, aps, mergex, merge, mergeox, mergeo, _sh): ## Create a fitting func. 
by smps data ## return : shift factor def _overlap_fitting(_smps_ori, _aps_ori, _smps_lb, _aps_hb): - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range fitting\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range fitting\033[0m") - ## overlap fitting - ## parmeter - _dt_indx = _smps_ori.index + ## overlap fitting + ## parmeter + _dt_indx = _smps_ori.index - ## overlap diameter data - _aps = _aps_ori[_aps_ori.keys()[_aps_ori.keys() < _aps_hb]].copy() - _smps = _smps_ori[_smps_ori.keys()[_smps_ori.keys() > _smps_lb]].copy() + ## overlap diameter data + _aps = _aps_ori[_aps_ori.keys()[_aps_ori.keys() < _aps_hb]].copy() + _smps = _smps_ori[_smps_ori.keys()[_smps_ori.keys() > _smps_lb]].copy() - ## use SMPS data apply power law fitting - ## y = Ax^B, A = e**coefa, B = coefb, x = logx, y = logy - ## ref : http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html - ## power law fit to SMPS num conc at upper bins to log curve + ## use SMPS data apply power law fitting + ## y = Ax^B, A = e**coefa, B = coefb, x = logx, y = logy + ## ref : http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html + ## power law fit to SMPS num conc at upper bins to log curve - ## coefficient A, B - _smps_qc_cond = ((_smps != 0) & np.isfinite(_smps)) - _smps_qc = _smps.where(_smps_qc_cond) + ## coefficient A, B + _smps_qc_cond = ((_smps != 0) & np.isfinite(_smps)) + _smps_qc = _smps.where(_smps_qc_cond) - _size = _smps_qc_cond.sum(axis=1) - _size = _size.where(_size != 0.).copy() + _size = _smps_qc_cond.sum(axis=1) + _size = _size.where(_size != 0.).copy() - _logx, _logy = np.log(_smps_qc.keys()._data.astype(float)), np.log(_smps_qc) - _x, _y, _xy, _xx = _logx.sum(), _logy.sum(axis=1), (_logx * _logy).sum(axis=1), (_logx ** 2).sum() + _logx, _logy = np.log(_smps_qc.keys()._data.astype(float)), np.log(_smps_qc) + _x, _y, _xy, _xx = _logx.sum(), _logy.sum(axis=1), (_logx * _logy).sum(axis=1), (_logx ** 2).sum() - _coeB = ((_size * _xy - _x * _y) / (_size * _xx - _x ** 2.)) - _coeA = np.exp((_y - _coeB * _x) / _size).values.reshape(-1, 1) - _coeB = _coeB.values.reshape(-1, 1) + _coeB = ((_size * _xy - _x * _y) / (_size * _xx - _x ** 2.)) + _coeA = np.exp((_y - _coeB * _x) / _size).values.reshape(-1, 1) + _coeB = _coeB.values.reshape(-1, 1) - ## rebuild shift smps data by coe. A, B - ## x_shift = (y_ori/A)**(1/B) - _aps_shift_x = (_aps / _coeA) ** (1 / _coeB) - _aps_shift_x = _aps_shift_x.where(np.isfinite(_aps_shift_x)) + ## rebuild shift smps data by coe. A, B + ## x_shift = (y_ori/A)**(1/B) + _aps_shift_x = (_aps / _coeA) ** (1 / _coeB) + _aps_shift_x = _aps_shift_x.where(np.isfinite(_aps_shift_x)) - ## the least squares of diameter - ## the shift factor which the cklosest to 1 - _shift_factor = (_aps_shift_x.keys()._data.astype(float) / _aps_shift_x) - _shift_factor.columns = range(len(_aps_shift_x.keys())) + ## the least squares of diameter + ## the shift factor which the cklosest to 1 + _shift_factor = (_aps_shift_x.keys()._data.astype(float) / _aps_shift_x) + _shift_factor.columns = range(len(_aps_shift_x.keys())) - _dropna_idx = _shift_factor.dropna(how='all').index.copy() + _dropna_idx = _shift_factor.dropna(how='all').index.copy() - ## use the target function to get the similar aps and smps bin - ## S2 = sum( (smps_fit_line(dia) - aps(dia*shift_factor) )**2 ) - ## assumption : the same diameter between smps and aps should get the same conc. 
+ ## use the target function to get the similar aps and smps bin + ## S2 = sum( (smps_fit_line(dia) - aps(dia*shift_factor) )**2 ) + ## assumption : the same diameter between smps and aps should get the same conc. - ## be sure they art in log value - _S2 = DataFrame(index=_aps_shift_x.index) - _dia_table = DataFrame(np.full(_aps_shift_x.shape, _aps_shift_x.keys()), - columns=_aps_shift_x.keys(), index=_aps_shift_x.index) - for _idx, _factor in _shift_factor.items(): - _smps_fit_df = _coeA * (_dia_table / _factor.to_frame().values) ** _coeB - _S2[_idx] = ((_smps_fit_df - _aps) ** 2).sum(axis=1) + ## be sure they art in log value + _S2 = DataFrame(index=_aps_shift_x.index) + _dia_table = DataFrame(np.full(_aps_shift_x.shape, _aps_shift_x.keys()), + columns=_aps_shift_x.keys(), index=_aps_shift_x.index) + for _idx, _factor in _shift_factor.items(): + _smps_fit_df = _coeA * (_dia_table / _factor.to_frame().values) ** _coeB + _S2[_idx] = ((_smps_fit_df - _aps) ** 2).sum(axis=1) - _least_squ_idx = _S2.idxmin(axis=1).loc[_dropna_idx] + _least_squ_idx = _S2.idxmin(axis=1).loc[_dropna_idx] - _shift_factor_out = DataFrame(_shift_factor.loc[_dropna_idx].values[range(len(_dropna_idx)), _least_squ_idx.values], - index=_dropna_idx).reindex(_dt_indx) + _shift_factor_out = DataFrame(_shift_factor.loc[_dropna_idx].values[range(len(_dropna_idx)), _least_squ_idx.values], + index=_dropna_idx).reindex(_dt_indx) - return _shift_factor_out, (DataFrame(_coeA, index=_dt_indx), DataFrame(_coeB, index=_dt_indx)) + return _shift_factor_out, (DataFrame(_coeA, index=_dt_indx), DataFrame(_coeB, index=_dt_indx)) ## Remove big shift data () ## Return : aps, smps, shift (without big shift data) def _shift_data_process(_shift): - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mshift-data quality control\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mshift-data quality control\033[0m") - _rho = _shift ** 2 - _shift = _shift.mask((~np.isfinite(_shift)) | (_rho > 2.6) | (_rho < 0.3)) + _rho = _shift ** 2 + _shift = _shift.mask((~np.isfinite(_shift)) | (_rho > 2.6) | (_rho < 0.3)) - _qc_index = _shift.mask((_rho < 0.6) | (_shift.isna())).dropna().index + _qc_index = _shift.mask((_rho < 0.6) | (_shift.isna())).dropna().index - return _qc_index, _shift + return _qc_index, _shift # return _smps.loc[~_big_shift], _aps.loc[~_big_shift], _shift[~_big_shift].reshape(-1,1) @@ -119,136 +120,136 @@ def _shift_data_process(_shift): ## shift all smps bin and remove the aps bin which smaller than the latest old smps bin ## Return : merge bins, merge data, density def _merge_data(_smps_ori, _aps_ori, _shift_ori, _shift_mode, _smps_lb, _aps_hb, _coe): - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mcreate merge data\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mcreate merge data\033[0m") - _ori_idx = _smps_ori.index - _merge_idx = _smps_ori.loc[_aps_ori.dropna(how='all').index].dropna(how='all').index + _ori_idx = _smps_ori.index + _merge_idx = _smps_ori.loc[_aps_ori.dropna(how='all').index].dropna(how='all').index - _uni_idx, _count = np.unique(np.hstack((_smps_ori.dropna(how='all').index, _aps_ori.dropna(how='all').index, - _shift_ori.dropna(how='all').index)), return_counts=True) + _uni_idx, _count = np.unique(np.hstack((_smps_ori.dropna(how='all').index, _aps_ori.dropna(how='all').index, + _shift_ori.dropna(how='all').index)), return_counts=True) - _merge_idx = to_datetime(np.unique(_uni_idx[_count == 3])) + _merge_idx = to_datetime(np.unique(_uni_idx[_count == 3])) - _smps, _aps, 
_shift = _smps_ori.loc[_merge_idx], _aps_ori.loc[_merge_idx], _shift_ori.loc[_merge_idx].values + _smps, _aps, _shift = _smps_ori.loc[_merge_idx], _aps_ori.loc[_merge_idx], _shift_ori.loc[_merge_idx].values - ## parameter - _coeA, _coeB = _coe[0].loc[_merge_idx], _coe[1].loc[_merge_idx] - _smps_key, _aps_key = _smps.keys()._data.astype(float), _aps.keys()._data.astype(float) + ## parameter + _coeA, _coeB = _coe[0].loc[_merge_idx], _coe[1].loc[_merge_idx] + _smps_key, _aps_key = _smps.keys()._data.astype(float), _aps.keys()._data.astype(float) - _test = 1000 + _test = 1000 - # _cntr = (_smps_lb+_aps_hb)/2 - _cntr = _test - _bin_lb = _smps_key[-1] + # _cntr = (_smps_lb+_aps_hb)/2 + _cntr = _test + _bin_lb = _smps_key[-1] - ## make shift bins - _smps_bin = np.full(_smps.shape, _smps_key) - _aps_bin = np.full(_aps.shape, _aps_key) - # _std_bin = _smps_key.tolist()+_aps_key[_aps_key>_smps_key[-1]].tolist() - _std_bin = np.geomspace(_smps_key[0], _aps_key[-1], 230) - _std_bin_merge = _std_bin[(_std_bin < _cntr) & (_std_bin > _bin_lb)] - _std_bin_inte1 = _std_bin[_std_bin <= _bin_lb] - _std_bin_inte2 = _std_bin[_std_bin >= _cntr] + ## make shift bins + _smps_bin = np.full(_smps.shape, _smps_key) + _aps_bin = np.full(_aps.shape, _aps_key) + # _std_bin = _smps_key.tolist()+_aps_key[_aps_key>_smps_key[-1]].tolist() + _std_bin = np.geomspace(_smps_key[0], _aps_key[-1], 230) + _std_bin_merge = _std_bin[(_std_bin < _cntr) & (_std_bin > _bin_lb)] + _std_bin_inte1 = _std_bin[_std_bin <= _bin_lb] + _std_bin_inte2 = _std_bin[_std_bin >= _cntr] - if _shift_mode == 'mobility': - _aps_bin /= _shift + if _shift_mode == 'mobility': + _aps_bin /= _shift - elif _shift_mode == 'aerodynamic': - _smps_bin *= _shift + elif _shift_mode == 'aerodynamic': + _smps_bin *= _shift - ## merge - _merge_lst = [] - for _bin_smps, _bin_aps, _dt_smps, _dt_aps, _sh in zip(_smps_bin, _aps_bin, _smps.values, _aps.values, _shift): - ## remove + ## merge + _merge_lst = [] + for _bin_smps, _bin_aps, _dt_smps, _dt_aps, _sh in zip(_smps_bin, _aps_bin, _smps.values, _aps.values, _shift): + ## remove - ## keep complete smps bins and data - ## remove the aps bin data lower than smps bin - _condi = _bin_aps >= _bin_smps[-1] + ## keep complete smps bins and data + ## remove the aps bin data lower than smps bin + _condi = _bin_aps >= _bin_smps[-1] - _merge_bin = np.hstack((_bin_smps, _bin_aps[_condi])) - _merge_dt = np.hstack((_dt_smps, _dt_aps[_condi])) + _merge_bin = np.hstack((_bin_smps, _bin_aps[_condi])) + _merge_dt = np.hstack((_dt_smps, _dt_aps[_condi])) - # _merge_fit_loc = (_merge_bin<_aps_hb)&(_merge_bin>_smps_lb) - _merge_fit_loc = (_merge_bin < 1500) & (_merge_bin > _smps_lb) + # _merge_fit_loc = (_merge_bin<_aps_hb)&(_merge_bin>_smps_lb) + _merge_fit_loc = (_merge_bin < 1500) & (_merge_bin > _smps_lb) - ## coeA and coeB - _unvpl_fc = unvpline(np.log(_merge_bin[_merge_fit_loc]), np.log(_merge_dt[_merge_fit_loc]), s=50) - # _unvpl_fc = unvpline(_merge_bin[_merge_fit_loc],_merge_dt[_merge_fit_loc],s=150) - # _inte_log_fc = interp1d(n.log10(_merge_bin[_merge_fit_loc]),n.log10(_merge_dt[_merge_fit_loc]), - # kind='linear',fill_value='extrapolate') - _inte_fc = interp1d(_merge_bin, _merge_dt, kind='linear', fill_value='extrapolate') + ## coeA and coeB + _unvpl_fc = unvpline(np.log(_merge_bin[_merge_fit_loc]), np.log(_merge_dt[_merge_fit_loc]), s=50) + # _unvpl_fc = unvpline(_merge_bin[_merge_fit_loc],_merge_dt[_merge_fit_loc],s=150) + # _inte_log_fc = interp1d(n.log10(_merge_bin[_merge_fit_loc]),n.log10(_merge_dt[_merge_fit_loc]), + # 
kind='linear',fill_value='extrapolate') + _inte_fc = interp1d(_merge_bin, _merge_dt, kind='linear', fill_value='extrapolate') - __merge = np.exp(_unvpl_fc(np.log(_std_bin_merge))) - # __merge = _unvpl_fc(_std_bin_merge) + __merge = np.exp(_unvpl_fc(np.log(_std_bin_merge))) + # __merge = _unvpl_fc(_std_bin_merge) - _merge_dt_fit = np.hstack((_inte_fc(_std_bin_inte1), __merge, _inte_fc(_std_bin_inte2))) - # _merge_dt_fit = __merge - # __test_plot(_bin_smps,_dt_smps,_bin_aps,_dt_aps,_std_bin,_merge_dt_fit,_merge_bin,_merge_dt,_sh) + _merge_dt_fit = np.hstack((_inte_fc(_std_bin_inte1), __merge, _inte_fc(_std_bin_inte2))) + # _merge_dt_fit = __merge + # __test_plot(_bin_smps,_dt_smps,_bin_aps,_dt_aps,_std_bin,_merge_dt_fit,_merge_bin,_merge_dt,_sh) - _merge_lst.append(_merge_dt_fit) + _merge_lst.append(_merge_dt_fit) - _df_merge = DataFrame(_merge_lst, columns=_std_bin, index=_merge_idx) - _df_merge = _df_merge.mask(_df_merge < 0) + _df_merge = DataFrame(_merge_lst, columns=_std_bin, index=_merge_idx) + _df_merge = _df_merge.mask(_df_merge < 0) - ## process output df - ## average, align with index - def _out_df(*_df_arg, **_df_kwarg): - _df = DataFrame(*_df_arg, **_df_kwarg).reindex(_ori_idx) - _df.index.name = 'time' - return _df + ## process output df + ## average, align with index + def _out_df(*_df_arg, **_df_kwarg): + _df = DataFrame(*_df_arg, **_df_kwarg).reindex(_ori_idx) + _df.index.name = 'time' + return _df - return _out_df(_df_merge), _out_df(_shift_ori ** 2) + return _out_df(_df_merge), _out_df(_shift_ori ** 2) ## aps_fit_highbound : the diameter I choose randomly def _merge_SMPS_APS(df_smps, df_aps, aps_unit, shift_mode, smps_overlap_lowbound, aps_fit_highbound): - df_smps, df_aps = _union_index(df_smps, df_aps) + df_smps, df_aps = union_index(df_smps, df_aps) - # print(f'\nMerge data :') - # print(f' APS fittint higher diameter : {aps_fit_highbound:4d} nm') - # print(f' SMPS overlap lower diameter : {smps_overlap_lowbound:4d} nm') - # print(f' Average time : {self.data_freq:>4s}\n') + # print(f'\nMerge data :') + # print(f' APS fittint higher diameter : {aps_fit_highbound:4d} nm') + # print(f' SMPS overlap lower diameter : {smps_overlap_lowbound:4d} nm') + # print(f' Average time : {self.data_freq:>4s}\n') - ## get data, remove 'total' and 'mode' - ## set to the same units - smps, aps = df_smps, df_aps - smps.columns = smps.keys().to_numpy(float) - aps.columns = aps.keys().to_numpy(float) + ## get data, remove 'total' and 'mode' + ## set to the same units + smps, aps = df_smps, df_aps + smps.columns = smps.keys().to_numpy(float) + aps.columns = aps.keys().to_numpy(float) - if aps_unit == 'um': - aps.columns = aps.keys() * 1e3 + if aps_unit == 'um': + aps.columns = aps.keys() * 1e3 - ## shift infomation, calculate by powerlaw fitting - shift, coe = _overlap_fitting(smps, aps, smps_overlap_lowbound, aps_fit_highbound) + ## shift infomation, calculate by powerlaw fitting + shift, coe = _overlap_fitting(smps, aps, smps_overlap_lowbound, aps_fit_highbound) - ## process data by shift infomation, and average data - qc_cond, shift = _shift_data_process(shift) + ## process data by shift infomation, and average data + qc_cond, shift = _shift_data_process(shift) - ## merge aps and smps.. - merge_data, density = _merge_data(smps, aps, shift, shift_mode, smps_overlap_lowbound, aps_fit_highbound, coe) - density.columns = ['density'] + ## merge aps and smps.. 
+ merge_data, density = _merge_data(smps, aps, shift, shift_mode, smps_overlap_lowbound, aps_fit_highbound, coe) + density.columns = ['density'] - ## add total and mode - # merge_total = merge_data.sum(axis=1,min_count=1).copy() - # merge_mode = merge_data.idxmax(axis=1).astype(float).copy() + ## add total and mode + # merge_total = merge_data.sum(axis=1,min_count=1).copy() + # merge_mode = merge_data.idxmax(axis=1).astype(float).copy() - # merge_data['total'] = merge_total - # merge_data['mode'] = merge_mode + # merge_data['total'] = merge_total + # merge_data['mode'] = merge_mode - ## out - out_dic = { - 'data_all': merge_data, - 'data_qc': merge_data.loc[qc_cond], - 'density_all': density, - 'density_qc': density.loc[qc_cond], - } + ## out + out_dic = { + 'data_all': merge_data, + 'data_qc': merge_data.loc[qc_cond], + 'density_all': density, + 'density_qc': density.loc[qc_cond], + } - ## process data + ## process data - for _nam, _df in out_dic.items(): - out_dic[_nam] = _df.reindex(df_aps.index).copy() + for _nam, _df in out_dic.items(): + out_dic[_nam] = _df.reindex(df_aps.index).copy() - # merge_data = merge_data.reindex(df_aps.index) - # density = density.reindex(df_aps.index) + # merge_data = merge_data.reindex(df_aps.index) + # density = density.reindex(df_aps.index) - return out_dic + return out_dic diff --git a/AeroViz/dataProcess/SizeDistr/_merge_v2.py b/AeroViz/dataProcess/SizeDistr/_merge_v2.py index ae1600b..a743eca 100644 --- a/AeroViz/dataProcess/SizeDistr/_merge_v2.py +++ b/AeroViz/dataProcess/SizeDistr/_merge_v2.py @@ -1,34 +1,35 @@ -from AeroViz.dataProcess.core import _union_index - from datetime import datetime as dtm + +import numpy as np from pandas import DataFrame, to_datetime # from scipy.interpolate import interp1d from scipy.interpolate import UnivariateSpline as unvpline, interp1d -import numpy as np + +from AeroViz.dataProcess.core import union_index __all__ = ['_merge_SMPS_APS'] def __test_plot(smpsx, smps, apsx, aps, mergex, merge, mergeox, mergeo, _sh): - from matplotlib.pyplot import subplots, close, show, rcParams + from matplotlib.pyplot import subplots, close, show - ## parameter - # ''' - ## plot - fig, ax = subplots() + ## parameter + # ''' + ## plot + fig, ax = subplots() - ax.plot(smpsx, smps, c='#ff794c', label='smps', marker='o', lw=2) - ax.plot(apsx, aps, c='#4c79ff', label='aps', marker='o', lw=2) - ax.plot(mergex, merge, c='#79796a', label='merge') - # ax.plot(mergeox,mergeo,c='#111111',label='mergeo',marker='o',lw=.75) + ax.plot(smpsx, smps, c='#ff794c', label='smps', marker='o', lw=2) + ax.plot(apsx, aps, c='#4c79ff', label='aps', marker='o', lw=2) + ax.plot(mergex, merge, c='#79796a', label='merge') + # ax.plot(mergeox,mergeo,c='#111111',label='mergeo',marker='o',lw=.75) - ax.set(xscale='log', yscale='log', ) + ax.set(xscale='log', yscale='log', ) - ax.legend(framealpha=0, ) - ax.set_title((_sh ** 2)[0], fontsize=13) + ax.legend(framealpha=0, ) + ax.set_title((_sh ** 2)[0], fontsize=13) - show() - close() + show() + close() # ''' @@ -38,79 +39,79 @@ def __test_plot(smpsx, smps, apsx, aps, mergex, merge, mergeox, mergeo, _sh): ## Create a fitting func. 
by smps data ## return : shift factor def _overlap_fitting(_smps_ori, _aps_ori, _smps_lb, _aps_hb): - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range fitting\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range fitting\033[0m") - ## overlap fitting - ## parmeter - _dt_indx = _smps_ori.index + ## overlap fitting + ## parmeter + _dt_indx = _smps_ori.index - ## overlap diameter data - _aps = _aps_ori[_aps_ori.keys()[_aps_ori.keys() < _aps_hb]].copy() - _smps = _smps_ori[_smps_ori.keys()[_smps_ori.keys() > _smps_lb]].copy() + ## overlap diameter data + _aps = _aps_ori[_aps_ori.keys()[_aps_ori.keys() < _aps_hb]].copy() + _smps = _smps_ori[_smps_ori.keys()[_smps_ori.keys() > _smps_lb]].copy() - ## use SMPS data apply power law fitting - ## y = Ax^B, A = e**coefa, B = coefb, x = logx, y = logy - ## ref : http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html - ## power law fit to SMPS num conc at upper bins to log curve + ## use SMPS data apply power law fitting + ## y = Ax^B, A = e**coefa, B = coefb, x = logx, y = logy + ## ref : http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html + ## power law fit to SMPS num conc at upper bins to log curve - ## coefficient A, B - _smps_qc_cond = ((_smps != 0) & np.isfinite(_smps)) - _smps_qc = _smps.where(_smps_qc_cond) + ## coefficient A, B + _smps_qc_cond = ((_smps != 0) & np.isfinite(_smps)) + _smps_qc = _smps.where(_smps_qc_cond) - _size = _smps_qc_cond.sum(axis=1) - _size = _size.where(_size != 0.).copy() + _size = _smps_qc_cond.sum(axis=1) + _size = _size.where(_size != 0.).copy() - _logx, _logy = np.log(_smps_qc.keys()._data.astype(float)), np.log(_smps_qc) - _x, _y, _xy, _xx = _logx.sum(), _logy.sum(axis=1), (_logx * _logy).sum(axis=1), (_logx ** 2).sum() + _logx, _logy = np.log(_smps_qc.keys()._data.astype(float)), np.log(_smps_qc) + _x, _y, _xy, _xx = _logx.sum(), _logy.sum(axis=1), (_logx * _logy).sum(axis=1), (_logx ** 2).sum() - _coeB = ((_size * _xy - _x * _y) / (_size * _xx - _x ** 2.)) - _coeA = np.exp((_y - _coeB * _x) / _size).values.reshape(-1, 1) - _coeB = _coeB.values.reshape(-1, 1) + _coeB = ((_size * _xy - _x * _y) / (_size * _xx - _x ** 2.)) + _coeA = np.exp((_y - _coeB * _x) / _size).values.reshape(-1, 1) + _coeB = _coeB.values.reshape(-1, 1) - ## rebuild shift smps data by coe. A, B - ## x_shift = (y_ori/A)**(1/B) - _aps_shift_x = (_aps / _coeA) ** (1 / _coeB) - _aps_shift_x = _aps_shift_x.where(np.isfinite(_aps_shift_x)) + ## rebuild shift smps data by coe. A, B + ## x_shift = (y_ori/A)**(1/B) + _aps_shift_x = (_aps / _coeA) ** (1 / _coeB) + _aps_shift_x = _aps_shift_x.where(np.isfinite(_aps_shift_x)) - ## the least squares of diameter - ## the shift factor which the cklosest to 1 - _shift_factor = (_aps_shift_x.keys()._data.astype(float) / _aps_shift_x) - _shift_factor.columns = range(len(_aps_shift_x.keys())) + ## the least squares of diameter + ## the shift factor which the cklosest to 1 + _shift_factor = (_aps_shift_x.keys()._data.astype(float) / _aps_shift_x) + _shift_factor.columns = range(len(_aps_shift_x.keys())) - _dropna_idx = _shift_factor.dropna(how='all').index.copy() + _dropna_idx = _shift_factor.dropna(how='all').index.copy() - ## use the target function to get the similar aps and smps bin - ## S2 = sum( (smps_fit_line(dia) - aps(dia*shift_factor) )**2 ) - ## assumption : the same diameter between smps and aps should get the same conc. 
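The overlap fit above is an ordinary least-squares power law, y = A * x**B solved in log-log space (the MathWorld reference in the comments), after which every APS bin is pushed back through x_shift = (y/A)**(1/B) to find the diameter the SMPS fit would assign to its concentration; the ratio of the real APS diameter to that rebuilt diameter is the candidate shift factor. A scalar sketch of those two steps, assuming clean 1-D NumPy arrays (the production code does the same thing row-wise on DataFrames, and the names below are illustrative):

import numpy as np

def powerlaw_fit(diam_nm, dn):
    # least-squares fit of dn = A * diam**B, done on log-transformed values
    ok = np.isfinite(dn) & (dn > 0)
    logx, logy = np.log(diam_nm[ok]), np.log(dn[ok])
    n = ok.sum()
    B = (n * (logx * logy).sum() - logx.sum() * logy.sum()) \
        / (n * (logx ** 2).sum() - logx.sum() ** 2)
    A = np.exp((logy.sum() - B * logx.sum()) / n)
    return A, B

def candidate_shift(diam_aps, dn_aps, A, B):
    # diameter the SMPS fit would need in order to reproduce this APS concentration
    x_shift = (dn_aps / A) ** (1.0 / B)
    return diam_aps / x_shift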
+ ## use the target function to get the similar aps and smps bin + ## S2 = sum( (smps_fit_line(dia) - aps(dia*shift_factor) )**2 ) + ## assumption : the same diameter between smps and aps should get the same conc. - ## be sure they art in log value - _S2 = DataFrame(index=_aps_shift_x.index) - _dia_table = DataFrame(np.full(_aps_shift_x.shape, _aps_shift_x.keys()), - columns=_aps_shift_x.keys(), index=_aps_shift_x.index) - for _idx, _factor in _shift_factor.items(): - _smps_fit_df = _coeA * (_dia_table / _factor.to_frame().values) ** _coeB - _S2[_idx] = ((_smps_fit_df - _aps) ** 2).sum(axis=1) + ## be sure they art in log value + _S2 = DataFrame(index=_aps_shift_x.index) + _dia_table = DataFrame(np.full(_aps_shift_x.shape, _aps_shift_x.keys()), + columns=_aps_shift_x.keys(), index=_aps_shift_x.index) + for _idx, _factor in _shift_factor.items(): + _smps_fit_df = _coeA * (_dia_table / _factor.to_frame().values) ** _coeB + _S2[_idx] = ((_smps_fit_df - _aps) ** 2).sum(axis=1) - _least_squ_idx = _S2.idxmin(axis=1).loc[_dropna_idx] + _least_squ_idx = _S2.idxmin(axis=1).loc[_dropna_idx] - _shift_factor_out = DataFrame(_shift_factor.loc[_dropna_idx].values[range(len(_dropna_idx)), _least_squ_idx.values], - index=_dropna_idx).reindex(_dt_indx) + _shift_factor_out = DataFrame(_shift_factor.loc[_dropna_idx].values[range(len(_dropna_idx)), _least_squ_idx.values], + index=_dropna_idx).reindex(_dt_indx) - return _shift_factor_out, (DataFrame(_coeA, index=_dt_indx), DataFrame(_coeB, index=_dt_indx)) + return _shift_factor_out, (DataFrame(_coeA, index=_dt_indx), DataFrame(_coeB, index=_dt_indx)) ## Remove big shift data () ## Return : aps, smps, shift (without big shift data) def _shift_data_process(_shift): - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mshift-data quality control\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mshift-data quality control\033[0m") - _rho = _shift ** 2 - _shift = _shift.mask((~np.isfinite(_shift)) | (_rho > 2.6) | (_rho < 0.6)) + _rho = _shift ** 2 + _shift = _shift.mask((~np.isfinite(_shift)) | (_rho > 2.6) | (_rho < 0.6)) - # _qc_index = _shift.mask((_rho<0.6) | (_shift.isna())).dropna().index + # _qc_index = _shift.mask((_rho<0.6) | (_shift.isna())).dropna().index - # return _qc_index, _shift - return _shift + # return _qc_index, _shift + return _shift # return _smps.loc[~_big_shift], _aps.loc[~_big_shift], _shift[~_big_shift].reshape(-1,1) @@ -120,124 +121,124 @@ def _shift_data_process(_shift): ## shift all smps bin and remove the aps bin which smaller than the latest old smps bin ## Return : merge bins, merge data, density def _merge_data(_smps_ori, _aps_ori, _shift_ori, _smps_lb, _aps_hb, _coe, _shift_mode): - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mcreate merge data : {_shift_mode}\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mcreate merge data : {_shift_mode}\033[0m") - _ori_idx = _smps_ori.index - _merge_idx = _smps_ori.loc[_aps_ori.dropna(how='all').index].dropna(how='all').index + _ori_idx = _smps_ori.index + _merge_idx = _smps_ori.loc[_aps_ori.dropna(how='all').index].dropna(how='all').index - _corr_aps_cond = _aps_ori.keys() < 700 - _corr_aps_ky = _aps_ori.keys()[_corr_aps_cond] + _corr_aps_cond = _aps_ori.keys() < 700 + _corr_aps_ky = _aps_ori.keys()[_corr_aps_cond] - _uni_idx, _count = np.unique(np.hstack((_smps_ori.dropna(how='all').index, _aps_ori.dropna(how='all').index, - _shift_ori.dropna(how='all').index)), return_counts=True) + _uni_idx, _count = 
np.unique(np.hstack((_smps_ori.dropna(how='all').index, _aps_ori.dropna(how='all').index, + _shift_ori.dropna(how='all').index)), return_counts=True) - _merge_idx = to_datetime(np.unique(_uni_idx[_count == 3])) + _merge_idx = to_datetime(np.unique(_uni_idx[_count == 3])) - _smps, _aps, _shift = _smps_ori.loc[_merge_idx], _aps_ori.loc[_merge_idx], _shift_ori.loc[_merge_idx].values + _smps, _aps, _shift = _smps_ori.loc[_merge_idx], _aps_ori.loc[_merge_idx], _shift_ori.loc[_merge_idx].values - ## parameter - _coeA, _coeB = _coe[0].loc[_merge_idx], _coe[1].loc[_merge_idx] - _smps_key, _aps_key = _smps.keys()._data.astype(float), _aps.keys()._data.astype(float) + ## parameter + _coeA, _coeB = _coe[0].loc[_merge_idx], _coe[1].loc[_merge_idx] + _smps_key, _aps_key = _smps.keys()._data.astype(float), _aps.keys()._data.astype(float) - _cntr = 1000 - _bin_lb = _smps_key[-1] + _cntr = 1000 + _bin_lb = _smps_key[-1] - ## make shift bins - _smps_bin = np.full(_smps.shape, _smps_key) - _aps_bin = np.full(_aps.shape, _aps_key) + ## make shift bins + _smps_bin = np.full(_smps.shape, _smps_key) + _aps_bin = np.full(_aps.shape, _aps_key) - _std_bin = np.geomspace(_smps_key[0], _aps_key[-1], 230) - _std_bin_merge = _std_bin[(_std_bin < _cntr) & (_std_bin > _bin_lb)] - _std_bin_inte1 = _std_bin[_std_bin <= _bin_lb] - _std_bin_inte2 = _std_bin[_std_bin >= _cntr] + _std_bin = np.geomspace(_smps_key[0], _aps_key[-1], 230) + _std_bin_merge = _std_bin[(_std_bin < _cntr) & (_std_bin > _bin_lb)] + _std_bin_inte1 = _std_bin[_std_bin <= _bin_lb] + _std_bin_inte2 = _std_bin[_std_bin >= _cntr] - if _shift_mode == 'mobility': - _aps_bin /= _shift + if _shift_mode == 'mobility': + _aps_bin /= _shift - elif _shift_mode == 'aerodynamic': - _smps_bin *= _shift + elif _shift_mode == 'aerodynamic': + _smps_bin *= _shift - ## merge - _merge_lst, _corr_lst = [], [] - for _bin_smps, _bin_aps, _dt_smps, _dt_aps, _sh in zip(_smps_bin, _aps_bin, _smps.values, _aps.values, _shift): - ## keep complete smps bins and data - ## remove the aps bin data lower than smps bin - _condi = _bin_aps >= _bin_smps[-1] + ## merge + _merge_lst, _corr_lst = [], [] + for _bin_smps, _bin_aps, _dt_smps, _dt_aps, _sh in zip(_smps_bin, _aps_bin, _smps.values, _aps.values, _shift): + ## keep complete smps bins and data + ## remove the aps bin data lower than smps bin + _condi = _bin_aps >= _bin_smps[-1] - _merge_bin = np.hstack((_bin_smps, _bin_aps[_condi])) - _merge_dt = np.hstack((_dt_smps, _dt_aps[_condi])) + _merge_bin = np.hstack((_bin_smps, _bin_aps[_condi])) + _merge_dt = np.hstack((_dt_smps, _dt_aps[_condi])) - _merge_fit_loc = (_merge_bin < 1500) & (_merge_bin > _smps_lb) + _merge_fit_loc = (_merge_bin < 1500) & (_merge_bin > _smps_lb) - ## coeA and coeB - _unvpl_fc = unvpline(np.log(_merge_bin[_merge_fit_loc]), np.log(_merge_dt[_merge_fit_loc]), s=50) - _inte_fc = interp1d(_merge_bin, _merge_dt, kind='linear', fill_value='extrapolate') + ## coeA and coeB + _unvpl_fc = unvpline(np.log(_merge_bin[_merge_fit_loc]), np.log(_merge_dt[_merge_fit_loc]), s=50) + _inte_fc = interp1d(_merge_bin, _merge_dt, kind='linear', fill_value='extrapolate') - _merge_dt_fit = np.hstack((_inte_fc(_std_bin_inte1), np.exp(_unvpl_fc(np.log(_std_bin_merge))), - _inte_fc(_std_bin_inte2))) + _merge_dt_fit = np.hstack((_inte_fc(_std_bin_inte1), np.exp(_unvpl_fc(np.log(_std_bin_merge))), + _inte_fc(_std_bin_inte2))) - _merge_lst.append(_merge_dt_fit) - _corr_lst.append(interp1d(_std_bin, _merge_dt_fit)(_bin_aps[_corr_aps_cond])) + _merge_lst.append(_merge_dt_fit) + 
_corr_lst.append(interp1d(_std_bin, _merge_dt_fit)(_bin_aps[_corr_aps_cond])) - _df_merge = DataFrame(_merge_lst, columns=_std_bin, index=_merge_idx) - _df_merge = _df_merge.mask(_df_merge < 0) + _df_merge = DataFrame(_merge_lst, columns=_std_bin, index=_merge_idx) + _df_merge = _df_merge.mask(_df_merge < 0) - _df_corr = DataFrame(_corr_lst, columns=_corr_aps_ky, index=_merge_idx) / _aps_ori.loc[_merge_idx, _corr_aps_ky] + _df_corr = DataFrame(_corr_lst, columns=_corr_aps_ky, index=_merge_idx) / _aps_ori.loc[_merge_idx, _corr_aps_ky] - ## process output df - ## average, align with index - def _out_df(*_df_arg, **_df_kwarg): - _df = DataFrame(*_df_arg, **_df_kwarg).reindex(_ori_idx) - _df.index.name = 'time' - return _df + ## process output df + ## average, align with index + def _out_df(*_df_arg, **_df_kwarg): + _df = DataFrame(*_df_arg, **_df_kwarg).reindex(_ori_idx) + _df.index.name = 'time' + return _df - return _out_df(_df_merge), _out_df(_shift_ori ** 2), _out_df(_df_corr) + return _out_df(_df_merge), _out_df(_shift_ori ** 2), _out_df(_df_corr) def merge_SMPS_APS(df_smps, df_aps, aps_unit='um', smps_overlap_lowbound=500, aps_fit_highbound=1000): - df_smps, df_aps = _union_index(df_smps, df_aps) + df_smps, df_aps = union_index(df_smps, df_aps) - ## set to the same units - smps, aps_ori = df_smps.copy(), df_aps.copy() - smps.columns = smps.keys().to_numpy(float) - aps_ori.columns = aps_ori.keys().to_numpy(float) + ## set to the same units + smps, aps_ori = df_smps.copy(), df_aps.copy() + smps.columns = smps.keys().to_numpy(float) + aps_ori.columns = aps_ori.keys().to_numpy(float) - if aps_unit == 'um': - aps_ori.columns = aps_ori.keys() * 1e3 + if aps_unit == 'um': + aps_ori.columns = aps_ori.keys() * 1e3 - den_lst, mer_lst = [], [] - aps_input = aps_ori.loc[:, aps_ori.keys() > 700].copy() + den_lst, mer_lst = [], [] + aps_input = aps_ori.loc[:, aps_ori.keys() > 700].copy() - for _count in range(2): + for _count in range(2): - ## shift infomation, calculate by powerlaw fitting - shift, coe = _overlap_fitting(smps, aps_input, smps_overlap_lowbound, aps_fit_highbound) + ## shift infomation, calculate by powerlaw fitting + shift, coe = _overlap_fitting(smps, aps_input, smps_overlap_lowbound, aps_fit_highbound) - ## process data by shift infomation, and average data - shift = _shift_data_process(shift) + ## process data by shift infomation, and average data + shift = _shift_data_process(shift) - ## merge aps and smps - merge_arg = (smps, aps_ori, shift, smps_overlap_lowbound, aps_fit_highbound, coe) - merge_data_mob, density, _corr = _merge_data(*merge_arg, 'mobility') - merge_data_aer, density, _ = _merge_data(*merge_arg, 'aerodynamic') - density.columns = ['density'] + ## merge aps and smps + merge_arg = (smps, aps_ori, shift, smps_overlap_lowbound, aps_fit_highbound, coe) + merge_data_mob, density, _corr = _merge_data(*merge_arg, 'mobility') + merge_data_aer, density, _ = _merge_data(*merge_arg, 'aerodynamic') + density.columns = ['density'] - if _count == 0: - corr = _corr.resample('1d').mean().reindex(smps.index).ffill() - corr = corr.mask(corr < 1, 1) - aps_ori.loc[:, corr.keys()] *= corr + if _count == 0: + corr = _corr.resample('1d').mean().reindex(smps.index).ffill() + corr = corr.mask(corr < 1, 1) + aps_ori.loc[:, corr.keys()] *= corr - aps_input = aps_ori.copy() + aps_input = aps_ori.copy() - ## out - out_dic = { - 'data_all': merge_data_mob, - 'data_all_aer': merge_data_aer, - 'density_all': density, - } + ## out + out_dic = { + 'data_all': merge_data_mob, + 'data_all_aer': 
merge_data_aer, + 'density_all': density, + } - ## process data - for _nam, _df in out_dic.items(): - out_dic[_nam] = _df.reindex(smps.index).copy() + ## process data + for _nam, _df in out_dic.items(): + out_dic[_nam] = _df.reindex(smps.index).copy() - return out_dic + return out_dic diff --git a/AeroViz/dataProcess/SizeDistr/_merge_v3.py b/AeroViz/dataProcess/SizeDistr/_merge_v3.py index b6f37a8..eaef01a 100644 --- a/AeroViz/dataProcess/SizeDistr/_merge_v3.py +++ b/AeroViz/dataProcess/SizeDistr/_merge_v3.py @@ -1,4 +1,4 @@ -# from ContainerHandle.dataProcess.utils import _union_index +# from ContainerHandle.dataProcess.config import _union_index from datetime import datetime as dtm @@ -16,24 +16,24 @@ warnings.filterwarnings("ignore") __all__ = [ - '_merge_SMPS_APS', + '_merge_SMPS_APS', ] def _test_plot(ax, smps, aps, unp, shft): - fs = 22. - font_dic = dict(fontsize=fs, math_fontfamily='custom') + fs = 22. + font_dic = dict(fontsize=fs, math_fontfamily='custom') - ax.plot(smps, c='#2693ff', label='smps') - ax.plot(aps, c='#ff4c4d', label='aps_ori') - ax.plot(aps.index / shft, aps.values, c='#ff181b', label='aps_shft', ls='--') - ax.plot(unp, c='#333333', label='unp') + ax.plot(smps, c='#2693ff', label='smps') + ax.plot(aps, c='#ff4c4d', label='aps_ori') + ax.plot(aps.index / shft, aps.values, c='#ff181b', label='aps_shft', ls='--') + ax.plot(unp, c='#333333', label='unp') - # ax.tick_params(which='major', length=7, labelsize=fs-2.5) - # ax.tick_params(which='minor', length=4.5) - # ax.spines[['right', 'top']].set_visible(False) - ax.set(xlim=(11.8, 2500), xscale='log') + # ax.tick_params(which='major', length=7, labelsize=fs-2.5) + # ax.tick_params(which='minor', length=4.5) + # ax.spines[['right', 'top']].set_visible(False) + ax.set(xlim=(11.8, 2500), xscale='log') # ax.set_xlabel('', **font_dic) @@ -52,37 +52,37 @@ def _test_plot(ax, smps, aps, unp, shft): def test_plot(smps, aps, unp, shft): - from matplotlib.pyplot import subplots, show, rcParams + from matplotlib.pyplot import subplots, show, rcParams - ## parameter - fs = 22. - # font_fam = 'DejaVu Sans' - font_fam = 'Times New Roman' - rcParams['font.sans-serif'] = font_fam - rcParams['mathtext.fontset'] = 'custom' - font_dic = dict(fontsize=fs, math_fontfamily='custom') + ## parameter + fs = 22. 
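A note on the two-pass structure of merge_SMPS_APS in the hunk above: the first pass also returns, for APS bins below about 700 nm, the ratio between the merged (spline-smoothed) spectrum and the raw APS counts; that ratio is averaged to daily values, floored at 1 so the APS is only ever scaled up, and multiplied into the APS data before the second pass. A rough pandas sketch of just that correction step, assuming corr_ratio and aps are DataFrames on a DatetimeIndex that share column labels (function and variable names are assumptions):

import pandas as pd

def correct_aps(aps: pd.DataFrame, corr_ratio: pd.DataFrame) -> pd.DataFrame:
    # smooth the merged/APS ratio to daily values and align it with the APS time index
    corr = corr_ratio.resample('1d').mean().reindex(aps.index).ffill()
    # treat it as an undercounting correction only: never scale the APS down
    corr = corr.mask(corr < 1, 1)
    out = aps.copy()
    out.loc[:, corr.columns] *= corr   # applied only to the sub-700 nm columns present in corr
    return out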
+ # font_fam = 'DejaVu Sans' + font_fam = 'Times New Roman' + rcParams['font.sans-serif'] = font_fam + rcParams['mathtext.fontset'] = 'custom' + font_dic = dict(fontsize=fs, math_fontfamily='custom') - ## plot - fig, axes = subplots(3, 1) + ## plot + fig, axes = subplots(3, 1) - ds_fc = lambda _dt: _dt * _dt.index ** 2 * np.pi - dv_fc = lambda _dt: _dt * _dt.index ** 3 * np.pi / 6 + ds_fc = lambda _dt: _dt * _dt.index ** 2 * np.pi + dv_fc = lambda _dt: _dt * _dt.index ** 3 * np.pi / 6 - axes[0].set_title(shft, **font_dic) - # axes[0].legend(framealpha=0, fontsize=fs * .6) + axes[0].set_title(shft, **font_dic) + # axes[0].legend(framealpha=0, fontsize=fs * .6) - _test_plot(axes[0], smps, aps, unp, shft) - _test_plot(axes[1], ds_fc(smps), ds_fc(aps), ds_fc(unp), shft) - _test_plot(axes[2], dv_fc(smps), dv_fc(aps), dv_fc(unp), shft) + _test_plot(axes[0], smps, aps, unp, shft) + _test_plot(axes[1], ds_fc(smps), ds_fc(aps), ds_fc(unp), shft) + _test_plot(axes[2], dv_fc(smps), dv_fc(aps), dv_fc(unp), shft) - show() + show() def _powerlaw_fit(_coeA, _coeB, _aps, _idx, _factor): - # breakpoint() + # breakpoint() - _smps_fit_df = _coeA * (_aps.keys().values / _factor) ** _coeB - return DataFrame(((_smps_fit_df.copy() - _aps.copy()) ** 2).sum(axis=1), columns=[_idx]) + _smps_fit_df = _coeA * (_aps.keys().values / _factor) ** _coeB + return DataFrame(((_smps_fit_df.copy() - _aps.copy()) ** 2).sum(axis=1), columns=[_idx]) ## Calculate S2 @@ -92,427 +92,427 @@ def _powerlaw_fit(_coeA, _coeB, _aps, _idx, _factor): ## return : S2 # def _S2_calculate_dN(_smps, _aps): def _powerlaw_fit_dN(_smps, _aps, _alg_type): - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range fitting : {_alg_type}\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range fitting : {_alg_type}\033[0m") - ## overlap fitting - ## parmeter - _dt_indx = _smps.index + ## overlap fitting + ## parmeter + _dt_indx = _smps.index - ## use SMPS data apply power law fitting - ## y = Ax^B, A = e**coefa, B = coefb, x = logx, y = logy - ## ref : http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html - ## power law fit to SMPS num conc at upper bins to log curve + ## use SMPS data apply power law fitting + ## y = Ax^B, A = e**coefa, B = coefb, x = logx, y = logy + ## ref : http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html + ## power law fit to SMPS num conc at upper bins to log curve - ## coefficient A, B - _smps_qc_cond = ((_smps != 0) & np.isfinite(_smps)) - _smps_qc = _smps.where(_smps_qc_cond) + ## coefficient A, B + _smps_qc_cond = ((_smps != 0) & np.isfinite(_smps)) + _smps_qc = _smps.where(_smps_qc_cond) - _size = _smps_qc_cond.sum(axis=1) - _size = _size.where(_size != 0.).copy() + _size = _smps_qc_cond.sum(axis=1) + _size = _size.where(_size != 0.).copy() - _logx, _logy = n.log(_smps_qc.keys()._data.astype(float)), n.log(_smps_qc) - _x, _y, _xy, _xx = _logx.sum(), _logy.sum(axis=1), (_logx * _logy).sum(axis=1), (_logx ** 2).sum() + _logx, _logy = n.log(_smps_qc.keys()._data.astype(float)), n.log(_smps_qc) + _x, _y, _xy, _xx = _logx.sum(), _logy.sum(axis=1), (_logx * _logy).sum(axis=1), (_logx ** 2).sum() - _coeB = ((_size * _xy - _x * _y) / (_size * _xx - _x ** 2.)) - _coeA = n.exp((_y - _coeB * _x) / _size).values.reshape(-1, 1) - _coeB = _coeB.values.reshape(-1, 1) + _coeB = ((_size * _xy - _x * _y) / (_size * _xx - _x ** 2.)) + _coeA = n.exp((_y - _coeB * _x) / _size).values.reshape(-1, 1) + _coeB = _coeB.values.reshape(-1, 1) - ## rebuild shift smps data by coe. 
A, B - ## x_shift = (y_ori/A)**(1/B) - _aps_shift_x = (_aps / _coeA) ** (1 / _coeB) - _aps_shift_x = _aps_shift_x.where(np.isfinite(_aps_shift_x)) + ## rebuild shift smps data by coe. A, B + ## x_shift = (y_ori/A)**(1/B) + _aps_shift_x = (_aps / _coeA) ** (1 / _coeB) + _aps_shift_x = _aps_shift_x.where(np.isfinite(_aps_shift_x)) - ## the least squares of diameter - ## the shift factor which the closest to 1 - _shift_val = np.arange(0.3, 3.05, .05) ** .5 - # _shift_val = np.arange(0.9, 1.805, .005)**.5 + ## the least squares of diameter + ## the shift factor which the closest to 1 + _shift_val = np.arange(0.3, 3.05, .05) ** .5 + # _shift_val = np.arange(0.9, 1.805, .005)**.5 - _shift_factor = DataFrame(columns=range(_shift_val.size), index=_aps_shift_x.index) - _shift_factor.loc[:, :] = _shift_val + _shift_factor = DataFrame(columns=range(_shift_val.size), index=_aps_shift_x.index) + _shift_factor.loc[:, :] = _shift_val - # _dropna_idx = _shift_factor.dropna(how='all').index.copy() - _dropna_idx = _aps_shift_x.dropna(how='all').index.copy() + # _dropna_idx = _shift_factor.dropna(how='all').index.copy() + _dropna_idx = _aps_shift_x.dropna(how='all').index.copy() - ## use the target function to get the similar aps and smps bin - ## S2 = sum( (smps_fit_line(dia) - aps(dia*shift_factor) )**2 ) - ## assumption : the same diameter between smps and aps should get the same conc. + ## use the target function to get the similar aps and smps bin + ## S2 = sum( (smps_fit_line(dia) - aps(dia*shift_factor) )**2 ) + ## assumption : the same diameter between smps and aps should get the same conc. - ## be sure they art in log value - _S2 = DataFrame(index=_aps_shift_x.index) - _dia_table = DataFrame(n.full(_aps_shift_x.shape, _aps_shift_x.keys()), - columns=_aps_shift_x.keys(), index=_aps_shift_x.index) + ## be sure they art in log value + _S2 = DataFrame(index=_aps_shift_x.index) + _dia_table = DataFrame(n.full(_aps_shift_x.shape, _aps_shift_x.keys()), + columns=_aps_shift_x.keys(), index=_aps_shift_x.index) - pool = Pool(cpu_count()) + pool = Pool(cpu_count()) - _S2 = pool.starmap(partial(_powerlaw_fit, _coeA, _coeB, _aps), list(enumerate(_shift_val))) + _S2 = pool.starmap(partial(_powerlaw_fit, _coeA, _coeB, _aps), list(enumerate(_shift_val))) - pool.close() - pool.join() + pool.close() + pool.join() - S2 = concat(_S2, axis=1)[np.arange(_shift_val.size)] - # S2 /= S2.max(axis=1).to_frame().values + S2 = concat(_S2, axis=1)[np.arange(_shift_val.size)] + # S2 /= S2.max(axis=1).to_frame().values - shift_factor_dN = DataFrame( - _shift_factor.loc[_dropna_idx].values[range(len(_dropna_idx)), S2.loc[_dropna_idx].idxmin(axis=1).values], - index=_dropna_idx).reindex(_dt_indx).astype(float) + shift_factor_dN = DataFrame( + _shift_factor.loc[_dropna_idx].values[range(len(_dropna_idx)), S2.loc[_dropna_idx].idxmin(axis=1).values], + index=_dropna_idx).reindex(_dt_indx).astype(float) - shift_factor_dN = shift_factor_dN.mask((shift_factor_dN ** 2 < 0.6) | (shift_factor_dN ** 2 > 2.6)) + shift_factor_dN = shift_factor_dN.mask((shift_factor_dN ** 2 < 0.6) | (shift_factor_dN ** 2 > 2.6)) - return shift_factor_dN + return shift_factor_dN def _corr_fc(_aps_dia, _smps_dia, _smps_dn, _aps_dn, _smooth, _idx, _sh): - ds_fc = lambda _dt: _dt * _dt.index ** 2 * np.pi - dv_fc = lambda _dt: _dt * _dt.index ** 3 * np.pi / 6 + ds_fc = lambda _dt: _dt * _dt.index ** 2 * np.pi + dv_fc = lambda _dt: _dt * _dt.index ** 3 * np.pi / 6 - _aps_sh = _aps_dia / _sh - _aps_sh_inp = _aps_sh.where((_aps_sh >= 500) & (_aps_sh <= 1500.)).copy() - 
_aps_sh_corr = _aps_sh.where((_aps_sh >= _smps_dia[-1]) & (_aps_sh <= 1500.)).copy() + _aps_sh = _aps_dia / _sh + _aps_sh_inp = _aps_sh.where((_aps_sh >= 500) & (_aps_sh <= 1500.)).copy() + _aps_sh_corr = _aps_sh.where((_aps_sh >= _smps_dia[-1]) & (_aps_sh <= 1500.)).copy() - corr_x = np.append(_smps_dia, _aps_sh_corr.dropna()) + corr_x = np.append(_smps_dia, _aps_sh_corr.dropna()) - input_x = np.append(_smps_dia, _aps_sh_inp.dropna()) - input_y = concat([_smps_dn, _aps_dn.iloc[:, ~np.isnan(_aps_sh_inp)]], axis=1) - input_y.columns = input_x + input_x = np.append(_smps_dia, _aps_sh_inp.dropna()) + input_y = concat([_smps_dn, _aps_dn.iloc[:, ~np.isnan(_aps_sh_inp)]], axis=1) + input_y.columns = input_x - input_x.sort() - input_y = input_y[input_x] - corr_y = input_y[corr_x] + input_x.sort() + input_y = input_y[input_x] + corr_y = input_y[corr_x] - S2_lst = [] - for (_tm, _inp_y_dn), (_tm, _cor_y_dn) in zip(input_y.dropna(how='all').iterrows(), - corr_y.dropna(how='all').iterrows()): - ## corr(spec_data, spec_spline) - _spl_dt = [unvpline(input_x, _inp_y, s=_smooth)(corr_x) for _inp_y in - [_inp_y_dn, ds_fc(_inp_y_dn), dv_fc(_inp_y_dn)]] - _cor_dt = [_cor_y_dn, ds_fc(_cor_y_dn), dv_fc(_cor_y_dn)] + S2_lst = [] + for (_tm, _inp_y_dn), (_tm, _cor_y_dn) in zip(input_y.dropna(how='all').iterrows(), + corr_y.dropna(how='all').iterrows()): + ## corr(spec_data, spec_spline) + _spl_dt = [unvpline(input_x, _inp_y, s=_smooth)(corr_x) for _inp_y in + [_inp_y_dn, ds_fc(_inp_y_dn), dv_fc(_inp_y_dn)]] + _cor_dt = [_cor_y_dn, ds_fc(_cor_y_dn), dv_fc(_cor_y_dn)] - _cor_all = sum([np.corrcoef(_cor, _spl)[0, 1] for _cor, _spl in zip(_cor_dt, _spl_dt)]) + _cor_all = sum([np.corrcoef(_cor, _spl)[0, 1] for _cor, _spl in zip(_cor_dt, _spl_dt)]) - S2_lst.append((3 - _cor_all) / 3) + S2_lst.append((3 - _cor_all) / 3) - return DataFrame(S2_lst, columns=[_idx]) + return DataFrame(S2_lst, columns=[_idx]) # def _S2_calculate_dSdV(_smps, _aps, _shft_dn, _S2, smps_ori, aps_ori): # def _S2_calculate_dSdV(_smps, _aps, smps_ori=None): def _corr_with_dNdSdV(_smps, _aps, _alg_type): - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range correlation : {_alg_type}\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range correlation : {_alg_type}\033[0m") - _smps_dia = _smps.keys().astype(float) - _aps_dia = _aps.keys().astype(float) + _smps_dia = _smps.keys().astype(float) + _aps_dia = _aps.keys().astype(float) - all_index = _smps.index.copy() - qc_index = DatetimeIndex(set(_smps.dropna(how='all').index) & set(_aps.dropna(how='all').index)).sort_values() + all_index = _smps.index.copy() + qc_index = DatetimeIndex(set(_smps.dropna(how='all').index) & set(_aps.dropna(how='all').index)).sort_values() - _smps_dn = _smps.loc[qc_index].copy() - _aps_dn = _aps.loc[qc_index].copy() + _smps_dn = _smps.loc[qc_index].copy() + _aps_dn = _aps.loc[qc_index].copy() - ds_fc = lambda _dt: _dt * _dt.index ** 2 * np.pi - dv_fc = lambda _dt: _dt * _dt.index ** 3 * np.pi / 6 + ds_fc = lambda _dt: _dt * _dt.index ** 2 * np.pi + dv_fc = lambda _dt: _dt * _dt.index ** 3 * np.pi / 6 - _std_bin = np.geomspace(11.8, 19810, 230) - _merge_bin = _std_bin[(_std_bin >= _smps_dia[-1]) & (_std_bin < 1500)].copy() + _std_bin = np.geomspace(11.8, 19810, 230) + _merge_bin = _std_bin[(_std_bin >= _smps_dia[-1]) & (_std_bin < 1500)].copy() - _smooth = 50 + _smooth = 50 - _shift_val = np.arange(0.5, 2.605, .005) ** .5 - _shift_val = np.arange(0.9, 2.01, .01) ** .5 - _shift_val = np.arange(0.9, 2.65, .05) ** .5 + _shift_val = 
np.arange(0.5, 2.605, .005) ** .5 + _shift_val = np.arange(0.9, 2.01, .01) ** .5 + _shift_val = np.arange(0.9, 2.65, .05) ** .5 - ## spline fitting with shift aps and smps - pool = Pool(cpu_count()) + ## spline fitting with shift aps and smps + pool = Pool(cpu_count()) - S2_lst = pool.starmap(partial(_corr_fc, _aps_dia, _smps_dia, _smps_dn, _aps_dn, _smooth), - list(enumerate(_shift_val))) + S2_lst = pool.starmap(partial(_corr_fc, _aps_dia, _smps_dia, _smps_dn, _aps_dn, _smooth), + list(enumerate(_shift_val))) - pool.close() - pool.join() + pool.close() + pool.join() - S2_table = concat(S2_lst, axis=1).set_index(qc_index)[np.arange(_shift_val.size)].astype(float).dropna() - min_shft = S2_table.idxmin(axis=1).values + S2_table = concat(S2_lst, axis=1).set_index(qc_index)[np.arange(_shift_val.size)].astype(float).dropna() + min_shft = S2_table.idxmin(axis=1).values - return DataFrame(_shift_val[min_shft.astype(int)], index=S2_table.index).astype(float).reindex(_smps.index) + return DataFrame(_shift_val[min_shft.astype(int)], index=S2_table.index).astype(float).reindex(_smps.index) ## Create merge data ## shift all smps bin and remove the aps bin which smaller than the latest old smps bin ## Return : merge bins, merge data, density def _merge_data(_smps_ori, _aps_ori, _shift_ori, _smps_lb, _aps_hb, _shift_mode, _alg_type): - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mcreate merge data : {_shift_mode} and {_alg_type}\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mcreate merge data : {_shift_mode} and {_alg_type}\033[0m") - _ori_idx = _smps_ori.index - _merge_idx = _smps_ori.loc[_aps_ori.dropna(how='all').index].dropna(how='all').index + _ori_idx = _smps_ori.index + _merge_idx = _smps_ori.loc[_aps_ori.dropna(how='all').index].dropna(how='all').index - _corr_aps_cond = _aps_ori.keys() < 700 - _corr_aps_ky = _aps_ori.keys()[_corr_aps_cond] + _corr_aps_cond = _aps_ori.keys() < 700 + _corr_aps_ky = _aps_ori.keys()[_corr_aps_cond] - _uni_idx = DatetimeIndex(set(_smps_ori.dropna(how='all').index) & set(_aps_ori.dropna(how='all').index) & - set(_shift_ori.dropna(how='all').index)).sort_values() + _uni_idx = DatetimeIndex(set(_smps_ori.dropna(how='all').index) & set(_aps_ori.dropna(how='all').index) & + set(_shift_ori.dropna(how='all').index)).sort_values() - _smps, _aps, _shift = _smps_ori.loc[_merge_idx], _aps_ori.loc[_merge_idx], _shift_ori.loc[_merge_idx].values + _smps, _aps, _shift = _smps_ori.loc[_merge_idx], _aps_ori.loc[_merge_idx], _shift_ori.loc[_merge_idx].values - ## parameter - _smps_key, _aps_key = _smps.keys()._data.astype(float), _aps.keys()._data.astype(float) + ## parameter + _smps_key, _aps_key = _smps.keys()._data.astype(float), _aps.keys()._data.astype(float) - _cntr = 1000 - _bin_lb = _smps_key[-1] + _cntr = 1000 + _bin_lb = _smps_key[-1] - ## make shift bins - _smps_bin = n.full(_smps.shape, _smps_key) - _aps_bin = n.full(_aps.shape, _aps_key) + ## make shift bins + _smps_bin = n.full(_smps.shape, _smps_key) + _aps_bin = n.full(_aps.shape, _aps_key) - _std_bin = n.geomspace(_smps_key[0], _aps_key[-1], 230) - _std_bin_merge = _std_bin[(_std_bin < _cntr) & (_std_bin > _bin_lb)] - _std_bin_inte1 = _std_bin[_std_bin <= _bin_lb] - _std_bin_inte2 = _std_bin[_std_bin >= _cntr] - # breakpoint() - if _shift_mode == 'mobility': - _aps_bin /= _shift + _std_bin = n.geomspace(_smps_key[0], _aps_key[-1], 230) + _std_bin_merge = _std_bin[(_std_bin < _cntr) & (_std_bin > _bin_lb)] + _std_bin_inte1 = _std_bin[_std_bin <= _bin_lb] + _std_bin_inte2 = 
_std_bin[_std_bin >= _cntr] + # breakpoint() + if _shift_mode == 'mobility': + _aps_bin /= _shift - elif _shift_mode == 'aerodynamic': - _smps_bin *= _shift + elif _shift_mode == 'aerodynamic': + _smps_bin *= _shift - ## merge - _merge_lst, _corr_lst = [], [] - for _bin_smps, _bin_aps, _dt_smps, _dt_aps, _sh in zip(_smps_bin, _aps_bin, _smps.values, _aps.values, _shift): - ## keep complete smps bins and data - ## remove the aps bin data lower than smps bin - _condi = _bin_aps >= _bin_smps[-1] + ## merge + _merge_lst, _corr_lst = [], [] + for _bin_smps, _bin_aps, _dt_smps, _dt_aps, _sh in zip(_smps_bin, _aps_bin, _smps.values, _aps.values, _shift): + ## keep complete smps bins and data + ## remove the aps bin data lower than smps bin + _condi = _bin_aps >= _bin_smps[-1] - _merge_bin = n.hstack((_bin_smps, _bin_aps[_condi])) - _merge_dt = n.hstack((_dt_smps, _dt_aps[_condi])) + _merge_bin = n.hstack((_bin_smps, _bin_aps[_condi])) + _merge_dt = n.hstack((_dt_smps, _dt_aps[_condi])) - _merge_fit_loc = (_merge_bin < 1500) & (_merge_bin > _smps_lb) + _merge_fit_loc = (_merge_bin < 1500) & (_merge_bin > _smps_lb) - ## coeA and coeB - _unvpl_fc = unvpline(n.log(_merge_bin[_merge_fit_loc]), n.log(_merge_dt[_merge_fit_loc]), s=50) - _inte_fc = interp1d(_merge_bin, _merge_dt, kind='linear', fill_value='extrapolate') + ## coeA and coeB + _unvpl_fc = unvpline(n.log(_merge_bin[_merge_fit_loc]), n.log(_merge_dt[_merge_fit_loc]), s=50) + _inte_fc = interp1d(_merge_bin, _merge_dt, kind='linear', fill_value='extrapolate') - _merge_dt_fit = n.hstack((_inte_fc(_std_bin_inte1), n.exp(_unvpl_fc(n.log(_std_bin_merge))), - _inte_fc(_std_bin_inte2))) + _merge_dt_fit = n.hstack((_inte_fc(_std_bin_inte1), n.exp(_unvpl_fc(n.log(_std_bin_merge))), + _inte_fc(_std_bin_inte2))) - _merge_lst.append(_merge_dt_fit) - _corr_lst.append(interp1d(_std_bin, _merge_dt_fit)(_bin_aps[_corr_aps_cond])) + _merge_lst.append(_merge_dt_fit) + _corr_lst.append(interp1d(_std_bin, _merge_dt_fit)(_bin_aps[_corr_aps_cond])) - _df_merge = DataFrame(_merge_lst, columns=_std_bin, index=_merge_idx) - _df_merge = _df_merge.mask(_df_merge < 0) + _df_merge = DataFrame(_merge_lst, columns=_std_bin, index=_merge_idx) + _df_merge = _df_merge.mask(_df_merge < 0) - _df_corr = DataFrame(_corr_lst, columns=_corr_aps_ky, index=_merge_idx) / _aps_ori.loc[_merge_idx, _corr_aps_ky] + _df_corr = DataFrame(_corr_lst, columns=_corr_aps_ky, index=_merge_idx) / _aps_ori.loc[_merge_idx, _corr_aps_ky] - ## process output df - ## average, align with index - def _out_df(*_df_arg, **_df_kwarg): - _df = DataFrame(*_df_arg, **_df_kwarg).reindex(_ori_idx) - _df.index.name = 'time' - return _df + ## process output df + ## average, align with index + def _out_df(*_df_arg, **_df_kwarg): + _df = DataFrame(*_df_arg, **_df_kwarg).reindex(_ori_idx) + _df.index.name = 'time' + return _df - return _out_df(_df_merge), _out_df(_shift_ori ** 2), _out_df(_df_corr) + return _out_df(_df_merge), _out_df(_shift_ori ** 2), _out_df(_df_corr) def merge_SMPS_APS(df_smps, df_aps, aps_unit='um', smps_overlap_lowbound=500, aps_fit_highbound=1000, dndsdv_alg=True): - merge_data, merge_data_dn, merge_data_dsdv, merge_data_cor_dn, density, density_dn, density_dsdv, density_cor_dn = [ - DataFrame( - [ - np.nan])] * 8 + merge_data, merge_data_dn, merge_data_dsdv, merge_data_cor_dn, density, density_dn, density_dsdv, density_cor_dn = [ + DataFrame( + [ + np.nan])] * 8 - ## set to the same units - smps, aps_ori = df_smps.copy(), df_aps.copy() - smps.columns = smps.keys().to_numpy(float) - 
aps_ori.columns = aps_ori.keys().to_numpy(float) + ## set to the same units + smps, aps_ori = df_smps.copy(), df_aps.copy() + smps.columns = smps.keys().to_numpy(float) + aps_ori.columns = aps_ori.keys().to_numpy(float) - if aps_unit == 'um': - aps_ori.columns = aps_ori.keys() * 1e3 + if aps_unit == 'um': + aps_ori.columns = aps_ori.keys() * 1e3 - den_lst, mer_lst = [], [] - aps_input = aps_ori.loc[:, (aps_ori.keys() > 700) & (aps_ori.keys() < 1000)].copy() + den_lst, mer_lst = [], [] + aps_input = aps_ori.loc[:, (aps_ori.keys() > 700) & (aps_ori.keys() < 1000)].copy() - # aps_over = aps[ aps.keys()[aps.keys() < 1000] ].copy() - smps_over = smps[smps.keys()[smps.keys() > 500]].copy() + # aps_over = aps[ aps.keys()[aps.keys() < 1000] ].copy() + smps_over = smps[smps.keys()[smps.keys() > 500]].copy() - for _count in range(2): + for _count in range(2): - ## shift data calculate - if _count == 0: - alg_type = 'dn' - shift = _powerlaw_fit_dN(smps_over, aps_input) + ## shift data calculate + if _count == 0: + alg_type = 'dn' + shift = _powerlaw_fit_dN(smps_over, aps_input) - if dndsdv_alg: - shift_dsdv = _corr_with_dNdSdV(smps_over, aps_input).mask(shift.isna()) + if dndsdv_alg: + shift_dsdv = _corr_with_dNdSdV(smps_over, aps_input).mask(shift.isna()) - else: - alg_type = 'cor_dndsdv' - shift_cor = _powerlaw_fit_dN(smps_over, aps_input) + else: + alg_type = 'cor_dndsdv' + shift_cor = _powerlaw_fit_dN(smps_over, aps_input) - if dndsdv_alg: - shift = _corr_with_dNdSdV(smps_over, aps_input).mask(shift_cor.isna()) + if dndsdv_alg: + shift = _corr_with_dNdSdV(smps_over, aps_input).mask(shift_cor.isna()) - ## merge aps and smps - ## 1. power law fit (dn) -> return dn data and aps correct factor - ## 2. correaltion with dn, ds, dv -> return corrected dn_ds_dv data - if (alg_type == 'dn') | dndsdv_alg: - merge_arg = (smps, aps_ori, shift, smps_overlap_lowbound, aps_fit_highbound) + ## merge aps and smps + ## 1. power law fit (dn) -> return dn data and aps correct factor + ## 2. 
correaltion with dn, ds, dv -> return corrected dn_ds_dv data + if (alg_type == 'dn') | dndsdv_alg: + merge_arg = (smps, aps_ori, shift, smps_overlap_lowbound, aps_fit_highbound) - merge_data, density, _corr = _merge_data(*merge_arg, 'mobility', _alg_type=alg_type) - density.columns = ['density'] + merge_data, density, _corr = _merge_data(*merge_arg, 'mobility', _alg_type=alg_type) + density.columns = ['density'] - ## without aps correct - if _count == 0: - ## merge aps and smps - ## dn_ds_dv data - if dndsdv_alg: - merge_arg = (smps, aps_ori, shift_dsdv, smps_overlap_lowbound, aps_fit_highbound) + ## without aps correct + if _count == 0: + ## merge aps and smps + ## dn_ds_dv data + if dndsdv_alg: + merge_arg = (smps, aps_ori, shift_dsdv, smps_overlap_lowbound, aps_fit_highbound) - merge_data_dsdv, density_dsdv, _ = _merge_data(*merge_arg, 'mobility', _alg_type='dndsdv') - density_dsdv.columns = ['density'] + merge_data_dsdv, density_dsdv, _ = _merge_data(*merge_arg, 'mobility', _alg_type='dndsdv') + density_dsdv.columns = ['density'] - ## dn data - merge_data_dn, density_dn = merge_data.copy(), density.copy() + ## dn data + merge_data_dn, density_dn = merge_data.copy(), density.copy() - ## correct aps data - corr = _corr.resample('1d').mean().reindex(smps.index).ffill() - corr = corr.mask(corr < 1, 1) - aps_ori.loc[:, corr.keys()] *= corr + ## correct aps data + corr = _corr.resample('1d').mean().reindex(smps.index).ffill() + corr = corr.mask(corr < 1, 1) + aps_ori.loc[:, corr.keys()] *= corr - aps_input = aps_ori.copy() + aps_input = aps_ori.copy() - ## with aps correct - else: - ## merge aps and smps - ## dn data - merge_arg = (smps, aps_ori, shift_cor, smps_overlap_lowbound, aps_fit_highbound) + ## with aps correct + else: + ## merge aps and smps + ## dn data + merge_arg = (smps, aps_ori, shift_cor, smps_overlap_lowbound, aps_fit_highbound) - merge_data_cor_dn, density_cor_dn, _ = _merge_data(*merge_arg, 'mobility', _alg_type='cor_dn') - density_cor_dn.columns = ['density'] + merge_data_cor_dn, density_cor_dn, _ = _merge_data(*merge_arg, 'mobility', _alg_type='cor_dn') + density_cor_dn.columns = ['density'] - out_rho = concat([density_dn, density_cor_dn, density_dsdv, density], axis=1) - out_rho.columns = ['dn', 'cor_dn', 'dndsdv', 'cor_dndsdv'] + out_rho = concat([density_dn, density_cor_dn, density_dsdv, density], axis=1) + out_rho.columns = ['dn', 'cor_dn', 'dndsdv', 'cor_dndsdv'] - ## out - out_dic = { - 'data_cor_dndsdv': merge_data, - 'data_dn': merge_data_dn, - 'data_dndsdv': merge_data_dsdv, - 'data_cor_dn': merge_data_cor_dn, + ## out + out_dic = { + 'data_cor_dndsdv': merge_data, + 'data_dn': merge_data_dn, + 'data_dndsdv': merge_data_dsdv, + 'data_cor_dn': merge_data_cor_dn, - 'density': out_rho, + 'density': out_rho, - # 'data_all_aer' : merge_data_aer, + # 'data_all_aer' : merge_data_aer, - # 'density_cor_dndsdv' : density, - # 'density_dn' : density_dn, - # 'density_dndsdv' : density_dsdv, - # 'density_cor_dn' : density_cor_dn, - } + # 'density_cor_dndsdv' : density, + # 'density_dn' : density_dn, + # 'density_dndsdv' : density_dsdv, + # 'density_cor_dn' : density_cor_dn, + } - ## process data - for _nam, _df in out_dic.items(): - out_dic[_nam] = _df.reindex(smps.index).copy() + ## process data + for _nam, _df in out_dic.items(): + out_dic[_nam] = _df.reindex(smps.index).copy() - return out_dic + return out_dic def merge_SMPS_APS(df_smps, df_aps, aps_unit='um', smps_overlap_lowbound=500, aps_fit_highbound=1000, dndsdv_alg=True): - # merge_data, merge_data_dn, 
merge_data_dsdv, merge_data_cor_dn, density, density_dn, density_dsdv, density_cor_dn = [DataFrame([np.nan])] * 8 + # merge_data, merge_data_dn, merge_data_dsdv, merge_data_cor_dn, density, density_dn, density_dsdv, density_cor_dn = [DataFrame([np.nan])] * 8 - ## set to the same units - smps, aps = df_smps.copy(), df_aps.copy() - smps.columns = smps.keys().to_numpy(float) - aps.columns = aps.keys().to_numpy(float) + ## set to the same units + smps, aps = df_smps.copy(), df_aps.copy() + smps.columns = smps.keys().to_numpy(float) + aps.columns = aps.keys().to_numpy(float) - if aps_unit == 'um': - aps.columns = aps.keys() * 1e3 + if aps_unit == 'um': + aps.columns = aps.keys() * 1e3 - oth_typ = dict() + oth_typ = dict() - aps_input = aps.copy() - aps_over = aps_input.loc[:, (aps.keys() > 700) & (aps.keys() < 1000)].copy() + aps_input = aps.copy() + aps_over = aps_input.loc[:, (aps.keys() > 700) & (aps.keys() < 1000)].copy() - smps_input = smps.copy() - smps_over = smps_input[smps.keys()[smps.keys() > 500]].copy() + smps_input = smps.copy() + smps_over = smps_input[smps.keys()[smps.keys() > 500]].copy() - for _count in range(2): + for _count in range(2): - ## shift data calculate - ## original - if _count == 0: - alg_type = 'dn' - shift = _powerlaw_fit_dN(smps_over, aps_over, alg_type) + ## shift data calculate + ## original + if _count == 0: + alg_type = 'dn' + shift = _powerlaw_fit_dN(smps_over, aps_over, alg_type) - if dndsdv_alg: - shift_dsdv = _corr_with_dNdSdV(smps_over, aps_over, 'dndsdv').mask(shift.isna()) + if dndsdv_alg: + shift_dsdv = _corr_with_dNdSdV(smps_over, aps_over, 'dndsdv').mask(shift.isna()) - ## aps correct - else: - alg_type = 'cor_dndsdv' - shift_cor = _powerlaw_fit_dN(smps_over, aps_over, 'cor_dn') + ## aps correct + else: + alg_type = 'cor_dndsdv' + shift_cor = _powerlaw_fit_dN(smps_over, aps_over, 'cor_dn') - if dndsdv_alg: - shift = _corr_with_dNdSdV(smps_over, aps_over, alg_type).mask(shift_cor.isna()) + if dndsdv_alg: + shift = _corr_with_dNdSdV(smps_over, aps_over, alg_type).mask(shift_cor.isna()) - ## merge aps and smps - ## 1. power law fit (dn) -> return dn data and aps correct factor - ## 2. correaltion with dn, ds, dv -> return corrected dn_ds_dv data - if (alg_type == 'dn') | dndsdv_alg: - merge_arg = (smps_input, aps_input, shift, smps_overlap_lowbound, aps_fit_highbound) + ## merge aps and smps + ## 1. power law fit (dn) -> return dn data and aps correct factor + ## 2. 
correaltion with dn, ds, dv -> return corrected dn_ds_dv data + if (alg_type == 'dn') | dndsdv_alg: + merge_arg = (smps_input, aps_input, shift, smps_overlap_lowbound, aps_fit_highbound) - merge_data, density, _corr = _merge_data(*merge_arg, 'mobility', _alg_type=alg_type) - density.columns = ['density'] + merge_data, density, _corr = _merge_data(*merge_arg, 'mobility', _alg_type=alg_type) + density.columns = ['density'] - ## without aps correct - if _count == 0: - ## merge aps and smps - ## dn_ds_dv data - if dndsdv_alg: - alg_type = 'dndsdv' - merge_arg = (smps_input, aps_input, shift_dsdv, smps_overlap_lowbound, aps_fit_highbound) + ## without aps correct + if _count == 0: + ## merge aps and smps + ## dn_ds_dv data + if dndsdv_alg: + alg_type = 'dndsdv' + merge_arg = (smps_input, aps_input, shift_dsdv, smps_overlap_lowbound, aps_fit_highbound) - merge_data_dsdv, density_dsdv, _ = _merge_data(*merge_arg, 'mobility', _alg_type=alg_type) - density_dsdv.columns = ['density'] + merge_data_dsdv, density_dsdv, _ = _merge_data(*merge_arg, 'mobility', _alg_type=alg_type) + density_dsdv.columns = ['density'] - ## dn data - merge_data_dn, density_dn = merge_data.copy(), density.copy() + ## dn data + merge_data_dn, density_dn = merge_data.copy(), density.copy() - ## correct aps data - corr = _corr.resample('1d').mean().reindex(smps.index).ffill() - corr = corr.mask(corr < 1, 1) + ## correct aps data + corr = _corr.resample('1d').mean().reindex(smps.index).ffill() + corr = corr.mask(corr < 1, 1) - aps_input.loc[:, corr.keys()] *= corr - aps_over = aps_input.copy() + aps_input.loc[:, corr.keys()] *= corr + aps_over = aps_input.copy() - ## with aps correct - else: - ## merge aps and smps - ## dn data - alg_type = 'cor_dn' - merge_arg = (smps_input, aps_input, shift_cor, smps_overlap_lowbound, aps_fit_highbound) + ## with aps correct + else: + ## merge aps and smps + ## dn data + alg_type = 'cor_dn' + merge_arg = (smps_input, aps_input, shift_cor, smps_overlap_lowbound, aps_fit_highbound) - merge_data_cor_dn, density_cor_dn, _ = _merge_data(*merge_arg, 'mobility', _alg_type=alg_type) - density_cor_dn.columns = ['density'] + merge_data_cor_dn, density_cor_dn, _ = _merge_data(*merge_arg, 'mobility', _alg_type=alg_type) + density_cor_dn.columns = ['density'] - ## out - out_rho = concat([density_dn, density_cor_dn, density_dsdv, density], axis=1) - out_rho.columns = ['dn', 'cor_dn', 'dndsdv', 'cor_dndsdv'] + ## out + out_rho = concat([density_dn, density_cor_dn, density_dsdv, density], axis=1) + out_rho.columns = ['dn', 'cor_dn', 'dndsdv', 'cor_dndsdv'] - out_dic = { - 'data_cor_dndsdv': merge_data, - 'data_dn': merge_data_dn, - 'data_dndsdv': merge_data_dsdv, - 'data_cor_dn': merge_data_cor_dn, + out_dic = { + 'data_cor_dndsdv': merge_data, + 'data_dn': merge_data_dn, + 'data_dndsdv': merge_data_dsdv, + 'data_cor_dn': merge_data_cor_dn, - 'density': out_rho, + 'density': out_rho, - # 'data_all_aer' : merge_data_aer, + # 'data_all_aer' : merge_data_aer, - # 'density_cor_dndsdv' : density, - # 'density_dn' : density_dn, - # 'density_dndsdv' : density_dsdv, - # 'density_cor_dn' : density_cor_dn, - } + # 'density_cor_dndsdv' : density, + # 'density_dn' : density_dn, + # 'density_dndsdv' : density_dsdv, + # 'density_cor_dn' : density_cor_dn, + } - ## process data - for _nam, _df in out_dic.items(): - out_dic[_nam] = _df.reindex(smps.index).copy() + ## process data + for _nam, _df in out_dic.items(): + out_dic[_nam] = _df.reindex(smps.index).copy() - return out_dic + return out_dic diff --git 
a/AeroViz/dataProcess/SizeDistr/_merge_v4.py b/AeroViz/dataProcess/SizeDistr/_merge_v4.py index c3d847d..34d9707 100644 --- a/AeroViz/dataProcess/SizeDistr/_merge_v4.py +++ b/AeroViz/dataProcess/SizeDistr/_merge_v4.py @@ -1,27 +1,25 @@ -# from ContainerHandle.dataProcess.utils import _union_index +# from ContainerHandle.dataProcess.config import _union_index +import warnings from datetime import datetime as dtm +from functools import partial +from multiprocessing import Pool, cpu_count import numpy as np from pandas import DataFrame, concat, DatetimeIndex # from scipy.interpolate import interp1d from scipy.interpolate import UnivariateSpline as unvpline, interp1d -from multiprocessing import Pool, cpu_count -from functools import partial - -import warnings - warnings.filterwarnings("ignore") __all__ = ['_merge_SMPS_APS'] def _powerlaw_fit(_coeA, _coeB, _aps, _idx, _factor): - # breakpoint() + # breakpoint() - _smps_fit_df = _coeA * (_aps.keys().values / _factor) ** _coeB - return DataFrame(((_smps_fit_df.copy() - _aps.copy()) ** 2).sum(axis=1), columns=[_idx]) + _smps_fit_df = _coeA * (_aps.keys().values / _factor) ** _coeB + return DataFrame(((_smps_fit_df.copy() - _aps.copy()) ** 2).sum(axis=1), columns=[_idx]) ## Calculate S2 @@ -31,394 +29,394 @@ def _powerlaw_fit(_coeA, _coeB, _aps, _idx, _factor): ## return : S2 # def _S2_calculate_dN(_smps, _aps): def _powerlaw_fit_dN(_smps, _aps, _alg_type): - print(f"\t\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range fitting : {_alg_type}\033[0m") + print(f"\t\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range fitting : {_alg_type}\033[0m") - ## overlap fitting - ## parmeter - _dt_indx = _smps.index + ## overlap fitting + ## parmeter + _dt_indx = _smps.index - ## use SMPS data apply power law fitting - ## y = Ax^B, A = e**coefa, B = coefb, x = logx, y = logy - ## ref : http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html - ## power law fit to SMPS num conc at upper bins to log curve + ## use SMPS data apply power law fitting + ## y = Ax^B, A = e**coefa, B = coefb, x = logx, y = logy + ## ref : http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html + ## power law fit to SMPS num conc at upper bins to log curve - ## coefficient A, B - _smps_qc_cond = ((_smps != 0) & np.isfinite(_smps)) - _smps_qc = _smps.where(_smps_qc_cond) + ## coefficient A, B + _smps_qc_cond = ((_smps != 0) & np.isfinite(_smps)) + _smps_qc = _smps.where(_smps_qc_cond) - _size = _smps_qc_cond.sum(axis=1) - _size = _size.where(_size != 0.).copy() + _size = _smps_qc_cond.sum(axis=1) + _size = _size.where(_size != 0.).copy() - _logx, _logy = np.log(_smps_qc.keys()._data.astype(float)), np.log(_smps_qc) - _x, _y, _xy, _xx = _logx.sum(), _logy.sum(axis=1), (_logx * _logy).sum(axis=1), (_logx ** 2).sum() + _logx, _logy = np.log(_smps_qc.keys()._data.astype(float)), np.log(_smps_qc) + _x, _y, _xy, _xx = _logx.sum(), _logy.sum(axis=1), (_logx * _logy).sum(axis=1), (_logx ** 2).sum() - _coeB = ((_size * _xy - _x * _y) / (_size * _xx - _x ** 2.)) - _coeA = np.exp((_y - _coeB * _x) / _size).values.reshape(-1, 1) - _coeB = _coeB.values.reshape(-1, 1) + _coeB = ((_size * _xy - _x * _y) / (_size * _xx - _x ** 2.)) + _coeA = np.exp((_y - _coeB * _x) / _size).values.reshape(-1, 1) + _coeB = _coeB.values.reshape(-1, 1) - ## rebuild shift smps data by coe. A, B - ## x_shift = (y_ori/A)**(1/B) - _aps_shift_x = (_aps / _coeA) ** (1 / _coeB) - _aps_shift_x = _aps_shift_x.where(np.isfinite(_aps_shift_x)) + ## rebuild shift smps data by coe. 
A, B + ## x_shift = (y_ori/A)**(1/B) + _aps_shift_x = (_aps / _coeA) ** (1 / _coeB) + _aps_shift_x = _aps_shift_x.where(np.isfinite(_aps_shift_x)) - ## the least squares of diameter - ## the shift factor which the closest to 1 - _shift_val = np.arange(0.3, 3.05, .05) ** .5 - # _shift_val = np.arange(0.9, 1.805, .005)**.5 + ## the least squares of diameter + ## the shift factor which the closest to 1 + _shift_val = np.arange(0.3, 3.05, .05) ** .5 + # _shift_val = np.arange(0.9, 1.805, .005)**.5 - _shift_factor = DataFrame(columns=range(_shift_val.size), index=_aps_shift_x.index) - _shift_factor.loc[:, :] = _shift_val + _shift_factor = DataFrame(columns=range(_shift_val.size), index=_aps_shift_x.index) + _shift_factor.loc[:, :] = _shift_val - # _dropna_idx = _shift_factor.dropna(how='all').index.copy() - _dropna_idx = _aps_shift_x.dropna(how='all').index.copy() + # _dropna_idx = _shift_factor.dropna(how='all').index.copy() + _dropna_idx = _aps_shift_x.dropna(how='all').index.copy() - ## use the target function to get the similar aps and smps bin - ## S2 = sum( (smps_fit_line(dia) - aps(dia*shift_factor) )**2 ) - ## assumption : the same diameter between smps and aps should get the same conc. + ## use the target function to get the similar aps and smps bin + ## S2 = sum( (smps_fit_line(dia) - aps(dia*shift_factor) )**2 ) + ## assumption : the same diameter between smps and aps should get the same conc. - ## be sure they art in log value - _S2 = DataFrame(index=_aps_shift_x.index) - _dia_table = DataFrame(np.full(_aps_shift_x.shape, _aps_shift_x.keys()), - columns=_aps_shift_x.keys(), index=_aps_shift_x.index) + ## be sure they art in log value + _S2 = DataFrame(index=_aps_shift_x.index) + _dia_table = DataFrame(np.full(_aps_shift_x.shape, _aps_shift_x.keys()), + columns=_aps_shift_x.keys(), index=_aps_shift_x.index) - pool = Pool(cpu_count()) + pool = Pool(cpu_count()) - _S2 = pool.starmap(partial(_powerlaw_fit, _coeA, _coeB, _aps), list(enumerate(_shift_val))) + _S2 = pool.starmap(partial(_powerlaw_fit, _coeA, _coeB, _aps), list(enumerate(_shift_val))) - pool.close() - pool.join() + pool.close() + pool.join() - S2 = concat(_S2, axis=1)[np.arange(_shift_val.size)] - # S2 /= S2.max(axis=1).to_frame().values + S2 = concat(_S2, axis=1)[np.arange(_shift_val.size)] + # S2 /= S2.max(axis=1).to_frame().values - shift_factor_dN = DataFrame( - _shift_factor.loc[_dropna_idx].values[range(len(_dropna_idx)), S2.loc[_dropna_idx].idxmin(axis=1).values], - index=_dropna_idx).reindex(_dt_indx).astype(float) + shift_factor_dN = DataFrame( + _shift_factor.loc[_dropna_idx].values[range(len(_dropna_idx)), S2.loc[_dropna_idx].idxmin(axis=1).values], + index=_dropna_idx).reindex(_dt_indx).astype(float) - shift_factor_dN = shift_factor_dN.mask((shift_factor_dN ** 2 < 0.6) | (shift_factor_dN ** 2 > 2.6)) + shift_factor_dN = shift_factor_dN.mask((shift_factor_dN ** 2 < 0.6) | (shift_factor_dN ** 2 > 2.6)) - return shift_factor_dN + return shift_factor_dN def _corr_fc(_aps_dia, _smps_dia, _smps_dn, _aps_dn, _smooth, _idx, _sh): - ds_fc = lambda _dt: _dt * _dt.index ** 2 * np.pi - dv_fc = lambda _dt: _dt * _dt.index ** 3 * np.pi / 6 + ds_fc = lambda _dt: _dt * _dt.index ** 2 * np.pi + dv_fc = lambda _dt: _dt * _dt.index ** 3 * np.pi / 6 - _aps_sh = _aps_dia / _sh - _aps_sh_inp = _aps_sh.where((_aps_sh >= 500) & (_aps_sh <= 1500.)).copy() - _aps_sh_corr = _aps_sh.where((_aps_sh >= _smps_dia[-1]) & (_aps_sh <= 1500.)).copy() + _aps_sh = _aps_dia / _sh + _aps_sh_inp = _aps_sh.where((_aps_sh >= 500) & (_aps_sh <= 
1500.)).copy() + _aps_sh_corr = _aps_sh.where((_aps_sh >= _smps_dia[-1]) & (_aps_sh <= 1500.)).copy() - corr_x = np.append(_smps_dia, _aps_sh_corr.dropna()) + corr_x = np.append(_smps_dia, _aps_sh_corr.dropna()) - input_x = np.append(_smps_dia, _aps_sh_inp.dropna()) - input_y = concat([_smps_dn, _aps_dn.iloc[:, ~np.isnan(_aps_sh_inp)]], axis=1) - input_y.columns = input_x + input_x = np.append(_smps_dia, _aps_sh_inp.dropna()) + input_y = concat([_smps_dn, _aps_dn.iloc[:, ~np.isnan(_aps_sh_inp)]], axis=1) + input_y.columns = input_x - input_x.sort() - input_y = input_y[input_x] - corr_y = input_y[corr_x] + input_x.sort() + input_y = input_y[input_x] + corr_y = input_y[corr_x] - S2_lst = [] - for (_tm, _inp_y_dn), (_tm, _cor_y_dn) in zip(input_y.dropna(how='all').iterrows(), - corr_y.dropna(how='all').iterrows()): - ## corr(spec_data, spec_spline) - _spl_dt = [unvpline(input_x, _inp_y, s=_smooth)(corr_x) for _inp_y in - [_inp_y_dn, ds_fc(_inp_y_dn), dv_fc(_inp_y_dn)]] - _cor_dt = [_cor_y_dn, ds_fc(_cor_y_dn), dv_fc(_cor_y_dn)] + S2_lst = [] + for (_tm, _inp_y_dn), (_tm, _cor_y_dn) in zip(input_y.dropna(how='all').iterrows(), + corr_y.dropna(how='all').iterrows()): + ## corr(spec_data, spec_spline) + _spl_dt = [unvpline(input_x, _inp_y, s=_smooth)(corr_x) for _inp_y in + [_inp_y_dn, ds_fc(_inp_y_dn), dv_fc(_inp_y_dn)]] + _cor_dt = [_cor_y_dn, ds_fc(_cor_y_dn), dv_fc(_cor_y_dn)] - _cor_all = sum([np.corrcoef(_cor, _spl)[0, 1] for _cor, _spl in zip(_cor_dt, _spl_dt)]) + _cor_all = sum([np.corrcoef(_cor, _spl)[0, 1] for _cor, _spl in zip(_cor_dt, _spl_dt)]) - S2_lst.append((3 - _cor_all) / 3) + S2_lst.append((3 - _cor_all) / 3) - return DataFrame(S2_lst, columns=[_idx]) + return DataFrame(S2_lst, columns=[_idx]) # def _S2_calculate_dSdV(_smps, _aps, _shft_dn, _S2, smps_ori, aps_ori): # def _S2_calculate_dSdV(_smps, _aps, smps_ori=None): def _corr_with_dNdSdV(_smps, _aps, _alg_type): - print(f"\t\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range correlation : {_alg_type}\033[0m") + print(f"\t\t\t{dtm.now().strftime('%m/%d %X')} : \033[92moverlap range correlation : {_alg_type}\033[0m") - _smps_dia = _smps.keys().astype(float) - _aps_dia = _aps.keys().astype(float) + _smps_dia = _smps.keys().astype(float) + _aps_dia = _aps.keys().astype(float) - all_index = _smps.index.copy() - qc_index = DatetimeIndex(set(_smps.dropna(how='all').index) & set(_aps.dropna(how='all').index)).sort_values() + all_index = _smps.index.copy() + qc_index = DatetimeIndex(set(_smps.dropna(how='all').index) & set(_aps.dropna(how='all').index)).sort_values() - _smps_dn = _smps.loc[qc_index].copy() - _aps_dn = _aps.loc[qc_index].copy() + _smps_dn = _smps.loc[qc_index].copy() + _aps_dn = _aps.loc[qc_index].copy() - ds_fc = lambda _dt: _dt * _dt.index ** 2 * np.pi - dv_fc = lambda _dt: _dt * _dt.index ** 3 * np.pi / 6 + ds_fc = lambda _dt: _dt * _dt.index ** 2 * np.pi + dv_fc = lambda _dt: _dt * _dt.index ** 3 * np.pi / 6 - _std_bin = np.geomspace(11.8, 19810, 230) - _merge_bin = _std_bin[(_std_bin >= _smps_dia[-1]) & (_std_bin < 1500)].copy() + _std_bin = np.geomspace(11.8, 19810, 230) + _merge_bin = _std_bin[(_std_bin >= _smps_dia[-1]) & (_std_bin < 1500)].copy() - _smooth = 50 + _smooth = 50 - _shift_val = np.arange(0.5, 2.605, .005) ** .5 - _shift_val = np.arange(0.9, 2.01, .01) ** .5 - _shift_val = np.arange(0.9, 2.65, .05) ** .5 + _shift_val = np.arange(0.5, 2.605, .005) ** .5 + _shift_val = np.arange(0.9, 2.01, .01) ** .5 + _shift_val = np.arange(0.9, 2.65, .05) ** .5 - ## spline fitting with shift aps and smps 
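The _corr_fc/_corr_with_dNdSdV pair above scores every candidate shift factor by how well a smoothing spline through the combined, shifted spectrum reproduces the measured points simultaneously in number, surface and volume space (score = (3 - sum of the three correlation coefficients) / 3, lower is better), and the Pool that follows in the hunk evaluates all candidates in parallel and keeps the minimum per timestamp. A simplified, single-spectrum and single-process sketch of that scoring loop, reusing the candidate grid and the 1500 nm cut visible above; the function and variable names are assumptions:

import numpy as np
from scipy.interpolate import UnivariateSpline

def best_shift(diam_smps, dn_smps, diam_aps, dn_aps,
               candidates=np.arange(0.9, 2.65, 0.05) ** 0.5, smooth=50):
    scores = []
    for sh in candidates:
        d_aps = diam_aps / sh                                # shift APS bins toward mobility diameters
        keep = (d_aps >= diam_smps[-1]) & (d_aps <= 1500.)
        x = np.append(diam_smps, d_aps[keep])
        y = np.append(dn_smps, dn_aps[keep])
        order = np.argsort(x)
        x, y = x[order], y[order]

        r_sum = 0.0
        for w in (np.ones_like(x), np.pi * x ** 2, np.pi * x ** 3 / 6):   # dN, dS, dV weightings
            spline = UnivariateSpline(x, y * w, s=smooth)
            r_sum += np.corrcoef(y * w, spline(x))[0, 1]
        scores.append((3. - r_sum) / 3.)                     # 0 would mean perfect agreement in all three
    return candidates[int(np.argmin(scores))]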
- pool = Pool(cpu_count()) + ## spline fitting with shift aps and smps + pool = Pool(cpu_count()) - S2_lst = pool.starmap(partial(_corr_fc, _aps_dia, _smps_dia, _smps_dn, _aps_dn, _smooth), - list(enumerate(_shift_val))) + S2_lst = pool.starmap(partial(_corr_fc, _aps_dia, _smps_dia, _smps_dn, _aps_dn, _smooth), + list(enumerate(_shift_val))) - pool.close() - pool.join() + pool.close() + pool.join() - S2_table = concat(S2_lst, axis=1).set_index(qc_index)[np.arange(_shift_val.size)].astype(float).dropna() - min_shft = S2_table.idxmin(axis=1).values + S2_table = concat(S2_lst, axis=1).set_index(qc_index)[np.arange(_shift_val.size)].astype(float).dropna() + min_shft = S2_table.idxmin(axis=1).values - return DataFrame(_shift_val[min_shft.astype(int)], index=S2_table.index).astype(float).reindex(_smps.index) + return DataFrame(_shift_val[min_shft.astype(int)], index=S2_table.index).astype(float).reindex(_smps.index) ## Create merge data ## shift all smps bin and remove the aps bin which smaller than the latest old smps bin ## Return : merge bins, merge data, density def _merge_data(_smps_ori, _aps_ori, _shift_ori, _smps_lb, _aps_hb, _shift_mode, _alg_type): - print(f"\t\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mcreate merge data : {_shift_mode} and {_alg_type}\033[0m") + print(f"\t\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mcreate merge data : {_shift_mode} and {_alg_type}\033[0m") - _ori_idx = _smps_ori.index.copy() - # _merge_idx = _smps_ori.loc[_aps_ori.dropna(how='all').index].dropna(how='all').index + _ori_idx = _smps_ori.index.copy() + # _merge_idx = _smps_ori.loc[_aps_ori.dropna(how='all').index].dropna(how='all').index - _corr_aps_cond = _aps_ori.keys() < 700 - _corr_aps_ky = _aps_ori.keys()[_corr_aps_cond] + _corr_aps_cond = _aps_ori.keys() < 700 + _corr_aps_ky = _aps_ori.keys()[_corr_aps_cond] - _merge_idx = DatetimeIndex(set(_smps_ori.dropna(how='all').index) & set(_aps_ori.dropna(how='all').index) & - set(_shift_ori.dropna(how='all').index)).sort_values() + _merge_idx = DatetimeIndex(set(_smps_ori.dropna(how='all').index) & set(_aps_ori.dropna(how='all').index) & + set(_shift_ori.dropna(how='all').index)).sort_values() - _smps, _aps, _shift = _smps_ori.loc[_merge_idx], _aps_ori.loc[_merge_idx], _shift_ori.loc[_merge_idx].values + _smps, _aps, _shift = _smps_ori.loc[_merge_idx], _aps_ori.loc[_merge_idx], _shift_ori.loc[_merge_idx].values - ## parameter - _smps_key, _aps_key = _smps.keys()._data.astype(float), _aps.keys()._data.astype(float) + ## parameter + _smps_key, _aps_key = _smps.keys()._data.astype(float), _aps.keys()._data.astype(float) - _cntr = 1000 - _bin_lb = _smps_key[-1] + _cntr = 1000 + _bin_lb = _smps_key[-1] - ## make shift bins - _smps_bin = np.full(_smps.shape, _smps_key) - _aps_bin = np.full(_aps.shape, _aps_key) + ## make shift bins + _smps_bin = np.full(_smps.shape, _smps_key) + _aps_bin = np.full(_aps.shape, _aps_key) - _std_bin = np.geomspace(_smps_key[0], _aps_key[-1], 230) - _std_bin_merge = _std_bin[(_std_bin < _cntr) & (_std_bin > _bin_lb)] - _std_bin_inte1 = _std_bin[_std_bin <= _bin_lb] - _std_bin_inte2 = _std_bin[_std_bin >= _cntr] + _std_bin = np.geomspace(_smps_key[0], _aps_key[-1], 230) + _std_bin_merge = _std_bin[(_std_bin < _cntr) & (_std_bin > _bin_lb)] + _std_bin_inte1 = _std_bin[_std_bin <= _bin_lb] + _std_bin_inte2 = _std_bin[_std_bin >= _cntr] - if _shift_mode == 'mobility': - _aps_bin /= _shift + if _shift_mode == 'mobility': + _aps_bin /= _shift - elif _shift_mode == 'aerodynamic': - _smps_bin *= _shift + elif _shift_mode == 
'aerodynamic': + _smps_bin *= _shift - ## merge - _merge_lst, _corr_lst = [], [] - for _bin_smps, _bin_aps, _dt_smps, _dt_aps, _sh in zip(_smps_bin, _aps_bin, _smps.values, _aps.values, _shift): - ## keep complete smps bins and data - ## remove the aps bin data lower than smps bin - _condi = _bin_aps >= _bin_smps[-1] + ## merge + _merge_lst, _corr_lst = [], [] + for _bin_smps, _bin_aps, _dt_smps, _dt_aps, _sh in zip(_smps_bin, _aps_bin, _smps.values, _aps.values, _shift): + ## keep complete smps bins and data + ## remove the aps bin data lower than smps bin + _condi = _bin_aps >= _bin_smps[-1] - _merge_bin = np.hstack((_bin_smps, _bin_aps[_condi])) - _merge_dt = np.hstack((_dt_smps, _dt_aps[_condi])) + _merge_bin = np.hstack((_bin_smps, _bin_aps[_condi])) + _merge_dt = np.hstack((_dt_smps, _dt_aps[_condi])) - _merge_fit_loc = (_merge_bin < 1500) & (_merge_bin > _smps_lb) + _merge_fit_loc = (_merge_bin < 1500) & (_merge_bin > _smps_lb) - ## coeA and coeB - _unvpl_fc = unvpline(np.log(_merge_bin[_merge_fit_loc]), np.log(_merge_dt[_merge_fit_loc]), s=50) - _inte_fc = interp1d(_merge_bin, _merge_dt, kind='linear', fill_value='extrapolate') + ## coeA and coeB + _unvpl_fc = unvpline(np.log(_merge_bin[_merge_fit_loc]), np.log(_merge_dt[_merge_fit_loc]), s=50) + _inte_fc = interp1d(_merge_bin, _merge_dt, kind='linear', fill_value='extrapolate') - _merge_dt_fit = np.hstack((_inte_fc(_std_bin_inte1), np.exp(_unvpl_fc(np.log(_std_bin_merge))), - _inte_fc(_std_bin_inte2))) + _merge_dt_fit = np.hstack((_inte_fc(_std_bin_inte1), np.exp(_unvpl_fc(np.log(_std_bin_merge))), + _inte_fc(_std_bin_inte2))) - _merge_lst.append(_merge_dt_fit) - _corr_lst.append(interp1d(_std_bin, _merge_dt_fit)(_bin_aps[_corr_aps_cond])) + _merge_lst.append(_merge_dt_fit) + _corr_lst.append(interp1d(_std_bin, _merge_dt_fit)(_bin_aps[_corr_aps_cond])) - _df_merge = DataFrame(_merge_lst, columns=_std_bin, index=_merge_idx) - _df_merge = _df_merge.mask(_df_merge < 0) + _df_merge = DataFrame(_merge_lst, columns=_std_bin, index=_merge_idx) + _df_merge = _df_merge.mask(_df_merge < 0) - _df_corr = DataFrame(_corr_lst, columns=_corr_aps_ky, index=_merge_idx) / _aps_ori.loc[_merge_idx, _corr_aps_ky] + _df_corr = DataFrame(_corr_lst, columns=_corr_aps_ky, index=_merge_idx) / _aps_ori.loc[_merge_idx, _corr_aps_ky] - ## process output df - ## average, align with index - def _out_df(*_df_arg, **_df_kwarg): - _df = DataFrame(*_df_arg, **_df_kwarg).reindex(_ori_idx) - _df.index.name = 'time' - return _df + ## process output df + ## average, align with index + def _out_df(*_df_arg, **_df_kwarg): + _df = DataFrame(*_df_arg, **_df_kwarg).reindex(_ori_idx) + _df.index.name = 'time' + return _df - return _out_df(_df_merge), _out_df(_shift_ori ** 2), _out_df(_df_corr) + return _out_df(_df_merge), _out_df(_shift_ori ** 2), _out_df(_df_corr) def _fitness_func(psd, rho, pm25): - psd_pm25 = psd[psd.keys()[psd.keys().values <= 2500]] * np.diff(np.log10(psd.keys())).mean() - rho_pm25 = pm25 / (psd_pm25 * np.pi * psd_pm25.keys().values ** 3 / 6 * 1e-9).sum(axis=1, min_count=1) + psd_pm25 = psd[psd.keys()[psd.keys().values <= 2500]] * np.diff(np.log10(psd.keys())).mean() + rho_pm25 = pm25 / (psd_pm25 * np.pi * psd_pm25.keys().values ** 3 / 6 * 1e-9).sum(axis=1, min_count=1) - return (rho['density'] - rho_pm25) ** 2 + return (rho['density'] - rho_pm25) ** 2 def merge_SMPS_APS(df_smps, df_aps, df_pm25, aps_unit='um', smps_overlap_lowbound=500, aps_fit_highbound=1000, - dndsdv_alg=True, times_range=(0.8, 1.25, .05)): - # merge_data, merge_data_dn, 
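The 'mobility' and 'aerodynamic' branches apply the same shift factor in opposite directions. For spherical particles (shape factor 1), and ignoring slip-correction differences, aerodynamic and mobility diameters are related by d_aero approximately equal to d_mob * sqrt(rho_eff / rho_0) with rho_0 = 1 g cm^-3, which is why dividing the APS bins by the shift moves them onto the SMPS mobility axis and why the routine reports shift**2 as an effective density. A sketch under those stated assumptions (function names are illustrative):

import numpy as np

def aps_to_mobility(d_aero_nm, shift):
    # shift ~ sqrt(rho_eff); same operation as _aps_bin /= _shift above
    return np.asarray(d_aero_nm) / shift

def smps_to_aerodynamic(d_mob_nm, shift):
    # same operation as _smps_bin *= _shift in the 'aerodynamic' branch
    return np.asarray(d_mob_nm) * shift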
merge_data_dsdv, merge_data_cor_dn, density, density_dn, density_dsdv, density_cor_dn = [DataFrame([np.nan])] * 8 + dndsdv_alg=True, times_range=(0.8, 1.25, .05)): + # merge_data, merge_data_dn, merge_data_dsdv, merge_data_cor_dn, density, density_dn, density_dsdv, density_cor_dn = [DataFrame([np.nan])] * 8 - ## set to the same units - smps, aps = df_smps.copy(), df_aps.copy() - smps.columns = smps.keys().to_numpy(float) - aps.columns = aps.keys().to_numpy(float) + ## set to the same units + smps, aps = df_smps.copy(), df_aps.copy() + smps.columns = smps.keys().to_numpy(float) + aps.columns = aps.keys().to_numpy(float) - if aps_unit == 'um': - aps.columns = aps.keys() * 1e3 + if aps_unit == 'um': + aps.columns = aps.keys() * 1e3 - fitness_typ = dict(dn=[], cor_dn=[], dndsdv=[], cor_dndsdv=[]) - shift_typ = dict(dn=[], cor_dn=[], dndsdv=[], cor_dndsdv=[]) - oth_typ = dict() + fitness_typ = dict(dn=[], cor_dn=[], dndsdv=[], cor_dndsdv=[]) + shift_typ = dict(dn=[], cor_dn=[], dndsdv=[], cor_dndsdv=[]) + oth_typ = dict() - times_ary = np.arange(*times_range).round(4) - # times_ary = np.arange(*(0.8, 0.9, .05)).round(4) + times_ary = np.arange(*times_range).round(4) + # times_ary = np.arange(*(0.8, 0.9, .05)).round(4) - for times in times_ary: + for times in times_ary: - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mSMPS times value : {times}\033[0m") + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mSMPS times value : {times}\033[0m") - aps_input = aps.copy() - aps_over = aps_input.loc[:, (aps.keys() > 700) & (aps.keys() < 1000)].copy() + aps_input = aps.copy() + aps_over = aps_input.loc[:, (aps.keys() > 700) & (aps.keys() < 1000)].copy() - smps_input = (smps * times).copy() - smps_over = smps_input[smps.keys()[smps.keys() > 500]].copy() + smps_input = (smps * times).copy() + smps_over = smps_input[smps.keys()[smps.keys() > 500]].copy() - for _count in range(2): + for _count in range(2): - ## shift data calculate - ## original - if _count == 0: - alg_type = 'dn' - shift = _powerlaw_fit_dN(smps_over, aps_over, alg_type) + ## shift data calculate + ## original + if _count == 0: + alg_type = 'dn' + shift = _powerlaw_fit_dN(smps_over, aps_over, alg_type) - if dndsdv_alg: - shift_dsdv = _corr_with_dNdSdV(smps_over, aps_over, 'dndsdv').mask(shift.isna()) + if dndsdv_alg: + shift_dsdv = _corr_with_dNdSdV(smps_over, aps_over, 'dndsdv').mask(shift.isna()) - ## aps correct - else: - alg_type = 'cor_dndsdv' - shift_cor = _powerlaw_fit_dN(smps_over, aps_over, 'cor_dn') + ## aps correct + else: + alg_type = 'cor_dndsdv' + shift_cor = _powerlaw_fit_dN(smps_over, aps_over, 'cor_dn') - if dndsdv_alg: - shift = _corr_with_dNdSdV(smps_over, aps_over, alg_type).mask(shift_cor.isna()) + if dndsdv_alg: + shift = _corr_with_dNdSdV(smps_over, aps_over, alg_type).mask(shift_cor.isna()) - ## merge aps and smps - ## 1. power law fit (dn) -> return dn data and aps correct factor - ## 2. correaltion with dn, ds, dv -> return corrected dn_ds_dv data - if (alg_type == 'dn') | dndsdv_alg: - merge_arg = (smps_input, aps_input, shift, smps_overlap_lowbound, aps_fit_highbound) + ## merge aps and smps + ## 1. power law fit (dn) -> return dn data and aps correct factor + ## 2. 
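_fitness_func above scores each candidate SMPS scaling ('times') by comparing the density implied by the merge (the squared shift) with a mass-closure density derived from PM2.5 and the integrated sub-2.5 um volume. A restated sketch of that mass-closure density, with units as I read them from the 1e-9 factor (pm25_effective_density is an illustrative name):

import numpy as np

def pm25_effective_density(psd, pm25):
    # psd: merged dN/dlogDp (columns = bin diameters in nm); pm25: ug m^-3 series.
    dp = psd.columns.to_numpy(float)
    dlogdp = np.diff(np.log10(dp)).mean()
    dn = psd.loc[:, dp <= 2500] * dlogdp                            # dN per bin up to 2.5 um
    vol = dn * np.pi * dn.columns.to_numpy(float) ** 3 / 6 * 1e-9   # nm^3 cm^-3 -> um^3 cm^-3
    return pm25 / vol.sum(axis=1, min_count=1)                      # ~ g cm^-3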
correaltion with dn, ds, dv -> return corrected dn_ds_dv data + if (alg_type == 'dn') | dndsdv_alg: + merge_arg = (smps_input, aps_input, shift, smps_overlap_lowbound, aps_fit_highbound) - merge_data, density, _corr = _merge_data(*merge_arg, 'mobility', _alg_type=alg_type) - density.columns = ['density'] + merge_data, density, _corr = _merge_data(*merge_arg, 'mobility', _alg_type=alg_type) + density.columns = ['density'] - fitness_typ[alg_type].append(_fitness_func(merge_data, density, df_pm25)) - shift_typ[alg_type].append(shift[0]) + fitness_typ[alg_type].append(_fitness_func(merge_data, density, df_pm25)) + shift_typ[alg_type].append(shift[0]) - ## without aps correct - if _count == 0: - ## merge aps and smps - ## dn_ds_dv data - if dndsdv_alg: - alg_type = 'dndsdv' - merge_arg = (smps_input, aps_input, shift_dsdv, smps_overlap_lowbound, aps_fit_highbound) + ## without aps correct + if _count == 0: + ## merge aps and smps + ## dn_ds_dv data + if dndsdv_alg: + alg_type = 'dndsdv' + merge_arg = (smps_input, aps_input, shift_dsdv, smps_overlap_lowbound, aps_fit_highbound) - merge_data_dsdv, density_dsdv, _ = _merge_data(*merge_arg, 'mobility', _alg_type=alg_type) - density_dsdv.columns = ['density'] + merge_data_dsdv, density_dsdv, _ = _merge_data(*merge_arg, 'mobility', _alg_type=alg_type) + density_dsdv.columns = ['density'] - fitness_typ[alg_type].append(_fitness_func(merge_data_dsdv, density_dsdv, df_pm25)) - shift_typ[alg_type].append(shift_dsdv[0]) + fitness_typ[alg_type].append(_fitness_func(merge_data_dsdv, density_dsdv, df_pm25)) + shift_typ[alg_type].append(shift_dsdv[0]) - ## dn data - merge_data_dn, density_dn = merge_data.copy(), density.copy() + ## dn data + merge_data_dn, density_dn = merge_data.copy(), density.copy() - ## correct aps data - corr = _corr.resample('1d').mean().reindex(smps.index).ffill() - corr = corr.mask(corr < 1, 1) + ## correct aps data + corr = _corr.resample('1d').mean().reindex(smps.index).ffill() + corr = corr.mask(corr < 1, 1) - aps_input.loc[:, corr.keys()] *= corr - aps_over = aps_input.copy() + aps_input.loc[:, corr.keys()] *= corr + aps_over = aps_input.copy() - ## with aps correct - else: - ## merge aps and smps - ## dn data - alg_type = 'cor_dn' - merge_arg = (smps_input, aps_input, shift_cor, smps_overlap_lowbound, aps_fit_highbound) + ## with aps correct + else: + ## merge aps and smps + ## dn data + alg_type = 'cor_dn' + merge_arg = (smps_input, aps_input, shift_cor, smps_overlap_lowbound, aps_fit_highbound) - merge_data_cor_dn, density_cor_dn, _ = _merge_data(*merge_arg, 'mobility', _alg_type=alg_type) - density_cor_dn.columns = ['density'] + merge_data_cor_dn, density_cor_dn, _ = _merge_data(*merge_arg, 'mobility', _alg_type=alg_type) + density_cor_dn.columns = ['density'] - fitness_typ[alg_type].append(_fitness_func(merge_data_cor_dn, density_cor_dn, df_pm25)) - shift_typ[alg_type].append(shift_cor[0]) + fitness_typ[alg_type].append(_fitness_func(merge_data_cor_dn, density_cor_dn, df_pm25)) + shift_typ[alg_type].append(shift_cor[0]) - ## get times value and shift value - out_dic = {} - for (_typ, _lst), (_typ, _shft) in zip(fitness_typ.items(), shift_typ.items()): - oth_typ[_typ] = None - if len(_lst) == 0: continue + ## get times value and shift value + out_dic = {} + for (_typ, _lst), (_typ, _shft) in zip(fitness_typ.items(), shift_typ.items()): + oth_typ[_typ] = None + if len(_lst) == 0: continue - df_times_min = concat(_lst, axis=1, keys=range(len(_lst))).idxmin(axis=1).dropna().astype(int) - df_shift = concat(_shft, axis=1, 
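The APS correction factor produced by the first pass is deliberately coarse: it is averaged to one value per day, forward-filled onto the full index and floored at 1, so the sub-700 nm APS bins are only ever scaled up before the corrected second pass. Restated as a helper, assuming both frames are time-indexed (apply_daily_aps_correction is an illustrative name):

def apply_daily_aps_correction(aps, corr):
    # corr: ratio of the merged fit to the measured APS for the overlap columns
    daily = corr.resample('1d').mean().reindex(aps.index).ffill()  # one factor per day
    daily = daily.mask(daily < 1, 1)                               # never scale the APS down
    out = aps.copy()
    out.loc[:, daily.columns] *= daily
    return out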
keys=times_ary.tolist()).loc[df_times_min.index].values[ - range(len(df_times_min.index)), df_times_min.values] + df_times_min = concat(_lst, axis=1, keys=range(len(_lst))).idxmin(axis=1).dropna().astype(int) + df_shift = concat(_shft, axis=1, keys=times_ary.tolist()).loc[df_times_min.index].values[ + range(len(df_times_min.index)), df_times_min.values] - oth_typ[_typ] = DataFrame(np.array([df_shift, times_ary[df_times_min.values]]).T, - index=df_times_min.index, columns=['shift', 'times']).reindex(smps.index) + oth_typ[_typ] = DataFrame(np.array([df_shift, times_ary[df_times_min.values]]).T, + index=df_times_min.index, columns=['shift', 'times']).reindex(smps.index) - ## re-calculate merge_data - alg_type = ['dn', 'cor_dn', 'dndsdv', 'cor_dndsdv'] if dndsdv_alg else ['dn', 'cor_dn'] + ## re-calculate merge_data + alg_type = ['dn', 'cor_dn', 'dndsdv', 'cor_dndsdv'] if dndsdv_alg else ['dn', 'cor_dn'] - out_dic = {} - den_lst, times_lst = [], [] - for _typ in alg_type: - print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mre-caculate merge data with times: {_typ}\033[0m") - typ = oth_typ[_typ] - smps_input = smps.copy() * typ['times'].to_frame().values + out_dic = {} + den_lst, times_lst = [], [] + for _typ in alg_type: + print(f"\t\t{dtm.now().strftime('%m/%d %X')} : \033[92mre-caculate merge data with times: {_typ}\033[0m") + typ = oth_typ[_typ] + smps_input = smps.copy() * typ['times'].to_frame().values - corr_typ = corr if 'cor' in _typ else 1 - aps_input = aps.copy() - aps_input.loc[:, corr.keys()] *= corr_typ + corr_typ = corr if 'cor' in _typ else 1 + aps_input = aps.copy() + aps_input.loc[:, corr.keys()] *= corr_typ - merge_arg = (smps_input, aps_input, typ['shift'].to_frame(), smps_overlap_lowbound, aps_fit_highbound) + merge_arg = (smps_input, aps_input, typ['shift'].to_frame(), smps_overlap_lowbound, aps_fit_highbound) - merge_data, density, _corr = _merge_data(*merge_arg, 'mobility', _alg_type=_typ) - density.columns = ['density'] + merge_data, density, _corr = _merge_data(*merge_arg, 'mobility', _alg_type=_typ) + density.columns = ['density'] - out_dic[f'data_{_typ}'] = merge_data + out_dic[f'data_{_typ}'] = merge_data - den_lst.append(density) - times_lst.append(typ['times']) + den_lst.append(density) + times_lst.append(typ['times']) - out_rho = concat(den_lst, axis=1) - out_times = concat(times_lst, axis=1) - out_rho.columns = alg_type - out_times.columns = alg_type + out_rho = concat(den_lst, axis=1) + out_times = concat(times_lst, axis=1) + out_rho.columns = alg_type + out_times.columns = alg_type - # breakpoint() + # breakpoint() - ## out - out_dic.update(dict(density=out_rho, times=out_times)) + ## out + out_dic.update(dict(density=out_rho, times=out_times)) - # out_dic = { - # 'data_cor_dndsdv' : merge_data, - # 'data_dn' : merge_data_dn, - # 'data_dndsdv' : merge_data_dsdv, - # 'data_cor_dn' : merge_data_cor_dn, + # out_dic = { + # 'data_cor_dndsdv' : merge_data, + # 'data_dn' : merge_data_dn, + # 'data_dndsdv' : merge_data_dsdv, + # 'data_cor_dn' : merge_data_cor_dn, - # 'density' : out_rho, + # 'density' : out_rho, - # 'data_all_aer' : merge_data_aer, + # 'data_all_aer' : merge_data_aer, - # 'density_cor_dndsdv' : density, - # 'density_dn' : density_dn, - # 'density_dndsdv' : density_dsdv, - # 'density_cor_dn' : density_cor_dn, - # } + # 'density_cor_dndsdv' : density, + # 'density_dn' : density_dn, + # 'density_dndsdv' : density_dsdv, + # 'density_cor_dn' : density_cor_dn, + # } - ## process data - for _nam, _df in out_dic.items(): - out_dic[_nam] = 
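Putting the pieces together, a hypothetical call to the rewritten merge routine looks like this, assuming time-indexed DataFrames with bin-diameter columns (SMPS in nm, APS in um unless aps_unit is changed) and a PM2.5 series in ug m^-3; the output keys follow the out_dic assembled above:

out = merge_SMPS_APS(df_smps, df_aps, df_pm25, aps_unit='um',
                     smps_overlap_lowbound=500, aps_fit_highbound=1000,
                     dndsdv_alg=True, times_range=(0.8, 1.25, .05))

merged = out['data_cor_dndsdv']   # merged dN/dlogDp on the common bin grid
rho = out['density']              # effective density per algorithm type
times = out['times']              # SMPS scaling factor chosen per timestamp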
_df.reindex(smps.index).copy() + ## process data + for _nam, _df in out_dic.items(): + out_dic[_nam] = _df.reindex(smps.index).copy() - return out_dic + return out_dic diff --git a/AeroViz/dataProcess/SizeDistr/_size_distr.py b/AeroViz/dataProcess/SizeDistr/_size_distr.py index c718329..272f60f 100644 --- a/AeroViz/dataProcess/SizeDistr/_size_distr.py +++ b/AeroViz/dataProcess/SizeDistr/_size_distr.py @@ -2,79 +2,83 @@ def _geometric_prop(_dp, _prop): - import numpy as n + import numpy as n - _prop_t = _prop.sum(axis=1) - _prop_t = _prop_t.where(_prop_t > 0).copy() + _prop_t = _prop.sum(axis=1) + _prop_t = _prop_t.where(_prop_t > 0).copy() - _dp = n.log(_dp) - _gmd = (((_prop * _dp).sum(axis=1)) / _prop_t.copy()) + _dp = n.log(_dp) + _gmd = (((_prop * _dp).sum(axis=1)) / _prop_t.copy()) - _dp_mesh, _gmd_mesh = n.meshgrid(_dp, _gmd) - _gsd = ((((_dp_mesh - _gmd_mesh) ** 2) * _prop).sum(axis=1) / _prop_t.copy()) ** .5 + _dp_mesh, _gmd_mesh = n.meshgrid(_dp, _gmd) + _gsd = ((((_dp_mesh - _gmd_mesh) ** 2) * _prop).sum(axis=1) / _prop_t.copy()) ** .5 - return _prop_t, _gmd.apply(n.exp), _gsd.apply(n.exp) + return _prop_t, _gmd.apply(n.exp), _gsd.apply(n.exp) def _basic(df, hybrid, unit, bin_rg, input_type): - import numpy as n - from pandas import DataFrame, concat + import numpy as n + from pandas import DataFrame - ## get number conc. data and total, mode - dN = df - dN.columns = dN.keys().to_numpy(float) + ## get number conc. data and total, mode + dN = df + dN.columns = dN.keys().to_numpy(float) - dN_ky = dN.keys()[(dN.keys() >= bin_rg[0]) & (dN.keys() <= bin_rg[-1])] - dN = dN[dN_ky].copy() + dN_ky = dN.keys()[(dN.keys() >= bin_rg[0]) & (dN.keys() <= bin_rg[-1])] + dN = dN[dN_ky].copy() - out_dic = {} - ## diameter - dp = dN.keys().to_numpy() - if hybrid: - dlog_dp = n.diff(n.log10(dp)).mean() - else: - dlog_dp = n.ones(dp.size) - dlog_dp[:hybrid] = n.diff(n.log10(dp[:hybrid])).mean() - dlog_dp[hybrid:] = n.diff(n.log10(dp[hybrid:])).mean() + out_dic = {} + ## diameter + dp = dN.keys().to_numpy() + if hybrid: + dlog_dp = n.diff(n.log10(dp)).mean() + else: + dlog_dp = n.ones(dp.size) + dlog_dp[:hybrid] = n.diff(n.log10(dp[:hybrid])).mean() + dlog_dp[hybrid:] = n.diff(n.log10(dp[hybrid:])).mean() - ## calculate normalize and non-normalize data - if input_type == 'norm': - out_dic['number'] = (dN * dlog_dp).copy() - out_dic['number_norm'] = dN.copy() - else: - out_dic['number'] = dN.copy() - out_dic['number_norm'] = (dN / dlog_dp).copy() + ## calculate normalize and non-normalize data + if input_type == 'norm': + out_dic['number'] = (dN * dlog_dp).copy() + out_dic['number_norm'] = dN.copy() + else: + out_dic['number'] = dN.copy() + out_dic['number_norm'] = (dN / dlog_dp).copy() - out_dic['surface'] = out_dic['number'] * n.pi * dp ** 2 - out_dic['volume'] = out_dic['number'] * n.pi * (dp ** 3) / 6 + out_dic['surface'] = out_dic['number'] * n.pi * dp ** 2 + out_dic['volume'] = out_dic['number'] * n.pi * (dp ** 3) / 6 - out_dic['surface_norm'] = out_dic['number_norm'] * n.pi * dp ** 2 - out_dic['volume_norm'] = out_dic['number_norm'] * n.pi * (dp ** 3) / 6 + out_dic['surface_norm'] = out_dic['number_norm'] * n.pi * dp ** 2 + out_dic['volume_norm'] = out_dic['number_norm'] * n.pi * (dp ** 3) / 6 - ## size range mode process - df_oth = DataFrame(index=dN.index) + ## size range mode process + df_oth = DataFrame(index=dN.index) - bound = n.array([(dp.min(), dp.max() + 1), (10, 25), (25, 100), (100, 1e3), (1e3, 2.5e3), ]) - if unit == 'um': - bound[1:] /= 1e3 + bound = n.array([(dp.min(), dp.max() + 
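_geometric_prop computes the total concentration, geometric mean diameter and geometric standard deviation of each weighted spectrum from log-diameter moments. The same formulas for a single spectrum, as a self-contained sketch (gmd_gsd is an illustrative name):

import numpy as np

def gmd_gsd(dp_nm, conc):
    # Geometric mean diameter and geometric standard deviation of one spectrum.
    ln_dp = np.log(dp_nm)
    total = conc.sum()
    gmd = np.exp((conc * ln_dp).sum() / total)
    gsd = np.exp(np.sqrt((conc * (ln_dp - np.log(gmd)) ** 2).sum() / total))
    return gmd, gsd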
1), (10, 25), (25, 100), (100, 1e3), (1e3, 2.5e3), ]) + if unit == 'um': + bound[1:] /= 1e3 - for _tp_nam, _tp_dt in zip(['num', 'surf', 'vol'], [out_dic['number'], out_dic['surface'], out_dic['volume']]): + for _tp_nam, _tp_dt in zip(['num', 'surf', 'vol'], [out_dic['number'], out_dic['surface'], out_dic['volume']]): - for _md_nam, _range in zip(['all', 'Nucleation', 'Aitken', 'Accumulation', 'Coarse'], bound): + for _md_nam, _range in zip(['all', 'Nucleation', 'Aitken', 'Accumulation', 'Coarse'], bound): - _dia = dp[(dp >= _range[0]) & (dp < _range[-1])] - if ~_dia.any(): continue + _dia = dp[(dp >= _range[0]) & (dp < _range[-1])] + if ~_dia.any(): continue - _dt = _tp_dt[_dia].copy() + _dt = _tp_dt[_dia].copy() - df_oth[f'total_{_tp_nam}_{_md_nam}'], df_oth[f'GMD_{_tp_nam}_{_md_nam}'], df_oth[ - f'GSD_{_tp_nam}_{_md_nam}'] = _geometric_prop(_dia, _dt) - df_oth[f'mode_{_tp_nam}_{_md_nam}'] = _dt.idxmax(axis=1) + df_oth[f'total_{_tp_nam}_{_md_nam}'], df_oth[f'GMD_{_tp_nam}_{_md_nam}'], df_oth[ + f'GSD_{_tp_nam}_{_md_nam}'] = _geometric_prop(_dia, _dt) - ## out - out_dic['other'] = df_oth + mask = _dt.notna().any(axis=1) - return out_dic + df_oth.loc[mask, f'mode_{_tp_nam}_{_md_nam}'] = _dt.loc[mask].idxmax(axis=1) + df_oth.loc[~mask, f'mode_{_tp_nam}_{_md_nam}'] = n.nan + + ## out + out_dic['other'] = df_oth + + return out_dic # old 20230113 diff --git a/AeroViz/dataProcess/SizeDistr/prop.py b/AeroViz/dataProcess/SizeDistr/prop.py new file mode 100644 index 0000000..d55a8db --- /dev/null +++ b/AeroViz/dataProcess/SizeDistr/prop.py @@ -0,0 +1,62 @@ +import numpy as np +from numpy import exp, log +from scipy.signal import find_peaks + + +def geometric(dp: np.ndarray, + dist: np.ndarray + ) -> tuple[float, float]: + """ Calculate the geometric mean and standard deviation. """ + + _gmd = (((dist * log(dp)).sum()) / dist.sum()) + + logdp_mesh, gmd_mesh = np.meshgrid(log(dp), _gmd) + _gsd = ((((logdp_mesh - gmd_mesh) ** 2) * dist).sum() / dist.sum()) ** .5 + + return exp(_gmd), exp(_gsd) + + +def contribution(dp: np.ndarray, + dist: np.ndarray + ) -> tuple[float, float, float]: + """ Calculate the relative contribution of each mode. """ + + ultra = dist[(dp >= 11.8) & (dp < 100)].sum() / dist.sum() + accum = dist[(dp >= 100) & (dp < 1000)].sum() / dist.sum() + coars = dist[(dp >= 1000) & (dp < 2500)].sum() / dist.sum() + + return ultra, accum, coars + + +def mode(dp: np.ndarray, + dist: np.ndarray + ) -> np.ndarray: + """ Find three peak mode in distribution. 
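The mask added around idxmax above exists because idxmax cannot pick a peak bin for rows that are entirely NaN; masked rows are left as NaN instead. The same guard as a standalone sketch, assuming numeric bin labels (nan_safe_mode is an illustrative name):

import numpy as np
import pandas as pd

def nan_safe_mode(dist: pd.DataFrame) -> pd.Series:
    # Diameter of the peak bin per timestamp; all-NaN rows stay NaN.
    has_data = dist.notna().any(axis=1)
    mode = pd.Series(np.nan, index=dist.index)
    mode[has_data] = dist[has_data].idxmax(axis=1)
    return mode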
""" + + min_value = np.array([dist.min()]) + mode, _ = find_peaks(np.concatenate([min_value, dist, min_value]), distance=len(dist) - 1) + + return dp[mode - 1] + + +def properties(dist, + dp: np.ndarray, + dlogdp: np.ndarray, + weighting: str + ) -> dict: + """ for apply """ + dist = np.array(dist) + + gmd, gsd = geometric(dp, dist) + ultra, accum, coarse = contribution(dp, dist) + peak = mode(dp, dist) + + return {key: round(value, 3) for key, value in + {f'total_{weighting}': (dist * dlogdp).sum(), + f'GMD_{weighting}': gmd, + f'GSD_{weighting}': gsd, + f'mode_{weighting}': peak[0], + f'ultra_{weighting}': ultra, + f'accum_{weighting}': accum, + f'coarse_{weighting}': coarse} + .items()} diff --git a/AeroViz/dataProcess/VOC/__init__.py b/AeroViz/dataProcess/VOC/__init__.py index 2362eac..972bb2b 100644 --- a/AeroViz/dataProcess/VOC/__init__.py +++ b/AeroViz/dataProcess/VOC/__init__.py @@ -1,19 +1,14 @@ -from ..core import _writter, _run_process +from ..core import Writer, run_process -__all__ = [ +__all__ = ['VOC'] - 'VOC', -] +class VOC(Writer): + @run_process('VOC - basic', 'voc_basic') + def VOC_basic(self, _df_voc): + from ._potential_par import _basic -class VOC(_writter): + out = _basic(_df_voc) - ## Reconstruction - @_run_process('VOC - basic', 'voc_basic') - def VOC_basic(self, _df_voc): - from ._potential_par import _basic - - out = _basic(_df_voc) - - return self, out + return self, out diff --git a/AeroViz/dataProcess/VOC/_potential_par.py b/AeroViz/dataProcess/VOC/_potential_par.py index 4cd32d5..71e945d 100644 --- a/AeroViz/dataProcess/VOC/_potential_par.py +++ b/AeroViz/dataProcess/VOC/_potential_par.py @@ -1,76 +1,108 @@ -from datetime import datetime as dtm -from pandas import DataFrame, to_datetime, read_json from pathlib import Path -import pickle as pkl -import numpy as np +from pandas import DataFrame, read_json, concat def _basic(_df_voc): - ## parameter - _keys = _df_voc.keys() + with (Path(__file__).parent / 'support_voc.json').open('r', encoding='utf-8', errors='ignore') as f: + _par = read_json(f) - with (Path(__file__).parent / 'voc_par.pkl').open('rb') as f: - _par = pkl.load(f) - _MW, _MIR, _SOAP, _KOH = _par.loc['MW', _keys], _par.loc['MIR', _keys], _par.loc['SOAP', _keys], _par.loc[ - 'KOH', _keys] + # parameter + _keys = _df_voc.keys() - with (Path(__file__).parent / 'voc_par.json').open('r', encoding='utf-8', errors='ignore') as f: - _parr = read_json(f) - _MW, _MIR, _SOAP, _KOH = _par.loc['MW', _keys], _par.loc['MIR', _keys], _par.loc['SOAP', _keys], _par.loc[ - 'KOH', _keys] + invalid_keys = [key for key in _df_voc.keys() if key not in set(_par.keys())] - _voc_clasfy = { - 'alkane_total': ['Isopentane', 'n-Butane', '2-Methylhexane', 'Cyclopentane', '3-Methylpentane', - '2,3-Dimethylbutane', - '2-Methylheptane', 'n-Nonane', 'Methylcyclohexane', '2,4-Dimethylpentane', '2-Methylpentane', - 'n-Decane', - 'n-Heptane', 'Cyclohexane', 'n-Octane', 'Isobutane', '2,2-Dimethylbutane', - 'Methylcyclopentane', 'n-Hexane', - '2,3,4-Trimethylpentane', '3-Methylhexane', 'n-Undecane', '3-Methylheptane', 'Hexane', - '2,2,4-Trimethylpentane', 'n-Pentane', 'Ethane', 'Propane'], + if invalid_keys: + raise KeyError(f'\n\t\t{invalid_keys} are not supported keys.' 
+ f'\n\t\tPlease check the\033[91m support_voc.md\033[0m file to use the correct name.') - 'alkane_total': ['Isoprene', '1-Butene', 'cis-2-Butene', 'Propene', '1.3-Butadiene', - 't-2-Butene', 'cis-2-Pentene', 'Propylene', 'isoprene', '1-Pentene', - 'Ethylene', 't-2-Pentene', '1-Octene'], + _MW, _MIR, _SOAP, _KOH = _par.loc['MW', :], _par.loc['MIR', :], _par.loc['SOAP', :], _par.loc['KOH', :] - 'aromatic_total': ['o-Ethyltoluene', '1,3,5-Trimethylbenzene', 'Ethylbenzene', 'm,p-Xylene', 'n-Propylbenzene', - 'Benzene', 'Toluene', '1.2.4-TMB', 'Styrene', 'p-Ethyltoluene', 'o-Xylene', - 'm-Diethylbenzene', - '1.2.3-TMB', 'Isopropylbenzene', 'm-Ethyltoluene', '2-Ethyltoluene', '1.3.5-TMB', - 'Iso-Propylbenzene', - '3.4-Ethyltoluene', 'p-Diethylbenzene', '1,2,4-Trimethylbenzene', 'm.p-Xylene', - '1,2,3-Trimethylbenzene'], + _voc_classify = { + 'alkane_total': ['Ethane', 'Propane', 'Isobutane', 'n-Butane', 'Isopentane', 'n-Pentane', 'n-Hexane', + 'n-Heptane', 'n-Octane', 'n-Nonane', 'n-Decane', 'n-Undecane', 'n-Dodecane', - 'alkyne_total': ['Acetylene'], + 'Cyclopentane', 'Methylcyclopentane', 'Cyclohexane', 'Methylcyclohexane', - 'OVOC': ['Acetaldehyde', 'Ethanol', 'Acetone', 'IPA', 'Ethyl Acetate', 'Butyl Acetate'], + '2,2-Dimethylbutane', '2,3-Dimethylbutane', '2-Methylpentane', '3-Methylpentane', + '2,4-Dimethylpentane', '2-Methylhexane', '3-Methylhexane', + '2,2,4-Trimethylpentane', '2,3,4-Trimethylpentane', '2-Methylheptane', '3-Methylheptane'], - 'ClVOC': ['VCM', 'TCE', 'PCE', '1.4-DCB', '1.2-DCB'], - } + 'alkene_total': ['Ethylene', 'Propylene', '1-Butene', 't-2-Butene', 'cis-2-Butene', '1-Pentene', 't-2-Pentene', + 'cis-2-Pentene', '1-Hexene', 'Isoprene', '1.3-Butadiene', '1-Octene'], - _df_MW = (_df_voc * _MW).copy() - _df_dic = { - 'Conc': _df_voc.copy(), - 'OFP': _df_MW / 48 * _MIR, - 'SOAP': _df_MW / 24.5 * _SOAP / 100 * 0.054, - 'LOH': _df_MW / 24.5 / _MW * 0.602 * _KOH, - } + 'aromatic_total': ['Benzene', 'Toluene', 'Ethylbenzene', 'm/p-Xylene', 'o-Xylene', 'Styrene', + 'Isopropylbenzene', + 'n-Propylbenzene', 'm-Ethyltoluene', 'p-Ethyltoluene', 'o-Ethyltoluene', 'm-Diethylbenzene', + 'p-Diethylbenzene', '1,2,4-Trimethylbenzene', '1,2,3-Trimethylbenzene', + '1,3,5-Trimethylbenzene', ], - ## calculate - _out = {} - for _nam, _df in _df_dic.items(): + 'alkyne_total': ['Acetylene'], - _df_out = DataFrame(index=_df_voc.index) + 'OVOC': ['Acetaldehyde', 'Ethanol', 'Acetone', 'IPA', 'Ethyl Acetate', 'Butyl Acetate'], - for _voc_nam, _voc_lst in _voc_clasfy.items(): - _lst = list(set(_keys) & set(_voc_lst)) - if len(_lst) == 0: continue + 'ClVOC': ['VCM', 'TCE', 'PCE', '1.4-DCB', '1.2-DCB'], + } - _df_out[_voc_nam] = _df[_lst].sum(axis=1, min_count=1) + _df_MW = (_df_voc * _MW).copy() + _df_dic = { + 'Conc': _df_voc.copy(), + 'OFP': _df_MW / 48 * _MIR, + 'SOAP': _df_MW / 24.5 * _SOAP / 100 * 0.054, + 'LOH': _df_MW / 24.5 / _MW * 0.602 * _KOH, + } - _df_out['Total'] = _df.sum(axis=1, min_count=1) + # calculate + _out = {} + for _nam, _df in _df_dic.items(): - _out[_nam] = _df_out + _df_out = DataFrame(index=_df_voc.index) - return _out + for _voc_nam, _voc_lst in _voc_classify.items(): + _lst = list(set(_keys) & set(_voc_lst)) + if len(_lst) == 0: + continue + + _df_out = concat([_df[_lst], _df_out], axis=1) + + _df_out[_voc_nam] = _df[_lst].sum(axis=1, min_count=1) + + _df_out['Total'] = _df.sum(axis=1, min_count=1) + + _out[_nam] = _df_out + + return _out + + +def markdown_table_to_dataframe(): + import pandas as pd + from pathlib import Path + + # support_voc.md + with 
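The reworked _basic derives all metrics from the four per-species parameters in support_voc.json. Restating the formulas it applies (voc_potentials is an illustrative name; the comments on the constants are my reading, e.g. MW/24.5 as the ppb to ug m^-3 conversion near 25 degC):

from pandas import DataFrame

def voc_potentials(conc_ppb: DataFrame, par: DataFrame) -> dict:
    # conc_ppb: time-indexed mixing ratios in ppb; par: support_voc.json as read
    # by read_json (rows MW/MIR/SOAP/KOH, one column per species).
    keys = conc_ppb.columns
    mw, mir, soap, koh = (par.loc[k, keys] for k in ('MW', 'MIR', 'SOAP', 'KOH'))
    mass = conc_ppb * mw                              # ppb * (g mol^-1)
    return {
        'Conc': conc_ppb.copy(),
        'OFP': mass / 48 * mir,                       # ozone formation potential
        'SOAP': mass / 24.5 * soap / 100 * 0.054,     # SOA potential, toluene-referenced
        'LOH': conc_ppb / 24.5 * 0.602 * koh,         # OH loss rate (kOH in 1e-12 cm3 s-1)
    }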
open(Path(__file__).parent / 'support_voc.md', 'r', encoding='utf-8') as file: + markdown_content = file.read() + + # 將內容分割成行 + lines = markdown_content.strip().split('\n') + + # 提取表頭 + headers = [col.strip() for col in lines[0].split('|')[1:-1]] + + # 解析數據行 + data = [] + for line in lines[2:]: # 跳過表頭和分隔行 + columns = [col.strip() for col in line.split('|')[1:-1]] + data.append(columns) + + # 創建 DataFrame + df = pd.DataFrame(data, columns=headers) + + # 轉換數據類型 + numeric_columns = ['MIR', 'MW', 'SOAP', 'KOH'] + for col in numeric_columns: + df[col] = pd.to_numeric(df[col], errors='coerce') + df = df.set_index('Species').T + + df = df.iloc[:, :-7] + + df.to_json(Path(__file__).parent / 'support_voc.json', indent=4) diff --git a/AeroViz/dataProcess/VOC/voc_par.json b/AeroViz/dataProcess/VOC/support_voc.json similarity index 92% rename from AeroViz/dataProcess/VOC/voc_par.json rename to AeroViz/dataProcess/VOC/support_voc.json index 3a80937..f60ba5f 100644 --- a/AeroViz/dataProcess/VOC/voc_par.json +++ b/AeroViz/dataProcess/VOC/support_voc.json @@ -1,45 +1,9 @@ { - "3-Methylheptane": { - "MIR": 1.24, - "MW": 114.23, - "SOAP": 0.0, - "KOH": 5.6 - }, - "Isopentane": { - "MIR": 1.45, - "MW": 72.15, - "SOAP": 0.2, - "KOH": 3.6 - }, - "n-Propylbenzene": { - "MIR": 2.03, - "MW": 120.2, - "SOAP": 109.7, - "KOH": 5.8 - }, - "3-Methylhexane": { - "MIR": 1.61, - "MW": 100.2, - "SOAP": 0.0, - "KOH": 7.0 - }, - "2-Methylheptane": { - "MIR": 1.07, - "MW": 114.23, - "SOAP": 0.0, - "KOH": 7.0 - }, - "2-Methylhexane": { - "MIR": 1.19, - "MW": 100.2, - "SOAP": 0.0, - "KOH": 5.65 - }, - "Styrene": { - "MIR": 1.73, - "MW": 104.15, - "SOAP": 212.3, - "KOH": 58.0 + "Benzene": { + "MIR": 0.72, + "MW": 78.11, + "SOAP": 92.9, + "KOH": 1.22 }, "Toluene": { "MIR": 4.0, @@ -53,11 +17,11 @@ "SOAP": 111.6, "KOH": 7.0 }, - "Benzene": { - "MIR": 0.72, - "MW": 78.11, - "SOAP": 92.9, - "KOH": 1.22 + "m/p-Xylene": { + "MIR": 7.8, + "MW": 106.2, + "SOAP": 75.8, + "KOH": 18.95 }, "o-Xylene": { "MIR": 7.64, @@ -65,148 +29,154 @@ "SOAP": 95.5, "KOH": 13.6 }, - "Hexane": { - "MIR": 1.24, - "MW": 86.2, + "Ethane": { + "MIR": 0.28, + "MW": 30.07, "SOAP": 0.1, - "KOH": 5.2 - }, - "Acetone": { - "MIR": 0.36, - "MW": 58.1, - "SOAP": 0.3, - "KOH": 0.17 - }, - "3,4-Ethyltoluene": { - "MIR": 5.92, - "MW": 120.2, - "SOAP": 85.2, - "KOH": 15.2 - }, - "Iso-Propylbenzene": { - "MIR": 2.52, - "MW": 120.2, - "SOAP": 95.5, - "KOH": 6.3 - }, - "1.2.4-TMB": { - "MIR": 8.87, - "MW": 120.2, - "SOAP": 20.6, - "KOH": 32.5 + "KOH": 0.248 }, - "Acetaldehyde": { - "MIR": 6.54, + "Propane": { + "MIR": 0.49, "MW": 44.1, - "SOAP": 0.6, - "KOH": 15.0 - }, - "VCM": { - "MIR": 2.83, - "MW": 62.5, - "SOAP": null, - "KOH": null + "SOAP": 0.0, + "KOH": 1.09 }, - "1.3.5-TMB": { - "MIR": 11.76, - "MW": 120.2, - "SOAP": 13.5, - "KOH": 56.7 + "Isobutane": { + "MIR": 1.23, + "MW": 58.12, + "SOAP": 0.0, + "KOH": 2.12 }, - "Ethyl Acetate": { - "MIR": 0.63, - "MW": 88.1, - "SOAP": 0.1, - "KOH": null + "n-Butane": { + "MIR": 1.15, + "MW": 58.12, + "SOAP": 0.3, + "KOH": 2.36 }, - "Ethanol": { - "MIR": 1.53, - "MW": 46.1, - "SOAP": 0.6, - "KOH": 3.2 + "Isopentane": { + "MIR": 1.45, + "MW": 72.15, + "SOAP": 0.2, + "KOH": 3.6 }, - "Butyl Acetate": { - "MIR": 0.83, - "MW": 116.2, - "SOAP": 0.0, - "KOH": null + "n-Pentane": { + "MIR": 1.31, + "MW": 72.15, + "SOAP": 0.3, + "KOH": 3.8 }, - "m.p-Xylene": { - "MIR": 7.8, - "MW": 106.2, - "SOAP": 75.8, - "KOH": 18.95 + "n-Hexane": { + "MIR": 1.24, + "MW": 86.18, + "SOAP": 0.1, + "KOH": 5.2 }, - "TCE": { - "MIR": 0.64, - "MW": 131.4, - 
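markdown_table_to_dataframe regenerates support_voc.json with species as the outer keys, so reading it back yields a frame whose columns are species and whose rows are the four parameters, which is exactly the layout _basic indexes with _par.loc['MW', ...]. A quick sanity-check sketch (the relative path is an assumption):

from pathlib import Path
from pandas import read_json

par = read_json(Path('AeroViz/dataProcess/VOC/support_voc.json'))
assert set(par.index) == {'MIR', 'MW', 'SOAP', 'KOH'}   # rows are the parameters
assert 'Toluene' in par.columns                         # columns are species names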
"SOAP": null, - "KOH": 1.9 + "n-Heptane": { + "MIR": 1.07, + "MW": 100.21, + "SOAP": 0.1, + "KOH": 6.76 }, - "1-Octene": { - "MIR": 3.25, - "MW": 112.2, - "SOAP": null, - "KOH": 30.0 + "n-Octane": { + "MIR": 0.9, + "MW": 114.23, + "SOAP": 0.8, + "KOH": 8.11 }, - "Isoprene": { - "MIR": 10.61, - "MW": 68.1, + "n-Nonane": { + "MIR": 0.78, + "MW": 128.2, "SOAP": 1.9, - "KOH": 100.0 + "KOH": 9.7 }, - "1.2.3-TMB": { - "MIR": 11.97, - "MW": 120.2, - "SOAP": 43.9, - "KOH": 32.7 + "n-Decane": { + "MIR": 0.68, + "MW": 142.29, + "SOAP": 7.0, + "KOH": 11.0 }, - "2-Ethyltoluene": { - "MIR": 5.59, - "MW": 120.2, - "SOAP": 94.8, - "KOH": 11.9 + "n-Undecane": { + "MIR": 0.61, + "MW": 156.31, + "SOAP": 16.2, + "KOH": 12.3 }, - "1.2-DCB": { - "MIR": 0.18, - "MW": 147.0, + "n-Dodecane": { + "MIR": 0.55, + "MW": 170.34, "SOAP": null, "KOH": null }, - "Propene": { + "Ethylene": { + "MIR": 9.0, + "MW": 28.05, + "SOAP": 1.3, + "KOH": 8.52 + }, + "Propylene": { "MIR": 11.66, - "MW": 42.1, + "MW": 42.08, "SOAP": 1.6, "KOH": 26.3 }, - "1.4-DCB": { - "MIR": 0.18, - "MW": 147.0, - "SOAP": null, - "KOH": null + "1-Butene": { + "MIR": 9.73, + "MW": 56.1, + "SOAP": 1.2, + "KOH": 31.4 }, - "PCE": { - "MIR": 0.03, - "MW": 165.8, - "SOAP": null, - "KOH": 0.16 + "t-2-Butene": { + "MIR": 15.16, + "MW": 56.1, + "SOAP": 3.1, + "KOH": 56.4 }, - "1.3-Butadiene": { - "MIR": 12.61, - "MW": 54.1, - "SOAP": 1.8, - "KOH": 66.6 + "cis-2-Butene": { + "MIR": 14.24, + "MW": 56.1, + "SOAP": 3.6, + "KOH": 64.0 }, - "IPA": { - "MIR": 0.61, - "MW": 60.1, - "SOAP": 0.4, - "KOH": 5.1 + "1-Pentene": { + "MIR": 7.21, + "MW": 70.13, + "SOAP": 0.0, + "KOH": 31.4 }, - "n-Hexane": { - "MIR": 1.24, - "MW": 86.18, + "t-2-Pentene": { + "MIR": 10.56, + "MW": 70.13, + "SOAP": 4.0, + "KOH": 67.0 + }, + "cis-2-Pentene": { + "MIR": 10.38, + "MW": 70.13, + "SOAP": 3.6, + "KOH": 65.0 + }, + "1-Hexene": { + "MIR": 5.49, + "MW": 84.16, + "SOAP": null, + "KOH": null + }, + "Acetylene": { + "MIR": 0.95, + "MW": 26.04, "SOAP": 0.1, + "KOH": 0.85 + }, + "Cyclopentane": { + "MIR": 2.39, + "MW": 70.1, + "SOAP": 0.0, + "KOH": 4.97 + }, + "Methylcyclopentane": { + "MIR": 2.19, + "MW": 84.16, + "SOAP": 0.0, "KOH": 5.2 }, "Cyclohexane": { @@ -215,113 +185,113 @@ "SOAP": 0.0, "KOH": 6.97 }, - "n-Nonane": { - "MIR": 0.78, - "MW": 128.2, - "SOAP": 1.9, - "KOH": 9.7 - }, - "Isopropylbenzene": { - "MIR": 2.52, - "MW": 120.19, - "SOAP": 95.5, - "KOH": 6.3 - }, "Methylcyclohexane": { "MIR": 1.7, "MW": 98.19, "SOAP": 0.0, "KOH": 4.97 }, - "1-Pentene": { - "MIR": 7.21, - "MW": 70.13, - "SOAP": 0.0, - "KOH": 31.4 - }, - "m-Ethyltoluene": { - "MIR": 7.39, - "MW": 120.19, - "SOAP": 100.6, - "KOH": 11.8 + "Isoprene": { + "MIR": 10.61, + "MW": 68.1, + "SOAP": 1.9, + "KOH": 100.0 }, - "t-2-Butene": { - "MIR": 15.16, - "MW": 56.1, - "SOAP": 3.1, - "KOH": 56.4 + "2,2-Dimethylbutane": { + "MIR": 1.17, + "MW": 86.17, + "SOAP": 0.0, + "KOH": 2.23 }, - "n-Butane": { - "MIR": 1.15, - "MW": 58.12, - "SOAP": 0.3, - "KOH": 2.36 + "2,3-Dimethylbutane": { + "MIR": 0.97, + "MW": 86.18, + "SOAP": 0.0, + "KOH": 5.78 }, - "2,2,4-Trimethylpentane": { - "MIR": 1.26, - "MW": 114.23, + "2-Methylpentane": { + "MIR": 1.5, + "MW": 86.18, "SOAP": 0.0, - "KOH": 3.34 + "KOH": 5.4 }, - "1,2,4-Trimethylbenzene": { - "MIR": 8.87, - "MW": 120.19, - "SOAP": 20.6, - "KOH": 32.5 + "3-Methylpentane": { + "MIR": 1.8, + "MW": 86.18, + "SOAP": 0.2, + "KOH": 5.2 }, - "n-Undecane": { - "MIR": 0.61, - "MW": 156.31, - "SOAP": 16.2, - "KOH": 12.3 + "2,3-Dimethylpentane": { + "MIR": 1.34, + "MW": 100.2, + "SOAP": 0.4, + "KOH": 
1.5 }, - "Ethylene": { - "MIR": 9.0, - "MW": 28.05, - "SOAP": 1.3, - "KOH": 8.52 + "2,4-Dimethylpentane": { + "MIR": 1.55, + "MW": 100.2, + "SOAP": 0.0, + "KOH": 4.77 }, - "1,2,3-Trimethylbenzene": { - "MIR": 11.97, - "MW": 120.19, - "SOAP": 43.9, - "KOH": 32.7 + "2-Methylhexane": { + "MIR": 1.19, + "MW": 100.2, + "SOAP": 0.0, + "KOH": 5.65 }, - "2,3-Dimethylbutane": { - "MIR": 0.97, - "MW": 86.18, + "3-Methylheptane": { + "MIR": 1.24, + "MW": 114.23, + "SOAP": 0.0, + "KOH": 5.6 + }, + "2,2,4-Trimethylpentane": { + "MIR": 1.26, + "MW": 114.23, "SOAP": 0.0, - "KOH": 5.78 + "KOH": 3.34 }, - "2-Methylpentane": { - "MIR": 1.5, - "MW": 86.18, + "2,3,4-Trimethylpentane": { + "MIR": 1.03, + "MW": 114.23, "SOAP": 0.0, - "KOH": 5.4 + "KOH": 6.6 }, - "n-Decane": { - "MIR": 0.68, - "MW": 142.29, - "SOAP": 7.0, - "KOH": 11.0 + "2-Methylheptane": { + "MIR": 1.07, + "MW": 114.23, + "SOAP": 0.0, + "KOH": 7.0 }, - "2,4-Dimethylpentane": { - "MIR": 1.55, + "3-Methylhexane": { + "MIR": 1.61, "MW": 100.2, "SOAP": 0.0, - "KOH": 4.77 + "KOH": 7.0 }, - "n-Octane": { - "MIR": 0.9, - "MW": 114.23, - "SOAP": 0.8, - "KOH": 8.11 + "Styrene": { + "MIR": 1.73, + "MW": 104.15, + "SOAP": 212.3, + "KOH": 58.0 }, - "n-Heptane": { - "MIR": 1.07, - "MW": 100.21, - "SOAP": 0.1, - "KOH": 6.76 + "Isopropylbenzene": { + "MIR": 2.52, + "MW": 120.19, + "SOAP": 95.5, + "KOH": 6.3 + }, + "n-Propylbenzene": { + "MIR": 2.03, + "MW": 120.2, + "SOAP": 109.7, + "KOH": 5.8 + }, + "m-Ethyltoluene": { + "MIR": 7.39, + "MW": 120.19, + "SOAP": 100.6, + "KOH": 11.8 }, "p-Ethyltoluene": { "MIR": 4.44, @@ -329,59 +299,23 @@ "SOAP": 69.7, "KOH": 18.6 }, - "Isobutane": { - "MIR": 1.23, - "MW": 58.12, - "SOAP": 0.0, - "KOH": 2.12 - }, "o-Ethyltoluene": { "MIR": 5.59, "MW": 120.19, "SOAP": 94.8, "KOH": 11.9 }, - "Propane": { - "MIR": 0.49, - "MW": 44.1, - "SOAP": 0.0, - "KOH": 1.09 - }, "m-Diethylbenzene": { "MIR": 7.1, "MW": 134.22, "SOAP": 0.0, "KOH": 32.5 }, - "2,2-Dimethylbutane": { - "MIR": 1.17, - "MW": 86.17, + "p-Diethylbenzene": { + "MIR": 4.43, + "MW": 134.22, "SOAP": 0.0, - "KOH": 2.23 - }, - "Acetylene": { - "MIR": 0.95, - "MW": 26.04, - "SOAP": 0.1, - "KOH": 0.85 - }, - "cis-2-Pentene": { - "MIR": 10.38, - "MW": 70.13, - "SOAP": 3.6, - "KOH": 65.0 - }, - "isoprene": { - "MIR": 10.61, - "MW": 68.12, - "SOAP": 1.9, - "KOH": 37.0 - }, - "cis-2-Butene": { - "MIR": 14.24, - "MW": 56.1, - "SOAP": 3.6, - "KOH": 64.0 + "KOH": 32.7 }, "1,3,5-Trimethylbenzene": { "MIR": 11.76, @@ -389,76 +323,124 @@ "SOAP": 13.5, "KOH": 56.7 }, - "Ethane": { - "MIR": 0.28, - "MW": 30.07, - "SOAP": 0.1, - "KOH": 0.248 + "1,2,4-Trimethylbenzene": { + "MIR": 8.87, + "MW": 120.19, + "SOAP": 20.6, + "KOH": 32.5 }, - "3-Methylpentane": { - "MIR": 1.8, - "MW": 86.18, - "SOAP": 0.2, - "KOH": 5.2 + "1,2,3-Trimethylbenzene": { + "MIR": 11.97, + "MW": 120.19, + "SOAP": 43.9, + "KOH": 32.7 }, - "1-Butene": { - "MIR": 9.73, - "MW": 56.1, - "SOAP": 1.2, - "KOH": 31.4 + "1,2-DCB": { + "MIR": 0.18, + "MW": 147.0, + "SOAP": null, + "KOH": null }, - "p-Diethylbenzene": { - "MIR": 4.43, - "MW": 134.22, - "SOAP": 0.0, - "KOH": 32.7 + "1,3-Butadiene": { + "MIR": 12.61, + "MW": 54.1, + "SOAP": 1.8, + "KOH": 66.6 }, - "n-Pentane": { - "MIR": 1.31, - "MW": 72.15, + "1,4-DCB": { + "MIR": 0.18, + "MW": 147.0, + "SOAP": null, + "KOH": null + }, + "1-Octene": { + "MIR": 3.25, + "MW": 112.2, + "SOAP": null, + "KOH": 30.0 + }, + "2-Ethyltoluene": { + "MIR": 5.59, + "MW": 120.2, + "SOAP": 94.8, + "KOH": 11.9 + }, + "3,4-Ethyltoluene": { + "MIR": 5.92, + "MW": 120.2, + "SOAP": 85.2, + "KOH": 
15.2 + }, + "Acetaldehyde": { + "MIR": 6.54, + "MW": 44.1, + "SOAP": 0.6, + "KOH": 15.0 + }, + "Acetone": { + "MIR": 0.36, + "MW": 58.1, "SOAP": 0.3, - "KOH": 3.8 + "KOH": 0.17 }, - "2,3,4-Trimethylpentane": { - "MIR": 1.03, - "MW": 114.23, + "Butyl Acetate": { + "MIR": 0.83, + "MW": 116.2, "SOAP": 0.0, - "KOH": 6.6 + "KOH": null }, - "2,3-Dimethylpentane": { - "MIR": 1.34, - "MW": 100.2, + "Ethanol": { + "MIR": 1.53, + "MW": 46.1, + "SOAP": 0.6, + "KOH": 3.2 + }, + "Ethyl Acetate": { + "MIR": 0.63, + "MW": 88.1, + "SOAP": 0.1, + "KOH": null + }, + "Hexane": { + "MIR": 1.24, + "MW": 86.2, + "SOAP": 0.1, + "KOH": 5.2 + }, + "IPA": { + "MIR": 0.61, + "MW": 60.1, "SOAP": 0.4, - "KOH": 1.5 + "KOH": 5.1 }, - "Cyclopentane": { - "MIR": 2.39, - "MW": 70.1, - "SOAP": 0.0, - "KOH": 4.97 + "Iso-Propylbenzene": { + "MIR": 2.52, + "MW": 120.2, + "SOAP": 95.5, + "KOH": 6.3 }, - "Propylene": { + "PCE": { + "MIR": 0.03, + "MW": 165.8, + "SOAP": null, + "KOH": 0.16 + }, + "Propene": { "MIR": 11.66, - "MW": 42.08, + "MW": 42.1, "SOAP": 1.6, "KOH": 26.3 }, - "t-2-Pentene": { - "MIR": 10.56, - "MW": 70.13, - "SOAP": 4.0, - "KOH": 67.0 - }, - "m,p-Xylene": { - "MIR": 9.75, - "MW": 106.16, - "SOAP": 75.8, - "KOH": 18.95 + "TCE": { + "MIR": 0.64, + "MW": 131.4, + "SOAP": null, + "KOH": 1.9 }, - "Methylcyclopentane": { - "MIR": 2.19, - "MW": 84.16, - "SOAP": 0.0, - "KOH": 5.2 + "VCM": { + "MIR": 2.83, + "MW": 62.5, + "SOAP": null, + "KOH": null } } \ No newline at end of file diff --git a/AeroViz/dataProcess/VOC/voc_par.pkl b/AeroViz/dataProcess/VOC/voc_par.pkl deleted file mode 100644 index 02eed938eca1166c277709c82e9280b6a0a2c4c0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4446 zcmai2TW}NS6<%L;!PnTaNpKwkA(+9^)Ha<-7{V?#23%RTd?CgJDzBt9Qdq52X%&!L z2KoTO=}z;&=7BuqC7t#yFUbQuw3EqY(#+7wWb(k|smYLD$+SrftvkuIIsZ9lf7#Ll z`=HhC`Ooe9&N=(<UTS=0u2aFo>CEd^N-u_!xq=a1F6ddqbpnTVTR(#Tuep1E;a+eD zoW^+eOu=>fH!jauwo$Njvl!0mmVU-46kVrf)XXK%M(NtU2AVyr5;rr6Vp!_9PJ6s$ z<ul2%rV&pFC0Ok|Y?ZS4GDVXunRZ4m6!bEfjTek$&MMl4QqmR+n{c7)_#k>*a?On> zm))_tg!`g9>kdTfB3mMjSKM^F)0DEyc|$1GFF)n3UL|>hQ+L3fnwt9Y|KY%&h+?~m z)zPIoMY;WY{+ZzIHTAK}cV}+=>Xb%cDatY)J=zE!|50DP4o6^C!}IkI)j!@6M~eo^ zU_sIFy5%QyHlJg?UhUoc1E*eo=fAYRWl`EEKYbed?f<&Qzb8lY!L=HFilaw+<LKur z-EVx$@tvgjR-b1-Q9gHQdmxJY{sq2X)I_@dmwU8X_`N&h*M8OwmR>rkVcLIrj@R{R z<o~ZK;`HJi*?)S0_19Fe3JLbpI?z|d@!10BzgKesX~&-^Wq@G|s-~%EW|D%Uhyt|L z35Fu|;y1k|-IneY%%;0MUVD+(ZL6c7)mNB8q@Cl;#7$<Rn<Kr=NJTk<PgG*yeU*^v z-OSYFamsi(Ob87$O8MqY9wDZG_qdc31N$^wr)jMJD9ewq|2rzmm9X|}QE0C>^ZIEz zE9)0YzJ8Gy*)8qU{k@du;w0PuLPds}pOyK0`pDqQI_G~>`lmPp8Zq?`mY-(-q^&?4 zROyeAf9E9Wug&s#m?~uD3(_~2SbzN@b;|!Z&mhO|b&{9gK@AJ-sa=G3^9<we=`CtM z74Upj$sglc8|VBrO%(9JsUC?B%HLd|>sz~-{|_jRrG$)+GF-n%mG0SRyVtohC-*;4 zALGCA8g<&ul(bK{zo$}udpN!b$MqfY^_v)X%pJymiuwDL<jdXIDd`>16gwW&7JI$6 zSZb=Y#h0R=e_7iWl>R-H8u>Vfttet_9Q}No_HQwt?*EO;YDv;UwC<E`f$+nCLwmBp zAK^0lRz<mUqRM`kMgz8w=)XE6<z!#wvit$j3-<B;lZy<`ZXQr!hINu*^|Rkej`tfH zMD3Nfr~K+U{(Ch8mgo%1w)XO?Zfk1GJ1UmXYLfl_kmDQRe0Fer2RJ?t@5g0Xd-ki# znt1t_>hXv8!Vf(ddfEPZjhy0qm>)2IRx#fXbza}c=O;NHZ!A<EK!W|1?k<*pDe)n@ zf9G=EE+0_%plkJbxykjkjs15@{%|~-GmOtLKcM(Q0sJU@P~9oAtvBJbp{?=$sXuYs z9Fh7sF7jV<`}9$rec+8Hb^>FWc>YGE2VAB8sV?ide*Ve%?B}>|sx%Jo^5f$PS-;+c zA3bO>Io6H|0(@0t?b0ZZ%|WgYuYOxN4{t2&miSTRm9dX?ara}Yzs`d)z7tQs^@kUJ z{}JJG>pNK=#Gki8>8AcJ?Uep0uQkR^Wqdko@IyO`8qbG&asx#ApRj%#=l7MYA4XoG z{(h9|KYyS3^XlhQeq5}RUVj7eaHTRIsDHb-|Gn{EJGQA$c1eE7e+S|Fc8t!KcFGM3 z^<U+&M*UsK<G)fL#IqdBJ0j5sjpaHX%VXOgwAB!sA{h12$kaxgwtE^j(OWm~yHmz_ zqZm%;McAze<CbwQK4HN<*g&O^I!)ex+_$G-FYnfFqNZR+wbiMg-n1e9dDVtk6&qsV 
zzI`MVHSBcROdEMyw}jV;VlHo3bkS~wVuf73Y$gnA#Sr4Qio$tZ-#%Pzp{;U<miXrE zasjn{^EtDG|60a{S83Ka>N)V|LMO^5u4z7Iuz^EK!_Eo!9V3JLLa1tcmO3He>L>$4 z;o<PU(0p_h-Y|r+Zl;WMIRzFPo*j$Ao6t~rB>W><(2Dl9<1!mo_~2T%a;EFFj+X4Q zS&;;@;k=BlVSa4fb^Jr2>7<R}`UP^7C;A^D^8m-@fPD~L=<pbX<`du&FhdY7-fM^n zv#k(c2)yg549XiOVu!$)74jh2GFDESxikX=f~lN^d~{$Gn>gzvijx~P0z;t~u#Cpr zvQ-fV?a;9BPH9;fh(Nz*cyJi-pD$#xhM-T@Nzc$`f#fx4g^oi3CowcX;($g`0RZwW zNri`o2S-AO*?s~55PoHiNr#Z|VYb>OLh>{=E&yIoJ0MWw>Tb(dr5XUZJU4B*Dk1Vi zsAj<h=Daadps^~_fT$ua-_~TN2slcZ{TW#wEhO?F1~f(np&Dz@Xa>Xx0k7>U2@Xky zI`TEaLC(T2>H=$`Yov8)0+6d<E$_BqWy#9`4FF_qi>M1>7q5cZ!Adtgw4uhNZpR2z zHInXit0CSG8!hoQVybjSya!ICBQVa=(b`GY2$O6cr&L39VisQVqYJLnI5&ML23;JP zKIRHz>EAlOx!CyF#Q2<x1`S~qzG}dy0cdkEB)yLxaVT0Hr{-yRG1YAwK5JZXo#yzm ish`2Wy}D`U&K2XSQXXDgb=!!WdcrV^?$u%`5&j+wb*;Am diff --git a/AeroViz/dataProcess/__init__.py b/AeroViz/dataProcess/__init__.py index f87c753..01a44e2 100644 --- a/AeroViz/dataProcess/__init__.py +++ b/AeroViz/dataProcess/__init__.py @@ -1,11 +1,33 @@ +from pathlib import Path + from .Chemistry import Chemistry from .Optical import Optical from .SizeDistr import SizeDistr from .VOC import VOC -__all__ = [ - 'Optical', - 'SizeDistr', - 'Chemistry', - 'VOC', -] +__all__ = ['DataProcess'] + + +def DataProcess(method: str, + path_out: Path, + excel: bool = False, + csv: bool = True, + ): + # Mapping of method names to their respective classes + method_class_map = { + 'Chemistry': Chemistry, + 'Optical': Optical, + 'SizeDistr': SizeDistr, + 'VOC': VOC + } + + if method not in method_class_map.keys(): + raise ValueError(f"Method name '{method}' is not valid. \nMust be one of: {list(method_class_map.keys())}") + + writer_module = method_class_map[method]( + path_out=path_out, + excel=excel, + csv=csv + ) + + return writer_module diff --git a/AeroViz/dataProcess/core/__init__.py b/AeroViz/dataProcess/core/__init__.py index 0d4b6f6..9c6c7aa 100644 --- a/AeroViz/dataProcess/core/__init__.py +++ b/AeroViz/dataProcess/core/__init__.py @@ -1,92 +1,86 @@ -from pandas import DatetimeIndex, DataFrame, concat -from pathlib import Path import pickle as pkl from datetime import datetime as dtm +from pathlib import Path +from pandas import concat -class _writter: - - def __init__(self, path_out=None, excel=True, csv=False): - - self.path_out = Path(path_out) if path_out is not None else path_out - self.excel = excel - self.csv = csv - - def _pre_process(self, _out): - - if type(_out) == dict: - for _ky, _df in _out.items(): - _df.index.name = 'time' - else: - _out.index.name = 'time' - return _out +class Writer: - def _save_out(self, _nam, _out): + def __init__(self, path_out=None, excel=True, csv=False): + self.path_out = Path(path_out) if path_out is not None else path_out + self.excel = excel + self.csv = csv - _check = True - while _check: + @staticmethod + def pre_process(_out): + if isinstance(_out, dict): + for _ky, _df in _out.items(): + _df.index.name = 'time' + else: + _out.index.name = 'time' - try: - if self.path_out is not None: - self.path_out.mkdir(exist_ok=True, parents=True) - with (self.path_out / f'{_nam}.pkl').open('wb') as f: - pkl.dump(_out, f, protocol=pkl.HIGHEST_PROTOCOL) + return _out - if self.excel: - from pandas import ExcelWriter - with ExcelWriter(self.path_out / f'{_nam}.xlsx') as f: - if type(_out) == dict: - for _key, _val in _out.items(): - _val.to_excel(f, sheet_name=f'{_key}') - else: - _out.to_excel(f, sheet_name=f'{_nam}') + def save_out(self, _nam, _out): + _check = True + while _check: - if self.csv: - if type(_out) == dict: - _path_out = 
self.path_out / _nam - _path_out.mkdir(exist_ok=True, parents=True) + try: + if self.path_out is not None: + self.path_out.mkdir(exist_ok=True, parents=True) + with (self.path_out / f'{_nam}.pkl').open('wb') as f: + pkl.dump(_out, f, protocol=pkl.HIGHEST_PROTOCOL) - for _key, _val in _out.items(): - _val.to_csv(_path_out / f'{_key}.csv') - else: - _out.to_csv(self.path_out / f'{_nam}.csv') + if self.excel: + from pandas import ExcelWriter + with ExcelWriter(self.path_out / f'{_nam}.xlsx') as f: + if type(_out) == dict: + for _key, _val in _out.items(): + _val.to_excel(f, sheet_name=f'{_key}') + else: + _out.to_excel(f, sheet_name=f'{_nam}') - _check = False + if self.csv: + if isinstance(_out, dict): + _path_out = self.path_out / _nam + _path_out.mkdir(exist_ok=True, parents=True) - except PermissionError as _err: - print('\n', _err) - input('\t\t\33[41m Please Close The File And Press "Enter" \33[0m\n') + for _key, _val in _out.items(): + _val.to_csv(_path_out / f'{_key}.csv') + else: + _out.to_csv(self.path_out / f'{_nam}.csv') + _check = False -def _run_process(*_ini_set): - def _decorator(_prcs_fc): - def _wrap(*arg, **kwarg): - _fc_name, _nam = _ini_set + except PermissionError as _err: + print('\n', _err) + input('\t\t\33[41m Please Close The File And Press "Enter" \33[0m\n') - if kwarg.get('nam') is not None: - _nam = kwarg.pop('nam') - print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Process \033[92m{_fc_name}\033[0m -> {_nam}") +def run_process(*_ini_set): + def _decorator(_prcs_fc): + def _wrap(*arg, **kwarg): + _fc_name, _nam = _ini_set - _class, _out = _prcs_fc(*arg, **kwarg) - _out = _class._pre_process(_out) + if kwarg.get('nam') is not None: + _nam = kwarg.pop('nam') - _class._save_out(_nam, _out) + print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Process \033[92m{_fc_name}\033[0m -> {_nam}") - return _out + _class, _out = _prcs_fc(*arg, **kwarg) + _out = _class.pre_process(_out) - return _wrap + _class.save_out(_nam, _out) - return _decorator + return _out + return _wrap -def _union_index(*_df_arg): - _idx = concat(_df_arg, axis=1).index + return _decorator - # _idx = DatetimeIndex([]) - # for _df in _df_arg: - # _idx = _idx.union(DataFrame(_df).index) +def union_index(*_df_arg): + _idx = concat(_df_arg, axis=1).index - return [_df.reindex(_idx) if _df is not None else None for _df in _df_arg] + return [_df.reindex(_idx) if _df is not None else None for _df in _df_arg] diff --git a/AeroViz/plot/__init__.py b/AeroViz/plot/__init__.py index 0c401b4..b5b648e 100644 --- a/AeroViz/plot/__init__.py +++ b/AeroViz/plot/__init__.py @@ -1,7 +1,13 @@ from . import distribution -from . import improve from . import meteorology from . import optical -from . 
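The new DataProcess factory returns one of the Writer-based modules by name; calling any method decorated with run_process computes the result, normalises the index name to 'time' via Writer.pre_process and saves it through Writer.save_out (pkl, plus xlsx/csv when enabled). A hypothetical usage sketch with a toy input frame (the species names must match support_voc.json; the output path is an assumption):

import pandas as pd
from pathlib import Path
from AeroViz.dataProcess import DataProcess

df_voc = pd.DataFrame({'Toluene': [1.2, 0.8], 'Benzene': [0.3, 0.4]},
                      index=pd.date_range('2024-01-01', periods=2, freq='h'))

voc = DataProcess('VOC', path_out=Path('output/voc'), excel=False, csv=True)
result = voc.VOC_basic(df_voc)   # dict with 'Conc', 'OFP', 'SOAP', 'LOH' frames
ofp = result['OFP']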
import timeseries +from .bar import bar +from .box import box +from .pie import pie, donuts +from .radar import radar +from .regression import linear_regression, multiple_linear_regression +from .scatter import scatter from .templates import * +from .timeseries import timeseries, timeseries_template, timeseries_stacked from .utils import * +from .violin import violin diff --git a/AeroViz/plot/bar.py b/AeroViz/plot/bar.py new file mode 100644 index 0000000..0123365 --- /dev/null +++ b/AeroViz/plot/bar.py @@ -0,0 +1,126 @@ +from typing import Literal + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from matplotlib.pyplot import Figure, Axes +from pandas import DataFrame + +from AeroViz.plot.utils import * + +__all__ = ['bar'] + + +@set_figure(fw='bold') +def bar(data_set: DataFrame | dict, + data_std: DataFrame | None, + labels: list[str], + unit: str, + style: Literal["stacked", "dispersed"] = "dispersed", + orientation: Literal["va", "ha"] = 'va', + ax: Axes | None = None, + symbol=True, + **kwargs + ) -> tuple[Figure, Axes]: + """ + Parameters + ---------- + data_set : pd.DataFrame or dict + A mapping from category names to a list of species mean or a DataFrame with columns as categories and values as means. + data_std : pd.DataFrame or None + A DataFrame with standard deviations corresponding to data_set, or None if standard deviations are not provided. + labels : list of str + The species names. + unit : str + The unit for the values. + style : {'stacked', 'dispersed'}, default 'dispersed' + Whether to display the bars stacked or dispersed. + orientation : {'va', 'ha'}, default 'va' + The orientation of the bars, 'va' for vertical and 'ha' for horizontal. + ax : plt.Axes or None, default None + The Axes object to plot on. If None, a new figure and Axes are created. + symbol : bool, default True + Whether to display values for each bar. + kwargs : dict + Additional keyword arguments passed to the barplot function. + + Returns + ------- + matplotlib.Axes + The Axes object containing the plot. 
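A hypothetical call of the new bar helper with made-up species means for two groups; with style='stacked' each group is drawn as percentage contributions, and data_std can stay None when no error bars are wanted (the 'PM25' unit key is only consulted in the dispersed style):

import pandas as pd
from AeroViz.plot import bar

means = pd.DataFrame({'OM': [10.0, 8.0], 'SO4': [6.0, 7.0], 'NO3': [4.0, 5.0]},
                     index=['Site A', 'Site B'])
fig, ax = bar(means, data_std=None, labels=list(means.columns), unit='PM25',
              style='stacked', orientation='va')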
+ + """ + # data process + data = data_set.values + + if data_std is None: + data_std = np.zeros(data.shape) + else: + data_std = data_std.values + + groups, species = data.shape + groups_arr = np.arange(groups) + species_arr = np.arange(species) + + total = np.array([data.sum(axis=1), ] * species).T + + pct_data = data / total * 100 + data_cum = pct_data.cumsum(axis=1) + + # figure info + category_names = kwargs.get('ticks') or list(data_set.index) + title = kwargs.get('title', '') + colors = kwargs.get('colors') or (Color.colors1 if species == 6 else Color.getColor(num=species)) + + fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) + + if style == "stacked": + for i in range(species): + widths = pct_data[:, i] + starts = data_cum[:, i] - pct_data[:, i] + + if orientation == 'va': + _ = ax.bar(groups_arr, widths, bottom=starts, width=0.7, color=colors[i], label=labels[i], + edgecolor=None, capsize=None) + else: + _ = ax.barh(groups_arr, widths, left=starts, height=0.7, color=colors[i], label=labels[i], + edgecolor=None, capsize=None) + if symbol: + ax.bar_label(_, fmt=auto_label_pct, label_type='center', padding=0, fontsize=8, weight='bold') + + if style == "dispersed": + width = 0.1 + block = width / 4 + + for i in range(species): + val = data[:, i] + std = (0,) * groups, data_std[:, i] + if orientation == 'va': + _ = ax.bar(groups_arr + (i + 1) * (width + block), val, yerr=std, width=width, color=colors[i], + edgecolor=None, capsize=None) + else: + _ = ax.barh(groups_arr + (i + 1) * (width + block), val, xerr=std, height=width, color=colors[i], + edgecolor=None, capsize=None) + if symbol: + ax.bar_label(_, fmt=auto_label_pct, label_type='center', padding=0, fontsize=8, weight='bold') + + if orientation == 'va': + xticks = groups_arr + (species / 2 + 0.5) * (width + block) if style == "dispersed" else groups_arr + ax.set_xticks(xticks, category_names, weight='bold') + ax.set_ylabel(Unit(unit) if style == "dispersed" else '$Contribution (\\%)$') + ax.set_ylim(0, None if style == "dispersed" else 100) + ax.legend(labels, bbox_to_anchor=(1, 1), loc='upper left', prop={'size': 8}) + + if orientation == 'ha': + ax.invert_yaxis() + yticks = groups_arr + 3.5 * (width + block) if style == "dispersed" else groups_arr + ax.set_yticks(yticks, category_names, weight='bold') + ax.set_xlabel(Unit(unit) if style == "dispersed" else '$Contribution (\\%)$') + ax.set_xlim(0, None if style == "dispersed" else 100) + ax.legend(labels, bbox_to_anchor=(1, 1), loc='upper left', prop={'size': 8}) + + # fig.savefig(f"Barplot_{title}") + + plt.show() + + return fig, ax diff --git a/AeroViz/plot/box.py b/AeroViz/plot/box.py new file mode 100644 index 0000000..41fc2d4 --- /dev/null +++ b/AeroViz/plot/box.py @@ -0,0 +1,69 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from matplotlib.pyplot import Figure, Axes + +from AeroViz.plot.utils import * + +__all__ = ['box'] + + +@set_figure +def box(df: pd.DataFrame, + x: str, + y: str, + x_bins: list | np.ndarray = None, + add_scatter: bool = True, + ax: Axes | None = None, + **kwargs + ) -> tuple[Figure, Axes]: + fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) + + df = df.dropna(subset=[x, y]).copy() + x_data, y_data = df[x].to_numpy(), df[y].to_numpy() + + bins = np.array(x_bins) + bins = np.round(bins) + wid = (bins + (bins[1] - bins[0]) / 2)[0:-1] + + df[x + '_bin'] = pd.cut(x=x_data, bins=bins, labels=wid) + + group = x + '_bin' + column = y + grouped 
= df.groupby(group, observed=False) + + names, vals = [], [] + + for i, (name, subdf) in enumerate(grouped): + names.append('{:.0f}'.format(name)) + vals.append(subdf[column].dropna().values) + + plt.boxplot(vals, labels=names, positions=wid, widths=(bins[1] - bins[0]) / 3, + showfliers=False, showmeans=True, meanline=True, patch_artist=True, + boxprops=dict(facecolor='#f2c872', alpha=.7), + meanprops=dict(color='#000000', ls='none'), + medianprops=dict(ls='-', color='#000000')) + + ax.set(xlim=kwargs.get('xlim', (x_data.min(), x_data.max())), + ylim=kwargs.get('ylim', (y_data.min(), y_data.max())), + xlabel=kwargs.get('xlabel', Unit(x)), + ylabel=kwargs.get('ylabel', Unit(y)), + title=kwargs.get('title', '')) + + ax.set_xticks(bins, labels=bins.astype(int)) + + if add_scatter: + for i, (name, subdf) in enumerate(grouped): + jitter = np.random.normal(0, 0.5, len(subdf)) + ax.scatter([name] * len(subdf) + jitter, subdf[column], s=10, c='gray', alpha=0.5) + + plt.show() + + return fig, ax + + +if __name__ == '__main__': + from AeroViz import DataBase + + df = DataBase(load_data=True) + box(df, x='PM25', y='Extinction', x_bins=np.arange(0, 120, 10)) diff --git a/AeroViz/plot/distribution/distribution.py b/AeroViz/plot/distribution/distribution.py index 451dbba..4f0d58f 100644 --- a/AeroViz/plot/distribution/distribution.py +++ b/AeroViz/plot/distribution/distribution.py @@ -16,567 +16,561 @@ from AeroViz.plot.utils import * __all__ = [ - 'plot_dist', - 'heatmap', - 'heatmap_tms', - 'three_dimension', - 'curve_fitting' + 'plot_dist', + 'heatmap', + 'heatmap_tms', + 'three_dimension', + 'curve_fitting' ] @set_figure def plot_dist(data: DataFrame | np.ndarray, - data_std: DataFrame | None = None, - std_scale: float | None = 1, - unit: Literal["Number", "Surface", "Volume", "Extinction"] = 'Number', - additional: Literal["Std", "Enhancement", "Error"] = None, - fig: Figure | None = None, - ax: Axes | None = None, - **kwargs - ) -> tuple[Figure, Axes]: - """ - Plot particle size distribution curves and optionally show enhancements. - - Parameters - ---------- - data : dict or list - If dict, keys are labels and values are arrays of distribution values. - If listed, it should contain three arrays for different curves. - data_std : dict - Dictionary containing standard deviation data for ambient extinction distribution. - std_scale : float - The width of standard deviation. - unit : {'Number', 'Surface', 'Volume', 'Extinction'} - Unit of measurement for the data. - additional : {'std', 'enhancement', 'error'} - Whether to show enhancement curves. - fig : Figure, optional - Matplotlib Figure object to use. - ax : AxesSubplot, optional - Matplotlib AxesSubplot object to use. If not provided, a new subplot will be created. - **kwargs : dict - Additional keyword arguments. - - Returns - ------- - ax : AxesSubplot - Matplotlib AxesSubplot. 
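Further down in this patch, the rewritten plot_dist replaces the inline gathering of legend handles from both axes with a combine_legends helper imported from AeroViz.plot.utils. Its actual implementation is not shown here; this is only a sketch of the equivalent logic implied by the call ax.legend(*combine_legends(fig.get_axes())):

def combine_legends(axes):
    # Merge handles and labels from several Axes (e.g. an ax/twinx pair).
    handles, labels = [], []
    for ax in axes:
        h, l = ax.get_legend_handles_labels()
        handles += h
        labels += l
    return handles, labels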
- - Examples - -------- - >>> plot_dist(DataFrame(...), additional="Enhancement") - """ - fig, ax = plt.subplots(**{**{'figsize': (6, 2)}, **kwargs.get('fig_kws', {})}) if ax is None else ( - ax.get_figure(), ax) - - # plot_kws - plot_kws = dict(ls='solid', lw=2, alpha=0.8, **kwargs.get('plot_kws', {})) - - # Receive input data - dp = np.array(data.columns, dtype=float) - states = np.array(data.index) - - for state in states: - mean = data.loc[state].to_numpy() - ax.plot(dp, mean, label=state, color=Color.color_choose[state][0], **plot_kws) - - if additional == 'Std': - std = data_std.loc[state].to_numpy() * std_scale - ax.fill_between(dp, y1=mean - std, y2=mean + std, alpha=0.4, color=Color.color_choose[state][1], - edgecolor=None, label='__nolegend__') - - # figure_set - ax.set(xlim=(dp.min(), dp.max()), ylim=(0, None), xscale='log', - xlabel=r'$D_{p} (nm)$', ylabel=Unit(f'{unit}_dist'), title=kwargs.get('title', unit)) - - ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 3), useMathText=True) - ax.grid(axis='x', which='major', color='k', linestyle='dashdot', linewidth=0.4, alpha=0.4) - - Clean = data.loc['Clean'].to_numpy() - Transition = data.loc['Transition'].to_numpy() - Event = data.loc['Event'].to_numpy() - - if additional == "Enhancement": - ax2 = ax.twinx() - ax2.plot(dp, Transition / Clean, ls='dashed', color='k', label=f'{additional} ratio 1') - ax2.plot(dp, Event / Transition, ls='dashed', color='gray', label=f'{additional} ratio 2') - ax2.set(ylabel='Enhancement ratio') - - elif additional == "Error": - ax2 = ax.twinx() - error1 = np.where(Transition != 0, np.abs(Clean - Transition) / Clean * 100, 0) - error2 = np.where(Event != 0, np.abs(Transition - Event) / Transition * 100, 0) - - ax2.plot(dp, error1, ls='--', color='k', label='Error 1 ') - ax2.plot(dp, error2, ls='--', color='gray', label='Error 2') - ax2.set(ylabel='Error (%)') - - # Combine legends from ax and ax2 - axes_list = fig.get_axes() - legends_combined = [legend for axes in axes_list for legend in axes.get_legend_handles_labels()[0]] - labels_combined = [label for axes in axes_list for label in axes.get_legend_handles_labels()[1]] - - ax.legend(legends_combined, labels_combined, prop={'weight': 'bold'}) - - plt.show() - - return fig, ax + data_std: DataFrame | None = None, + std_scale: float | None = 1, + unit: Literal["Number", "Surface", "Volume", "Extinction"] = 'Number', + additional: Literal["Std", "Enhancement", "Error"] = None, + fig: Figure | None = None, + ax: Axes | None = None, + **kwargs + ) -> tuple[Figure, Axes]: + """ + Plot particle size distribution curves and optionally show enhancements. + + Parameters + ---------- + data : dict or list + If dict, keys are labels and values are arrays of distribution values. + If listed, it should contain three arrays for different curves. + data_std : dict + Dictionary containing standard deviation data for ambient extinction distribution. + std_scale : float + The width of standard deviation. + unit : {'Number', 'Surface', 'Volume', 'Extinction'} + Unit of measurement for the data. + additional : {'std', 'enhancement', 'error'} + Whether to show enhancement curves. + fig : Figure, optional + Matplotlib Figure object to use. + ax : AxesSubplot, optional + Matplotlib AxesSubplot object to use. If not provided, a new subplot will be created. + **kwargs : dict + Additional keyword arguments. + + Returns + ------- + ax : AxesSubplot + Matplotlib AxesSubplot. 
+ + Examples + -------- + >>> plot_dist(DataFrame(...), additional="Enhancement") + """ + fig, ax = plt.subplots(**{**{'figsize': (6, 2)}, **kwargs.get('fig_kws', {})}) if ax is None else ( + ax.get_figure(), ax) + + # plot_kws + plot_kws = dict(ls='solid', lw=2, alpha=0.8, **kwargs.get('plot_kws', {})) + + # Receive input data + dp = np.array(data.columns, dtype=float) + states = np.array(data.index) + + for state in states: + mean = data.loc[state].to_numpy() + ax.plot(dp, mean, label=state, color=Color.color_choose[state][0], **plot_kws) + + if additional == 'Std': + std = data_std.loc[state].to_numpy() * std_scale + ax.fill_between(dp, y1=mean - std, y2=mean + std, alpha=0.4, color=Color.color_choose[state][1], + edgecolor=None, label='__nolegend__') + + # figure_set + ax.set(xlim=(dp.min(), dp.max()), ylim=(0, None), xscale='log', + xlabel=r'$D_{p} (nm)$', ylabel=Unit(f'{unit}_dist'), title=kwargs.get('title', unit)) + + ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 3), useMathText=True) + ax.grid(axis='x', which='major', color='k', linestyle='dashdot', linewidth=0.4, alpha=0.4) + + Clean = data.loc['Clean'].to_numpy() + Transition = data.loc['Transition'].to_numpy() + Event = data.loc['Event'].to_numpy() + + if additional == "Enhancement": + ax2 = ax.twinx() + ax2.plot(dp, Transition / Clean, ls='dashed', color='k', label=f'{additional} ratio 1') + ax2.plot(dp, Event / Transition, ls='dashed', color='gray', label=f'{additional} ratio 2') + ax2.set(ylabel='Enhancement ratio') + + else: + ax2 = ax.twinx() + error1 = np.where(Transition != 0, np.abs(Clean - Transition) / Clean * 100, 0) + error2 = np.where(Event != 0, np.abs(Transition - Event) / Transition * 100, 0) + + ax2.plot(dp, error1, ls='--', color='k', label='Error 1 ') + ax2.plot(dp, error2, ls='--', color='gray', label='Error 2') + ax2.set(ylabel='Error (%)') + + ax.legend(*combine_legends(fig.get_axes()), prop={'weight': 'bold'}) + + plt.show() + + return fig, ax @set_figure def heatmap(data: DataFrame, - unit: Literal["Number", "Surface", "Volume", "Extinction"], - cmap: str = 'Blues', - colorbar: bool = False, - magic_number: int = 11, - ax: Axes | None = None, - **kwargs - ) -> tuple[Figure, Axes]: - """ - Plot a heatmap of particle size distribution. + unit: Literal["Number", "Surface", "Volume", "Extinction"], + cmap: str = 'Blues', + colorbar: bool = False, + magic_number: int = 11, + ax: Axes | None = None, + **kwargs + ) -> tuple[Figure, Axes]: + """ + Plot a heatmap of particle size distribution. - Parameters - ---------- - data : pandas.DataFrame - The data containing particle size distribution values. Each column corresponds to a size bin, - and each row corresponds to a different distribution. + Parameters + ---------- + data : pandas.DataFrame + The data containing particle size distribution values. Each column corresponds to a size bin, + and each row corresponds to a different distribution. - unit : {'Number', 'Surface', 'Volume', 'Extinction'}, optional - The unit of measurement for the data. + unit : {'Number', 'Surface', 'Volume', 'Extinction'}, optional + The unit of measurement for the data. - cmap : str, default='Blues' - The colormap to use for the heatmap. + cmap : str, default='Blues' + The colormap to use for the heatmap. - colorbar : bool, default=False - Whether to show the colorbar. + colorbar : bool, default=False + Whether to show the colorbar. - magic_number : int, default=11 - The number of bins to use for the histogram. 
+ magic_number : int, default=11 + The number of bins to use for the histogram. - ax : matplotlib.axes.Axes, optional - The axes to plot the heatmap on. If not provided, a new subplot will be created. + ax : matplotlib.axes.Axes, optional + The axes to plot the heatmap on. If not provided, a new subplot will be created. - **kwargs - Additional keyword arguments to pass to matplotlib functions. + **kwargs + Additional keyword arguments to pass to matplotlib functions. - Returns - ------- - matplotlib.axes.Axes - The Axes object containing the heatmap. + Returns + ------- + matplotlib.axes.Axes + The Axes object containing the heatmap. - Examples - -------- - >>> heatmap(DataFrame(...), unit='Number') + Examples + -------- + >>> heatmap(DataFrame(...), unit='Number') - Notes - ----- - This function calculates a 2D histogram of the log-transformed particle sizes and the distribution values. - It then plots the heatmap using a logarithmic color scale. + Notes + ----- + This function calculates a 2D histogram of the log-transformed particle sizes and the distribution values. + It then plots the heatmap using a logarithmic color scale. - """ - fig, ax = plt.subplots(**{**{'figsize': (3, 3)}, **kwargs.get('fig_kws', {})}) if ax is None else ( - ax.get_figure(), ax) + """ + fig, ax = plt.subplots(**{**{'figsize': (3, 3)}, **kwargs.get('fig_kws', {})}) if ax is None else ( + ax.get_figure(), ax) - min_value = 1e-8 - dp = np.array(data.columns, dtype=float) - x = np.append(np.tile(dp, data.to_numpy().shape[0]), np.log(dp).max()) - y = np.append(data.to_numpy().flatten(), min_value) + min_value = 1e-8 + dp = np.array(data.columns, dtype=float) + x = np.append(np.tile(dp, data.to_numpy().shape[0]), np.log(dp).max()) + y = np.append(data.to_numpy().flatten(), min_value) - # mask NaN - x = x[~np.isnan(y)] - y = y[~np.isnan(y)] + # mask NaN + x = x[~np.isnan(y)] + y = y[~np.isnan(y)] - # using log(x) - histogram, xedges, yedges = np.histogram2d(np.log(x), y, bins=len(dp) + magic_number) - histogram[histogram == 0] = min_value # Avoid log(0) + # using log(x) + histogram, xedges, yedges = np.histogram2d(np.log(x), y, bins=len(dp) + magic_number) + histogram[histogram == 0] = min_value # Avoid log(0) - plot_kws = dict(norm=colors.LogNorm(vmin=1, vmax=histogram.max()), cmap=cmap, **kwargs.get('plot_kws', {})) + plot_kws = dict(norm=colors.LogNorm(vmin=1, vmax=histogram.max()), cmap=cmap, **kwargs.get('plot_kws', {})) - pco = ax.pcolormesh(xedges[:-1], yedges[:-1], histogram.T, shading='gouraud', **plot_kws) + pco = ax.pcolormesh(xedges[:-1], yedges[:-1], histogram.T, shading='gouraud', **plot_kws) - # TODO: - ax.plot(np.log(dp), data.mean() + data.std(), ls='dashed', color='r', label='pollutant') - ax.plot(np.log(dp), data.mean(), ls='dashed', color='k', alpha=0.5, label='mean') - ax.plot(np.log(dp), data.mean() - data.std(), ls='dashed', color='b', label='clean') + ax.plot(np.log(dp), data.mean() + data.std(), ls='dashed', color='r', label='pollutant') + ax.plot(np.log(dp), data.mean(), ls='dashed', color='k', alpha=0.5, label='mean') + ax.plot(np.log(dp), data.mean() - data.std(), ls='dashed', color='b', label='clean') - ax.set(xlim=(np.log(dp).min(), np.log(dp).max()), ylim=(0, None), - xlabel=r'$D_{p} (nm)$', ylabel=Unit(f'{unit}_dist'), title=kwargs.get('title', unit)) + ax.set(xlim=(np.log(dp).min(), np.log(dp).max()), ylim=(0, None), + xlabel=r'$D_{p} (nm)$', ylabel=Unit(f'{unit}_dist'), title=kwargs.get('title', unit)) - major_ticks = np.power(10, np.arange(np.ceil(np.log10(dp.min())), 
np.floor(np.log10(dp.max())) + 1)) - minor_ticks = [v for v in np.concatenate([_ * np.arange(2, 10) for _ in major_ticks]) if min(dp) <= v <= max(dp)] + major_ticks = np.power(10, np.arange(np.ceil(np.log10(dp.min())), np.floor(np.log10(dp.max())) + 1)) + minor_ticks = [v for v in np.concatenate([_ * np.arange(2, 10) for _ in major_ticks]) if min(dp) <= v <= max(dp)] - ax.set_xticks(np.log(major_ticks)) - ax.set_xticks(np.log(minor_ticks), minor=True) - ax.xaxis.set_major_formatter(FuncFormatter(lambda tick, pos: "{:.0f}".format(np.exp(tick)))) + ax.set_xticks(np.log(major_ticks)) + ax.set_xticks(np.log(minor_ticks), minor=True) + ax.xaxis.set_major_formatter(FuncFormatter(lambda tick, pos: "{:.0f}".format(np.exp(tick)))) - ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 3), useMathText=True) - ax.grid(axis='x', which='major', color='k', linestyle='dashdot', linewidth=0.4, alpha=0.4) - ax.legend(prop={'weight': 'bold'}) + ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 3), useMathText=True) + ax.grid(axis='x', which='major', color='k', linestyle='dashdot', linewidth=0.4, alpha=0.4) + ax.legend(prop={'weight': 'bold'}) - if colorbar: - plt.colorbar(pco, pad=0.02, fraction=0.05, label='Counts', **kwargs.get('cbar_kws', {})) + if colorbar: + plt.colorbar(pco, pad=0.02, fraction=0.05, label='Counts', **kwargs.get('cbar_kws', {})) - plt.show() + plt.show() - return fig, ax + return fig, ax @set_figure def heatmap_tms(data: DataFrame, - unit: Literal["Number", "Surface", "Volume", "Extinction"], - cmap: str = 'jet', - ax: Axes | None = None, - **kwargs - ) -> tuple[Figure, Axes]: - """ Plot the size distribution over time. + unit: Literal["Number", "Surface", "Volume", "Extinction"], + cmap: str = 'jet', + ax: Axes | None = None, + **kwargs + ) -> tuple[Figure, Axes]: + """ Plot the size distribution over time. - Parameters - ---------- - data : DataFrame - A DataFrame of particle concentrations to plot the heatmap. + Parameters + ---------- + data : DataFrame + A DataFrame of particle concentrations to plot the heatmap. - ax : matplotlib.axis.Axis - An axis object to plot on. If none is provided, one will be created. + ax : matplotlib.axis.Axis + An axis object to plot on. If none is provided, one will be created. - unit : Literal["Number", "Surface", "Volume", "Extinction"] - default='Number' + unit : Literal["Number", "Surface", "Volume", "Extinction"] + default='Number' - cmap : matplotlib.colormap, default='viridis' - The colormap to use. Can be anything other that 'jet'. + cmap : matplotlib.colormap, default='viridis' + The colormap to use. Can be anything other that 'jet'. - Returns - ------- - ax : matplotlib.axis.Axis + Returns + ------- + ax : matplotlib.axis.Axis - Notes - ----- - Do not dropna when using this code. + Notes + ----- + Do not dropna when using this code. 
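A minimal input sketch for the time-resolved heatmap, assuming a continuous DatetimeIndex (gaps left as NaN rather than dropped, per the note above) and diameter columns in nm; the index span, grid, and values are illustrative only.

import numpy as np
import pandas as pd

dp = np.geomspace(11.8, 593.5, 50)                                   # assumed diameter grid (nm)
idx = pd.date_range('2024-01-01', '2024-02-01', freq='h')            # hourly index, gaps kept as NaN
psd = pd.DataFrame(np.random.rand(idx.size, dp.size) * 1e4, index=idx, columns=dp)
fig, ax = heatmap_tms(psd, unit='Number', freq='10d')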
- Examples - -------- - Plot a SPMS + APS data: - >>> heatmap_tms(DataFrame(...), cmap='jet') - """ - fig, ax = plt.subplots( - **{**{'figsize': (len(data.index) * 0.01, 2)}, **kwargs.get('fig_kws', {})}) if ax is None else ( - ax.get_figure(), ax) + Examples + -------- + Plot a SPMS + APS data: + >>> heatmap_tms(DataFrame(...), cmap='jet') + """ + fig, ax = plt.subplots( + **{**{'figsize': (len(data.index) * 0.01, 2)}, **kwargs.get('fig_kws', {})}) if ax is None else ( + ax.get_figure(), ax) - time = data.index - dp = np.array(data.columns, dtype=float) + time = data.index + dp = np.array(data.columns, dtype=float) - # data = data.interpolate(method='linear', axis=0) - data = np.nan_to_num(data.to_numpy()) + # data = data.interpolate(method='linear', axis=0) + data = np.nan_to_num(data.to_numpy()) - vmin_mapping = {'Number': 1e2, 'Surface': 1e8, 'Volume': 1e9, 'Extinction': 1} + vmin_mapping = {'Number': 1e2, 'Surface': 1e8, 'Volume': 1e9, 'Extinction': 1} - # Set the colorbar min and max based on the min and max of the values - cbar_min = kwargs.get('cbar_kws', {}).pop('cbar_min', vmin_mapping[unit]) - cbar_max = kwargs.get('cbar_kws', {}).pop('cbar_max', np.nanmax(data)) + # Set the colorbar min and max based on the min and max of the values + cbar_min = kwargs.get('cbar_kws', {}).pop('cbar_min', vmin_mapping[unit]) + cbar_max = kwargs.get('cbar_kws', {}).pop('cbar_max', np.nanmax(data)) - # Set the plot_kws - plot_kws = dict(norm=colors.LogNorm(vmin=cbar_min, vmax=cbar_max), cmap=cmap, **kwargs.get('plot_kws', {})) + # Set the plot_kws + plot_kws = dict(norm=colors.LogNorm(vmin=cbar_min, vmax=cbar_max), cmap=cmap, **kwargs.get('plot_kws', {})) - # main plot - pco = ax.pcolormesh(time, dp, data.T, shading='auto', **plot_kws) + # main plot + pco = ax.pcolormesh(time, dp, data.T, shading='auto', **plot_kws) - # Set ax - st_tm, fn_tm = time[0], time[-1] - tick_time = date_range(st_tm, fn_tm, freq=kwargs.get('freq', '10d')).strftime("%F") + # Set ax + st_tm, fn_tm = time[0], time[-1] + tick_time = date_range(st_tm, fn_tm, freq=kwargs.get('freq', '10d')).strftime("%F") - ax.set(xlim=(st_tm, fn_tm), - ylim=(dp.min(), dp.max()), - ylabel='$D_p (nm)$', - xticks=tick_time, - xticklabels=tick_time, - yscale='log', - title=kwargs.get('title', f'{st_tm.strftime("%F")} - {fn_tm.strftime("%F")}')) + ax.set(xlim=(st_tm, fn_tm), + ylim=(dp.min(), dp.max()), + ylabel='$D_p (nm)$', + xticks=tick_time, + xticklabels=tick_time, + yscale='log', + title=kwargs.get('title', f'{st_tm.strftime("%F")} - {fn_tm.strftime("%F")}')) - plt.colorbar(pco, pad=0.02, fraction=0.02, label=Unit(f'{unit}_dist'), **kwargs.get('cbar_kws', {})) + plt.colorbar(pco, pad=0.02, fraction=0.02, label=Unit(f'{unit}_dist'), **kwargs.get('cbar_kws', {})) - plt.show() + plt.show() - return fig, ax + return fig, ax @set_figure def three_dimension(data: DataFrame | np.ndarray, - unit: Literal["Number", "Surface", "Volume", "Extinction"], - cmap: str = 'Blues', - ax: Axes | None = None, - **kwargs - ) -> tuple[Figure, Axes]: - """ - Create a 3D plot with data from a pandas DataFrame or numpy array. + unit: Literal["Number", "Surface", "Volume", "Extinction"], + cmap: str = 'Blues', + ax: Axes | None = None, + **kwargs + ) -> tuple[Figure, Axes]: + """ + Create a 3D plot with data from a pandas DataFrame or numpy array. - Parameters - ---------- - data : DataFrame or ndarray - Input data containing the values to be plotted. + Parameters + ---------- + data : DataFrame or ndarray + Input data containing the values to be plotted. 
- unit : {'Number', 'Surface', 'Volume', 'Extinction'} - Unit of measurement for the data. + unit : {'Number', 'Surface', 'Volume', 'Extinction'} + Unit of measurement for the data. - cmap : str, default='Blues' - The colormap to use for the facecolors. + cmap : str, default='Blues' + The colormap to use for the facecolors. - ax : AxesSubplot, optional - Matplotlib AxesSubplot. If not provided, a new subplot will be created. - **kwargs - Additional keyword arguments to customize the plot. + ax : AxesSubplot, optional + Matplotlib AxesSubplot. If not provided, a new subplot will be created. + **kwargs + Additional keyword arguments to customize the plot. - Returns - ------- - Axes - Matplotlib Axes object representing the 3D plot. + Returns + ------- + Axes + Matplotlib Axes object representing the 3D plot. - Notes - ----- - - The function creates a 3D plot with data provided in a pandas DataFrame or numpy array. - - The x-axis is logarithmically scaled, and ticks and labels are formatted accordingly. - - Additional customization can be done using the **kwargs. + Notes + ----- + - The function creates a 3D plot with data provided in a pandas DataFrame or numpy array. + - The x-axis is logarithmically scaled, and ticks and labels are formatted accordingly. + - Additional customization can be done using the **kwargs. - Example - ------- - >>> three_dimension(DataFrame(...), unit='Number', cmap='Blues') - """ - fig, ax = plt.subplots(figsize=(4, 4), subplot_kw={"projection": "3d"}, - **kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) + Example + ------- + >>> three_dimension(DataFrame(...), unit='Number', cmap='Blues') + """ + fig, ax = plt.subplots(figsize=(4, 4), subplot_kw={"projection": "3d"}, + **kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) - dp = np.array(['11.7', *data.columns, '2437.4'], dtype=float) - lines = data.shape[0] + dp = np.array(['11.7', *data.columns, '2437.4'], dtype=float) + lines = data.shape[0] - _X, _Y = np.meshgrid(np.log(dp), np.arange(lines)) - _Z = np.pad(data, ((0, 0), (1, 1)), 'constant') + _X, _Y = np.meshgrid(np.log(dp), np.arange(lines)) + _Z = np.pad(data, ((0, 0), (1, 1)), 'constant') - verts = [] - for i in range(_X.shape[0]): - verts.append(list(zip(_X[i, :], _Z[i, :]))) + verts = [] + for i in range(_X.shape[0]): + verts.append(list(zip(_X[i, :], _Z[i, :]))) - facecolors = plt.colormaps[cmap](np.linspace(0, 1, len(verts))) - poly = PolyCollection(verts, facecolors=facecolors, edgecolors='k', lw=0.5, alpha=.7) - ax.add_collection3d(poly, zs=range(1, lines + 1), zdir='y') + facecolors = plt.colormaps[cmap](np.linspace(0, 1, len(verts))) + poly = PolyCollection(verts, facecolors=facecolors, edgecolors='k', lw=0.5, alpha=.7) + ax.add_collection3d(poly, zs=range(1, lines + 1), zdir='y') - ax.set(xlim=(np.log(11.7), np.log(2437.4)), ylim=(1, lines), zlim=(0, np.nanmax(_Z)), - xlabel='$D_{p} (nm)$', ylabel='Class', zlabel=Unit(f'{unit}_dist')) + ax.set(xlim=(np.log(11.7), np.log(2437.4)), ylim=(1, lines), zlim=(0, np.nanmax(_Z)), + xlabel='$D_{p} (nm)$', ylabel='Class', zlabel=Unit(f'{unit}_dist')) - ax.set_xticks(np.log([10, 100, 1000])) - ax.set_xticks(np.log([20, 30, 40, 50, 60, 70, 80, 90, 200, 300, 400, 500, 600, 700, 800, 900, 2000]), minor=True) - ax.xaxis.set_major_formatter(FuncFormatter((lambda tick, pos: "{:.0f}".format(np.exp(tick))))) - ax.ticklabel_format(axis='z', style='sci', scilimits=(0, 3), useMathText=True) + ax.set_xticks(np.log([10, 100, 1000])) + ax.set_xticks(np.log([20, 30, 40, 50, 60, 70, 80, 
90, 200, 300, 400, 500, 600, 700, 800, 900, 2000]), minor=True) + ax.xaxis.set_major_formatter(FuncFormatter((lambda tick, pos: "{:.0f}".format(np.exp(tick))))) + ax.ticklabel_format(axis='z', style='sci', scilimits=(0, 3), useMathText=True) - ax.zaxis.get_offset_text().set_visible(False) - exponent = np.floor(np.log10(np.nanmax(data))).astype(int) - ax.text(ax.get_xlim()[1] * 1.05, ax.get_ylim()[1], ax.get_zlim()[1] * 1.1, s=fr'${{\times}}\ 10^{exponent}$') + ax.zaxis.get_offset_text().set_visible(False) + exponent = np.floor(np.log10(np.nanmax(data))).astype(int) + ax.text(ax.get_xlim()[1] * 1.05, ax.get_ylim()[1], ax.get_zlim()[1] * 1.1, s=fr'${{\times}}\ 10^{exponent}$') - plt.show() + plt.show() - return fig, ax + return fig, ax @set_figure def curve_fitting(dp: np.ndarray, - dist: np.ndarray | Series | DataFrame, - mode: int = None, - unit: Literal["Number", "Surface", "Volume", "Extinction"] = None, - ax: Axes | None = None, - **kwargs - ) -> tuple[Figure, Axes]: - """ - Fit a log-normal distribution to the given data and plot the result. + dist: np.ndarray | Series | DataFrame, + mode: int = None, + unit: Literal["Number", "Surface", "Volume", "Extinction"] = None, + ax: Axes | None = None, + **kwargs + ) -> tuple[Figure, Axes]: + """ + Fit a log-normal distribution to the given data and plot the result. - Parameters - ---------- - - dp (array): Array of diameter values. - - dist (array): Array of distribution values corresponding to each diameter. - - mode (int, optional): Number of log-normal distribution to fit (default is None). - - **kwargs: Additional keyword arguments to be passed to the plot_function. + Parameters + ---------- + - dp (array): Array of diameter values. + - dist (array): Array of distribution values corresponding to each diameter. + - mode (int, optional): Number of log-normal distribution to fit (default is None). + - **kwargs: Additional keyword arguments to be passed to the plot_function. - Returns - ------- - None + Returns + ------- + None - Notes - ----- - - The function fits a sum of log-normal distribution to the input data. - - The number of distribution is determined by the 'mode' parameter. - - Additional plotting customization can be done using the **kwargs. + Notes + ----- + - The function fits a sum of log-normal distribution to the input data. + - The number of distribution is determined by the 'mode' parameter. + - Additional plotting customization can be done using the **kwargs. 
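A small synthetic example of the fit described in the notes above, assuming a bimodal number distribution on a log-spaced diameter grid; the mode amplitudes and geometric parameters are arbitrary placeholders.

import numpy as np

def lognormal(d, n, gmd, gsd):
    # single log-normal mode: n is the amplitude, gmd/gsd the geometric mean and std
    return n / (np.log(gsd) * np.sqrt(2 * np.pi)) * np.exp(
        -(np.log(d) - np.log(gmd)) ** 2 / (2 * np.log(gsd) ** 2))

dp = np.geomspace(11.8, 593.5, 100)
dist = lognormal(dp, 8e3, 40, 1.8) + lognormal(dp, 2e3, 150, 1.6)    # synthetic bimodal distribution
fig, ax = curve_fitting(dp, dist, mode=2, unit='Number')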
- Example - ------- - >>> curve_fitting(dp, dist, mode=2, xlabel="Diameter (nm)", ylabel="Distribution") - """ - fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) + Example + ------- + >>> curve_fitting(dp, dist, mode=2, xlabel="Diameter (nm)", ylabel="Distribution") + """ + fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) - # Calculate total number concentration and normalize distribution - total_num = np.sum(dist * log(dp)) - norm_data = dist / total_num + # Calculate total number concentration and normalize distribution + total_num = np.sum(dist * log(dp)) + norm_data = dist / total_num - def lognorm_func(x, *params): - num_distributions = len(params) // 3 - result = np.zeros_like(x) + def lognorm_func(x, *params): + num_distributions = len(params) // 3 + result = np.zeros_like(x) - for i in range(num_distributions): - offset = i * 3 - _number, _geomean, _geostd = params[offset: offset + 3] + for i in range(num_distributions): + offset = i * 3 + _number, _geomean, _geostd = params[offset: offset + 3] - result += (_number / (log(_geostd) * sqrt(2 * pi)) * - exp(-(log(x) - log(_geomean)) ** 2 / (2 * log(_geostd) ** 2))) + result += (_number / (log(_geostd) * sqrt(2 * pi)) * + exp(-(log(x) - log(_geomean)) ** 2 / (2 * log(_geostd) ** 2))) - return result + return result - # initial gauss - min_value = np.array([min(dist)]) - extend_ser = np.concatenate([min_value, dist, min_value]) - _mode, _ = find_peaks(extend_ser, distance=20) - peak = dp[_mode - 1] - mode = mode or len(peak) + # initial gauss + min_value = np.array([min(dist)]) + extend_ser = np.concatenate([min_value, dist, min_value]) + _mode, _ = find_peaks(extend_ser, distance=20) + peak = dp[_mode - 1] + mode = mode or len(peak) - # 初始參數猜測 - initial_guess = [0.05, 20., 2.] * mode + # 初始參數猜測 + initial_guess = [0.05, 20., 2.] 
* mode - # 設定參數範圍 - bounds = ([1e-6, 10, 1] * mode, [1, 3000, 8] * mode) + # 設定參數範圍 + bounds = ([1e-6, 10, 1] * mode, [1, 3000, 8] * mode) - # 使用 curve_fit 函數進行擬合 - result = curve_fit(lognorm_func, dp, norm_data, p0=initial_guess, bounds=bounds) + # 使用 curve_fit 函數進行擬合 + result = curve_fit(lognorm_func, dp, norm_data, p0=initial_guess, bounds=bounds) - # 獲取擬合的參數 - params = result[0].tolist() + # 獲取擬合的參數 + params = result[0].tolist() - print('\n' + "Fitting Results:") - table = [] + print('\n' + "Fitting Results:") + table = [] - for i in range(mode): - offset = i * 3 - num, mu, sigma = params[offset:offset + 3] - table.append([f'log-{i + 1}', f"{num * total_num:.3f}", f"{mu:.3f}", f"{sigma:.3f}"]) + for i in range(mode): + offset = i * 3 + num, mu, sigma = params[offset:offset + 3] + table.append([f'log-{i + 1}', f"{num * total_num:.3f}", f"{mu:.3f}", f"{sigma:.3f}"]) - # 使用 tabulate 來建立表格並印出 - print(tabulate(table, headers=["log-", "number", "mu", "sigma"], floatfmt=".3f", tablefmt="fancy_grid")) + # 使用 tabulate 來建立表格並印出 + print(tabulate(table, headers=["log-", "number", "mu", "sigma"], floatfmt=".3f", tablefmt="fancy_grid")) - fit_curve = total_num * lognorm_func(dp, *params) + fit_curve = total_num * lognorm_func(dp, *params) - plt.plot(dp, fit_curve, color='#c41b1b', label='Fitting curve', lw=2.5) - plt.plot(dp, dist, color='b', label='Observed curve', lw=2.5) + plt.plot(dp, fit_curve, color='#c41b1b', label='Fitting curve', lw=2.5) + plt.plot(dp, dist, color='b', label='Observed curve', lw=2.5) - ax.set(xlim=(dp.min(), dp.max()), ylim=(0, None), xscale='log', - xlabel=r'$\bf D_{p}\ (nm)$', ylabel=Unit(f'{unit}_dist'), title=kwargs.get('title')) + ax.set(xlim=(dp.min(), dp.max()), ylim=(0, None), xscale='log', + xlabel=r'$\bf D_{p}\ (nm)$', ylabel=Unit(f'{unit}_dist'), title=kwargs.get('title')) - plt.grid(color='k', axis='x', which='major', linestyle='dashdot', linewidth=0.4, alpha=0.4) - ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 3), useMathText=True) - ax.legend(prop={'weight': 'bold'}) + plt.grid(color='k', axis='x', which='major', linestyle='dashdot', linewidth=0.4, alpha=0.4) + ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 3), useMathText=True) + ax.legend(prop={'weight': 'bold'}) - plt.show(block=True) + plt.show(block=True) - return fig, ax + return fig, ax @set_figure def ls_mode(**kwargs) -> tuple[Figure, Axes]: - """ - Plot log-normal mass size distribution for small mode, large mode, and sea salt particles. + """ + Plot log-normal mass size distribution for small mode, large mode, and sea salt particles. - Parameters - ---------- - **kwargs : dict - Additional keyword arguments. + Parameters + ---------- + **kwargs : dict + Additional keyword arguments. 
- Examples - -------- - Example : Plot log-normal mass size distribution with default settings - >>> ls_mode() - """ + Examples + -------- + Example : Plot log-normal mass size distribution with default settings + >>> ls_mode() + """ - fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) + fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) - geoMean = [0.2, 0.5, 2.5] - geoStdv = [2.2, 1.5, 2.0] - color = ['g', 'r', 'b'] - label = [r'$\bf Small\ mode\ :D_{g}\ =\ 0.2\ \mu m,\ \sigma_{{g}}\ =\ 2.2$', - r'$\bf Large\ mode\ :D_{g}\ =\ 0.5\ \mu m,\ \sigma_{{g}}\ =\ 1.5$', - r'$\bf Sea\ salt\ :D_{g}\ =\ 2.5\ \mu m,\ \sigma_{{g}}\ =\ 2.0$'] + geoMean = [0.2, 0.5, 2.5] + geoStdv = [2.2, 1.5, 2.0] + color = ['g', 'r', 'b'] + label = [r'$\bf Small\ mode\ :D_{g}\ =\ 0.2\ \mu m,\ \sigma_{{g}}\ =\ 2.2$', + r'$\bf Large\ mode\ :D_{g}\ =\ 0.5\ \mu m,\ \sigma_{{g}}\ =\ 1.5$', + r'$\bf Sea\ salt\ :D_{g}\ =\ 2.5\ \mu m,\ \sigma_{{g}}\ =\ 2.0$'] - x = np.geomspace(0.001, 20, 10000) - for _gmd, _gsd, _color, _label in zip(geoMean, geoStdv, color, label): - lognorm = 1 / (log(_gsd) * sqrt(2 * pi)) * (exp(-(log(x) - log(_gmd)) ** 2 / (2 * log(_gsd) ** 2))) + x = np.geomspace(0.001, 20, 10000) + for _gmd, _gsd, _color, _label in zip(geoMean, geoStdv, color, label): + lognorm = 1 / (log(_gsd) * sqrt(2 * pi)) * (exp(-(log(x) - log(_gmd)) ** 2 / (2 * log(_gsd) ** 2))) - ax.semilogx(x, lognorm, color=_color, label=_label) - ax.fill_between(x, lognorm, 0, where=(lognorm > 0), color=_color, alpha=0.3, label='__nolegend__') + ax.semilogx(x, lognorm, color=_color, label=_label) + ax.fill_between(x, lognorm, 0, where=(lognorm > 0), color=_color, alpha=0.3, label='__nolegend__') - ax.set(xlim=(0.001, 20), ylim=(0, None), xscale='log', xlabel=r'$\bf D_{p}\ (nm)$', - ylabel=r'$\bf Probability\ (dM/dlogdp)$', title=r'Log-normal Mass Size Distribution') + ax.set(xlim=(0.001, 20), ylim=(0, None), xscale='log', xlabel=r'$\bf D_{p}\ (nm)$', + ylabel=r'$\bf Probability\ (dM/dlogdp)$', title=r'Log-normal Mass Size Distribution') - ax.grid(color='k', axis='x', which='major', linestyle='dashdot', linewidth=0.4, alpha=0.4) - ax.legend(prop={'weight': 'bold'}) + ax.grid(color='k', axis='x', which='major', linestyle='dashdot', linewidth=0.4, alpha=0.4) + ax.legend(prop={'weight': 'bold'}) - plt.show() + plt.show() - return fig, ax + return fig, ax @set_figure def lognorm_dist(**kwargs) -> tuple[Figure, Axes]: - # - """ - Plot various particle size distribution to illustrate log-normal distribution and transformations. + # + """ + Plot various particle size distribution to illustrate log-normal distribution and transformations. - Parameters - ---------- - **kwargs : dict - Additional keyword arguments. + Parameters + ---------- + **kwargs : dict + Additional keyword arguments. 
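A quick numerical check of the two normal/log-normal relationships this demo illustrates, using only numpy and scipy; the sample size is arbitrary and the printed values are approximate.

import numpy as np
from scipy.stats import lognorm, norm

x = lognorm(scale=3, s=np.log(2)).rvs(size=50_000)   # X ~ LN(gmd=3, gsd=2)
print(np.log(x).mean(), np.log(x).std())             # ~ ln(3) = 1.10 and ln(2) = 0.69

y = norm(loc=3, scale=1).rvs(size=50_000)            # Y ~ N(3, 1), so exp(Y) is log-normal
print(np.exp(y.mean()), np.exp(y.std()))             # geometric mean ~ e^3 = 20.1, geometric std ~ e = 2.72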
- Examples - -------- - Example : Plot default particle size distribution - >>> lognorm_dist() - """ + Examples + -------- + Example : Plot default particle size distribution + >>> lognorm_dist() + """ - fig, ax = plt.subplots(2, 2, **kwargs.get('fig_kws', {})) - ([ax1, ax2], [ax3, ax4]) = ax - fig.suptitle('Particle Size Distribution', fontweight='bold') - plt.subplots_adjust(left=0.125, right=0.925, bottom=0.1, top=0.93, wspace=0.4, hspace=0.4) + fig, ax = plt.subplots(2, 2, **kwargs.get('fig_kws', {})) + ([ax1, ax2], [ax3, ax4]) = ax + fig.suptitle('Particle Size Distribution', fontweight='bold') + plt.subplots_adjust(left=0.125, right=0.925, bottom=0.1, top=0.93, wspace=0.4, hspace=0.4) - # pdf - normpdf = lambda x, mu, sigma: (1 / (sigma * sqrt(2 * pi))) * exp(-(x - mu) ** 2 / (2 * sigma ** 2)) - lognormpdf = lambda x, gmean, gstd: (1 / (log(gstd) * sqrt(2 * pi))) * exp( - -(log(x) - log(gmean)) ** 2 / (2 * log(gstd) ** 2)) - lognormpdf2 = lambda x, gmean, gstd: (1 / (x * log(gstd) * sqrt(2 * pi))) * exp( - -(log(x) - log(gmean)) ** 2 / (2 * log(gstd) ** 2)) + # pdf + normpdf = lambda x, mu, sigma: (1 / (sigma * sqrt(2 * pi))) * exp(-(x - mu) ** 2 / (2 * sigma ** 2)) + lognormpdf = lambda x, gmean, gstd: (1 / (log(gstd) * sqrt(2 * pi))) * exp( + -(log(x) - log(gmean)) ** 2 / (2 * log(gstd) ** 2)) + lognormpdf2 = lambda x, gmean, gstd: (1 / (x * log(gstd) * sqrt(2 * pi))) * exp( + -(log(x) - log(gmean)) ** 2 / (2 * log(gstd) ** 2)) - # 生成x - x = np.linspace(-10, 10, 1000) - x2 = np.geomspace(0.01, 100, 1000) + # 生成x + x = np.linspace(-10, 10, 1000) + x2 = np.geomspace(0.01, 100, 1000) - # Question 1 - # 若對數常態分布x有gmd=3, gstd=2,ln(x) ~ 常態分佈,試問其分布的平均值與標準差?? Y ~ N(mu=log(gmean), sigma=log(gstd)) - data1 = lognorm(scale=3, s=log(2)).rvs(size=5000) + # Question 1 + # 若對數常態分布x有gmd=3, gstd=2,ln(x) ~ 常態分佈,試問其分布的平均值與標準差?? Y ~ N(mu=log(gmean), sigma=log(gstd)) + data1 = lognorm(scale=3, s=log(2)).rvs(size=5000) - # Question 2 - # 若常態分布x有平均值3 標準差1,exp(x)則為一對數常態分佈? 由對數常態分佈的定義 若隨機變數ln(Z)是常態分布 則Z為對數常態分布 - # 因此已知Z = exp(x), so ln(Z)=x,Z ~ 對數常態分佈,試問其分布的幾何平均值與幾何標準差是?? Z ~ LN(geoMean=exp(mu), geoStd=exp(sigma)) - data2 = norm(loc=3, scale=1).rvs(size=5000) + # Question 2 + # 若常態分布x有平均值3 標準差1,exp(x)則為一對數常態分佈? 由對數常態分佈的定義 若隨機變數ln(Z)是常態分布 則Z為對數常態分布 + # 因此已知Z = exp(x), so ln(Z)=x,Z ~ 對數常態分佈,試問其分布的幾何平均值與幾何標準差是?? 
Z ~ LN(geoMean=exp(mu), geoStd=exp(sigma)) + data2 = norm(loc=3, scale=1).rvs(size=5000) - def plot_distribution(ax, x, pdf, color='k-', xscale='linear'): - ax.plot(x, pdf, color) - ax.set(xlabel='Particle Size (micron)', ylabel='Probability Density', xlim=(x.min(), x.max()), xscale=xscale) + def plot_distribution(ax, x, pdf, color='k-', xscale='linear'): + ax.plot(x, pdf, color) + ax.set(xlabel='Particle Size (micron)', ylabel='Probability Density', xlim=(x.min(), x.max()), xscale=xscale) - # 繪製粒徑分布 - plot_distribution(ax1, x, normpdf(x, mu=0, sigma=2)) + # 繪製粒徑分布 + plot_distribution(ax1, x, normpdf(x, mu=0, sigma=2)) - plot_distribution(ax2, x2, lognormpdf(x2, gmean=0.8, gstd=1.5), 'g-', xscale='log') - plot_distribution(ax2, x2, lognormpdf2(x2, gmean=0.8, gstd=1.5), 'r--', xscale='log') - plot_distribution(ax2, x2, lognorm(scale=0.8, s=log(1.5)).pdf(x2), 'b--', xscale='log') + plot_distribution(ax2, x2, lognormpdf(x2, gmean=0.8, gstd=1.5), 'g-', xscale='log') + plot_distribution(ax2, x2, lognormpdf2(x2, gmean=0.8, gstd=1.5), 'r--', xscale='log') + plot_distribution(ax2, x2, lognorm(scale=0.8, s=log(1.5)).pdf(x2), 'b--', xscale='log') - plot_distribution(ax3, x, normpdf(x, mu=log(3), sigma=log(2)), 'k-') - ax3.hist(log(data1), bins=100, density=True, alpha=0.6, color='g') + plot_distribution(ax3, x, normpdf(x, mu=log(3), sigma=log(2)), 'k-') + ax3.hist(log(data1), bins=100, density=True, alpha=0.6, color='g') - plot_distribution(ax4, x2, lognormpdf2(x2, gmean=exp(3), gstd=exp(1)), 'r-', xscale='log') - ax4.hist(exp(data2), bins=100, density=True, alpha=0.6, color='g') + plot_distribution(ax4, x2, lognormpdf2(x2, gmean=exp(3), gstd=exp(1)), 'r-', xscale='log') + ax4.hist(exp(data2), bins=100, density=True, alpha=0.6, color='g') - plt.show() + plt.show() - return fig, ax + return fig, ax if __name__ == '__main__': - lognorm_dist() + lognorm_dist() diff --git a/AeroViz/plot/improve/__init__.py b/AeroViz/plot/improve/__init__.py deleted file mode 100644 index a054a2c..0000000 --- a/AeroViz/plot/improve/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .improve import * diff --git a/AeroViz/plot/improve/improve.py b/AeroViz/plot/improve/improve.py deleted file mode 100644 index 8867f98..0000000 --- a/AeroViz/plot/improve/improve.py +++ /dev/null @@ -1,240 +0,0 @@ -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from matplotlib.pyplot import Figure, Axes -from pandas import DataFrame, read_json -from scipy.optimize import curve_fit -from pathlib import Path - -from AeroViz import plot -from AeroViz.plot.utils import * -from AeroViz.tools import DataBase, DataReader, DataClassifier - -# TODO: this file has to be reorganized - -__all__ = ['chemical_enhancement', - 'ammonium_rich', - 'pie_IMPROVE', - 'MLR_IMPROVE', - 'fRH_plot', - ] - - -@set_figure -def chemical_enhancement(data_set: DataFrame = None, - data_std: DataFrame = None, - ax: Axes | None = None, - **kwargs - ) -> tuple[Figure, Axes]: - fig, ax = plt.subplots() if ax is None else (ax.get_figure(), ax) - - ser_grp_sta, ser_grp_sta_std = DataClassifier(DataBase('/Users/chanchihyu/NTU/2020能見度計畫/data/All_data.csv'), - by='State') - species = ['AS', 'AN', 'POC', 'SOC', 'Soil', 'SS', 'EC', 'ALWC'] - data_set, data_std = ser_grp_sta.loc[:, species], ser_grp_sta_std.loc[:, species] - - width = 0.20 - block = width / 4 - - x = np.array([1, 2, 3, 4, 5, 6, 7]) - for i, state in enumerate(['Clean', 'Transition', 'Event']): - val = np.array(data_set.iloc[i, :-1]) - std = (0,) * 7, np.array(data_std.iloc[i, :-1]) - - plt.bar(x 
+ (i + 1) * (width + block), val, yerr=std, width=width, color=Color.colors3[:-1], - alpha=0.6 + (0.2 * i), - edgecolor=None, capsize=None, label=state) - - ax.set(xlabel=r'$\bf Chemical\ species$', - ylabel=r'$\bf Mass\ concentration\ ({\mu}g/m^3)$', - xticks=x + 2 * (width + block), - xticklabels=species, - ylim=(0, 25), - title=r'$\bf Chemical\ enhancement$') - - ax.vlines(8, 0, 25, linestyles='--', colors='k') - - ax2 = ax.twinx() - for i, state in enumerate(['Clean', 'Transition', 'Event']): - val = np.array(data_set.iloc[i, -1]) - std = np.array([[0], [data_std.iloc[i, -1]]]) - plt.bar(8 + (i + 1) * (width + block), val, yerr=std, width=width, color='#96c8e6', - alpha=0.6 + (0.2 * i), edgecolor=None, capsize=None, label=state) - - ax2.set(ylabel=r'$\bf Mass\ concentration\ ({\mu}g/m^3)$', - ylim=(0, 100), - xticks=x + 2 * (width + block), - xticklabels=species - ) - - a = (np.array(data_set.loc['Event']) + np.array(data_set.loc['Transition'])) / 2 - b = (np.array(data_set.loc['Transition']) + np.array(data_set.loc['Clean'])) / 2 - c = np.array(data_set.loc['Event']) / np.array(data_set.loc['Transition']) - d = np.array(data_set.loc['Transition']) / np.array(data_set.loc['Clean']) - - for i, (posa, posb, vala, valb) in enumerate(zip(a, b, c, d)): - if i < 7: - ax.text(i + 1.5, posa, '{:.2f}'.format(vala), fontsize=6, weight='bold', zorder=1) - ax.text(i + 1.25, posb, '{:.2f}'.format(valb), fontsize=6, weight='bold', zorder=1) - else: - ax2.text(i + 1.5, posa, '{:.2f}'.format(vala), fontsize=6, weight='bold', zorder=1) - ax2.text(i + 1.25, posb, '{:.2f}'.format(valb), fontsize=6, weight='bold', zorder=1) - - plt.show() - - return fig, ax - - -@set_figure -def ammonium_rich(df: DataFrame, - **kwargs - ) -> tuple[Figure, Axes]: - df = df[['NH4+', 'SO42-', 'NO3-', 'PM25']].dropna().copy().div([18, 96, 62, 1]) - df['required_ammonium'] = df['NO3-'] + 2 * df['SO42-'] - - fig, ax = plt.subplots() - - scatter = ax.scatter(df['required_ammonium'].to_numpy(), df['NH4+'].to_numpy(), c=df['PM25'].to_numpy(), - vmin=0, vmax=70, cmap='jet', marker='o', s=10, alpha=1) - - ax.axline((0, 0), slope=1., color='k', lw=2, ls='--', alpha=0.5, label='1:1') - plt.text(0.97, 0.97, r'$\bf 1:1\ Line$', color='k', ha='right', va='top', transform=ax.transAxes) - - ax.set(xlim=(0, 1.2), - ylim=(0, 1.2), - xlabel=r'$\bf NO_{3}^{-}\ +\ 2\ \times\ SO_{4}^{2-}\ (mole\ m^{-3})$', - ylabel=r'$\bf NH_{4}^{+}\ (mole\ m^{-3})$', - title=kwargs.get('title', '')) - - color_bar = plt.colorbar(scatter, label=Unit('PM25'), extend='both') - - # fig.savefig(f'Ammonium_rich_{title}') - plt.show() - - return fig, ax - - -def pie_IMPROVE(): - Species1 = ['AS_ext_dry', 'AN_ext_dry', 'OM_ext_dry', 'Soil_ext_dry', 'SS_ext_dry', 'EC_ext_dry'] - Species2 = ['AS_ext_dry', 'AN_ext_dry', 'OM_ext_dry', 'Soil_ext_dry', 'SS_ext_dry', 'EC_ext_dry', 'ALWC_ext'] - Species3 = ['AS_ext', 'AN_ext', 'OM_ext', 'Soil_ext', 'SS_ext', 'EC_ext'] - - ser_grp_sta, _ = DataClassifier(DataBase(), by='State') - - ext_dry_dict = ser_grp_sta.loc[:, Species1] - ext_amb_dict = ser_grp_sta.loc[:, Species2] - ext_mix_dict = ser_grp_sta.loc[:, Species3] - - plot.donuts(data_set=ext_dry_dict, labels=['AS', 'AN', 'OM', 'Soil', 'SS', 'BC'], unit='Extinction') - plot.donuts(data_set=ext_mix_dict, labels=['AS', 'AN', 'OM', 'Soil', 'SS', 'BC'], unit='Extinction') - plot.donuts(data_set=ext_amb_dict, labels=['AS', 'AN', 'OM', 'Soil', 'SS', 'BC', 'ALWC'], - unit='Extinction', colors=Color.colors2) - - -def MLR_IMPROVE(**kwargs): - """ - Perform multiple linear regression 
analysis and generate plots based on IMPROVE dataset. - - Parameters - ---------- - **kwargs : dict - Additional keyword arguments for customization. - - Returns - ------- - None - - Examples - -------- - Example usage of MLR_IMPROVE function: - - >>> MLR_IMPROVE() - - Notes - ----- - This function performs multiple linear regression analysis on the IMPROVE dataset and generates plots for analysis. - - - The function first selects specific species from the dataset and drops NaN values. - - It calculates a 'Localized' value based on a multiplier and the sum of selected species. - - Data from 'modified_IMPROVE.csv' and 'revised_IMPROVE.csv' are read and concatenated with the dataset. - - Statistical analysis is performed using DataClassifier to calculate mean and standard deviation. - - Plots are generated using linear_regression for Extinction vs. Revised/Modified/Localized and Pie.donuts for a - pie chart showing the distribution of species based on Extinction. - - """ - species = ['Extinction', 'Scattering', 'Absorption', - 'total_ext_dry', 'AS_ext_dry', 'AN_ext_dry', 'OM_ext_dry', 'Soil_ext_dry', 'SS_ext_dry', 'EC_ext_dry', - 'AS', 'AN', 'POC', 'SOC', 'Soil', 'SS', 'EC', 'OM'] - - df = DataBase('/Users/chanchihyu/NTU/2020能見度計畫/data/All_data.csv')[species].dropna().copy() - - # multiple_linear_regression(df, x=['AS', 'AN', 'POC', 'SOC', 'Soil', 'SS'], y='Scattering', add_constant=True) - # multiple_linear_regression(df, x=['POC', 'SOC', 'EC'], y='Absorption', add_constant=True) - # multiple_linear_regression(df, x=['AS', 'AN', 'POC', 'SOC', 'Soil', 'SS', 'EC'], y='Extinction', add_constant=False) - - multiplier = [2.675, 4.707, 11.6, 7.272, 0, 0.131, 10.638] - df['Localized'] = df[['AS', 'AN', 'POC', 'SOC', 'Soil', 'SS', 'EC']].mul(multiplier).sum(axis=1) - # TODO: remove name - modify_IMPROVE = DataReader('modified_IMPROVE.csv')['total_ext_dry'].rename('Modified') - revised_IMPROVE = DataReader('revised_IMPROVE.csv')['total_ext_dry'].rename('Revised') - - df = pd.concat([df, revised_IMPROVE, modify_IMPROVE], axis=1) - - n_df = df[['AS', 'AN', 'POC', 'SOC', 'Soil', 'SS', 'EC']].mul(multiplier) - mean, std = DataClassifier(n_df, 'State') - - ser_grp_sta, _ = DataClassifier(DataBase(), by='State') - mass_comp = ser_grp_sta.loc[:, ['AS', 'AN', 'POC', 'SOC', 'Soil', 'SS', 'EC']] - - # plot - plot.linear_regression(df, x='Extinction', y=['Revised', 'Modified', 'Localized'], xlim=[0, 400], ylim=[0, 400], - regression=True, diagonal=True) - plot.donuts(data_set=mass_comp, labels=['AS', 'AN', 'POC', 'SOC', 'Soil', 'SS', 'EC'], - unit='PM25', colors=Color.colors3) - plot.donuts(mean, labels=['AS', 'AN', 'POC', 'SOC', 'Soil', 'SS', 'EC'], unit='Extinction', colors=Color.colors3) - - -@set_figure -def fRH_plot(**kwargs) -> tuple[Figure, Axes]: - frh = read_json(Path(__file__).parent.parent / 'utils' / 'fRH.json') - - def fitting_func(RH, a, b, c): - f = a + b * (RH / 100) ** c - return f - - x = frh.index.to_numpy() - y = frh['fRHs'].to_numpy() - - result = curve_fit(fitting_func, x, y) - params = result[0].tolist() - val_fit = fitting_func(x, *params) - - fig, ax = plt.subplots(figsize=(3, 3)) - - ax.plot(frh.index, frh['fRH'], 'k-o', ms=2, label='$f(RH)_{original}$') - ax.plot(frh.index, frh['fRHs'], 'g-o', ms=2, label='$f(RH)_{small\\ mode}$') - ax.plot(frh.index, frh['fRHl'], 'r-o', ms=2, label='$f(RH)_{large\\ mode}$') - ax.plot(frh.index, frh['fRHSS'], 'b-o', ms=2, label='$f(RH)_{sea\\ salt}$') - - ax.set(xlim=(0, 100), - ylim=(1, None), - xlabel='$RH (\\%)$', - ylabel='$f(RH)$', - 
title=f'$Hygroscopic\\ growth\\ factor$' - ) - - ax.grid(axis='y', color='gray', linestyle='dashed', linewidth=0.4, alpha=0.4) - - ax.legend() - - plt.show() - # fig.savefig('fRH_plot') - - return fig, ax - - -if __name__ == '__main__': - # chemical_enhancement() - # MLR_IMPROVE() - # ammonium_rich() - fRH_plot() diff --git a/AeroViz/plot/meteorology/CBPF.py b/AeroViz/plot/meteorology/CBPF.py new file mode 100644 index 0000000..193fb37 --- /dev/null +++ b/AeroViz/plot/meteorology/CBPF.py @@ -0,0 +1,295 @@ +import math + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from matplotlib.pyplot import Figure, Axes +from pandas import DataFrame, Series +from scipy.ndimage import gaussian_filter + +from AeroViz.plot.utils import * + +__all__ = ['CBPF'] + + +def improve_density_estimation(df, WS, WD, val, resolution=100, bandwidth=None): + """ + 改進的密度估計函數,使用KDE方法來產生更平滑的分布 + + Parameters: + ----------- + df : DataFrame + 包含風速風向數據的DataFrame + WS : str + 風速列名 + WD : str + 風向列名 + val : str + 要分析的變量列名 + resolution : int + 網格解析度 + bandwidth : float or tuple + KDE的頻寬參數,如果為None則自動選擇 + """ + from scipy.stats import gaussian_kde + import numpy as np + + # 轉換為笛卡爾坐標 + u = df[WS] * np.sin(np.radians(df[WD])) + v = df[WS] * np.cos(np.radians(df[WD])) + + # 創建網格 + u_range = np.linspace(u.min(), u.max(), resolution) + v_range = np.linspace(v.min(), v.max(), resolution) + U, V = np.meshgrid(u_range, v_range) + + # 準備KDE的位置 + positions = np.vstack([U.ravel(), V.ravel()]) + values = np.vstack([u, v]) + + # 使用KDE進行密度估計 + kernel = gaussian_kde(values, bw_method=bandwidth) + Z = np.reshape(kernel(positions), U.shape) + + # 將密度值歸一化到[0,1]區間 + Z = (Z - Z.min()) / (Z.max() - Z.min()) + + # 應用極坐標遮罩 + center_u = len(u_range) // 2 + center_v = len(v_range) // 2 + max_radius = min(center_u, center_v) + + Y, X = np.ogrid[-center_v:resolution - center_v, -center_u:resolution - center_u] + mask = X * X + Y * Y > max_radius * max_radius + Z[mask] = np.nan + + return Z, U, V + + +def smooth_and_clean(Z, smooth_radius=2, min_density=1): + """ + 平滑並清理密度圖,去除孤立點 + + Parameters: + ----------- + Z : ndarray + 密度估計結果 + smooth_radius : int + 平滑半徑 + min_density : float + 最小密度閾值 + """ + from scipy.ndimage import gaussian_filter + + # 先進行高斯平滑 + Z_smooth = gaussian_filter(Z, sigma=smooth_radius) + + # 去除低於閾值的點 + # Z_smooth[Z_smooth < min_density] = np.nan + + # 去除孤立點 + rows, cols = Z_smooth.shape + for i in range(rows): + for j in range(cols): + if not np.isnan(Z_smooth[i, j]): + # 檢查周圍點 + neighborhood = Z_smooth[ + max(0, i - smooth_radius):min(rows, i + smooth_radius + 1), + max(0, j - smooth_radius):min(cols, j + smooth_radius + 1) + ] + if np.count_nonzero(~np.isnan(neighborhood)) < 1: # 如果周圍有效點太少 + Z_smooth[i, j] = np.nan + + return Z_smooth + + +def is_within_circle(center_row, center_col, row, col, radius): + return np.sqrt((center_row - row) ** 2 + (center_col - col) ** 2) <= radius + + +def remove_lonely_point(filtered_histogram, radius=4, magic_num=13): + rows, cols = filtered_histogram.shape + data_positions = np.where(~np.isnan(filtered_histogram)) + + for row, col in zip(*data_positions): + valid_data_count = 0 + for i in range(max(0, row - radius), min(rows, row + radius + 1)): + for j in range(max(0, col - radius), min(cols, col + radius + 1)): + if (i, j) != (row, col) and is_within_circle(row, col, i, j, radius): + if not np.isnan(filtered_histogram[i, j]): + valid_data_count += 1 + + if valid_data_count <= magic_num: + filtered_histogram[row, col] = np.nan + + return filtered_histogram + + +def 
fill_nan_with_mean(filtered_histogram, radius=4, magic_num=13): + rows, cols = filtered_histogram.shape + nan_positions = np.where(np.isnan(filtered_histogram)) + + for row, col in zip(*nan_positions): + surrounding_values = [] + surrounding_values_within_one = [] + nan_count = 0 + + for i in range(max(0, row - radius), min(rows, row + radius + 1)): + for j in range(max(0, col - radius), min(cols, col + radius + 1)): + if (i, j) != (row, col) and is_within_circle(row, col, i, j, radius): + if np.isnan(filtered_histogram[i, j]): + nan_count += 1 + else: + surrounding_values.append(filtered_histogram[i, j]) + + for i in range(max(0, row - 2), min(rows, row + 2 + 1)): + for j in range(max(0, col - 2), min(cols, col + 2 + 1)): + if (i, j) != (row, col) and is_within_circle(row, col, i, j, 2): + if np.isnan(filtered_histogram[i, j]): + pass + else: + surrounding_values_within_one.append(filtered_histogram[i, j]) + + if nan_count < magic_num and surrounding_values_within_one: + filtered_histogram[row, col] = np.mean(surrounding_values) + + return filtered_histogram + + +# TODO: fix the bug of the CBPF function +@set_figure(figsize=(4.3, 4)) +def CBPF(df: DataFrame, + WS: Series | str, + WD: Series | str, + val: Series | str | None = None, + percentile: list | float | int | None = None, + max_ws: float | None = 5, + resolution: int = 50, + sigma: float | tuple = 2, + rlabel_pos: float = 30, + bottom_text: str | bool | None = None, + **kwargs + ) -> tuple[Figure, Axes]: + # conditional bivariate probability function (cbpf) python + # https://davidcarslaw.github.io/openair/reference/polarPlot.html + # https://github.com/davidcarslaw/openair/blob/master/R/polarPlot.R + + df = df.dropna(subset=[WS, WD] + ([val] if val is not None else [])).copy() + + df['u'] = df[WS].to_numpy() * np.sin(np.radians(df[WD].to_numpy())) + df['v'] = df[WS].to_numpy() * np.cos(np.radians(df[WD].to_numpy())) + + u_bins = np.linspace(df.u.min(), df.u.max(), resolution) + v_bins = np.linspace(df.v.min(), df.v.max(), resolution) + + # 使用 u_group 和 v_group 進行分組 + df['u_group'] = pd.cut(df['u'], u_bins) + df['v_group'] = pd.cut(df['v'], v_bins) + grouped = df.groupby(['u_group', 'v_group'], observed=False) + + X, Y = np.meshgrid(u_bins, v_bins) + + # Note: + # The CBPF is the ratio between the number of points in each cell and the total number of points. + # So, it is not equal to the probability density function (PDF) of the wind speed and wind direction. 
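    # Worked example of the two branches below (numbers are illustrative only):
    #   percentile is None -> each cell holds n_cell / n_total, e.g. a cell containing 12 of the
    #                         1000 valid rows gets 12 / 1000 = 0.012 (a simple frequency surface).
    #   percentile = 75    -> each cell holds n_cell(val >= q75) / n_cell, e.g. if 9 of those
    #                         12 rows exceed the 75th percentile of `val`, the cell gets 0.75.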
+ + if percentile is None: + histogram = (grouped[val].count() / grouped[val].count().sum()).unstack().values.T + # histogram, v_edges, u_edges = np.histogram2d(df.v, df.u, bins=(v_bins, u_bins)) + # histogram = histogram / histogram.sum() + histogram = np.where(histogram == 0, np.nan, histogram) + bottom_text = rf'$PDF\ plot$' + + else: + if not all(0 <= p <= 100 for p in (percentile if isinstance(percentile, list) else [percentile])): + raise ValueError("Percentile must be between 0 and 100") + + if isinstance(percentile, (float, int)): + bottom_text = rf'$CPF:\ >{int(percentile)}^{{th}}$' + thershold = df[val].quantile(percentile / 100) + cond = lambda x: (x >= thershold).sum() + + elif isinstance(percentile, list) and len(percentile) == 1: + # Extract the single element from the list + single_percentile = percentile[0] + bottom_text = rf'$CPF:\ >{int(single_percentile)}^{{th}}$' + threshold = df[val].quantile(single_percentile / 100) + cond = lambda x: (x >= threshold).sum() + + else: + bottom_text = rf'$CPF:\ {int(percentile[0])}^{{th}}\ to\ {int(percentile[1])}^{{th}}$' + thershold_small, thershold_large = df[val].quantile([percentile[0] / 100, percentile[1] / 100]) + cond = lambda x: ((x >= thershold_small) & (x < thershold_large)).sum() + + histogram = (grouped[val].apply(cond) / grouped[val].count()).unstack().values.T + + # if np.isnan(histogram).all(): + # raise "CBPF_array contains only NaN values." + # else: + # print(f"\nHistogram contains NaN before masking: {np.isnan(histogram).sum()}") + + histogram_filled = np.nan_to_num(histogram, nan=0) # 將 NaN 替換為 0 + + filtered_histogram = gaussian_filter(histogram_filled, sigma=sigma) + # filtered_histogram[np.isnan(histogram)] = np.nan + # breakpoint() + # filtered_histogram = smooth_and_clean(filtered_histogram) + + # Apply the function to your data + # fil_radius, magic_num = 3, 13 + # filtered_histogram = remove_lonely_point(filtered_histogram, fil_radius, magic_num) + # filtered_histogram = fill_nan_with_mean(filtered_histogram, fil_radius, magic_num) + + if np.all(np.isnan(filtered_histogram)): + raise ValueError("All values in the filtered histogram are NaN. 
Please decrease the resolution.") + + # plot + fig, ax = plt.subplots() + fig.subplots_adjust(left=0) + + surf = ax.pcolormesh(X, Y, filtered_histogram, shading='auto', cmap='jet', antialiased=True) + + max_ws = max_ws or np.concatenate((abs(df.u), abs(df.v))).max() # Get the maximum value of the wind speed + + radius_lst = np.arange(1, math.ceil(max_ws) + 1) # Create a list of radius + + for i, radius in enumerate(radius_lst): + circle = plt.Circle((0, 0), radius, fill=False, color='gray', linewidth=1, linestyle='--', alpha=0.5) + ax.add_artist(circle) + + for angle, label in zip(range(0, 360, 90), ["E", "N", "W", "S"]): + radian = np.radians(angle) + line_x, line_y = radius * np.cos(radian), radius * np.sin(radian) + + if i + 2 == len(radius_lst): # Add wind direction line and direction label at the edge of the circle + ax.plot([0, line_x * 1.05], [0, line_y * 1.05], color='k', linestyle='-', linewidth=1, alpha=0.5) + ax.text(line_x * 1.15, line_y * 1.15, label, ha='center', va='center') + + ax.text(radius * np.cos(np.radians(rlabel_pos)), radius * np.sin(np.radians(rlabel_pos)), + str(radius) + ' m/s', ha='center', va='center', fontsize=8) + + for radius in range(math.ceil(max_ws) + 1, 10): + circle = plt.Circle((0, 0), radius, fill=False, color='gray', linewidth=1, linestyle='--', alpha=0.5) + ax.add_artist(circle) + + ax.set(xlim=(-max_ws * 1.02, max_ws * 1.02), + ylim=(-max_ws * 1.02, max_ws * 1.02), + xticks=[], + yticks=[], + xticklabels=[], + yticklabels=[], + aspect='equal') + + if bottom_text: + ax.text(0.50, -0.05, bottom_text, fontweight='bold', fontsize=8, va='center', ha='center', + transform=ax.transAxes) + + ax.text(0.5, 1.05, Unit(val), fontweight='bold', fontsize=12, va='center', ha='center', transform=ax.transAxes) + + cbar = plt.colorbar(surf, ax=ax, label='Frequency', pad=0.01, fraction=0.04) + cbar.ax.yaxis.label.set_fontsize(8) + cbar.ax.tick_params(labelsize=8) + + plt.show() + + return fig, ax diff --git a/AeroViz/plot/meteorology/__init__.py b/AeroViz/plot/meteorology/__init__.py index d2a0d9f..de5ea1f 100644 --- a/AeroViz/plot/meteorology/__init__.py +++ b/AeroViz/plot/meteorology/__init__.py @@ -1 +1,3 @@ -from .meteorology import * +from .CBPF import CBPF +from .hysplit import hysplit +from .wind_rose import wind_rose diff --git a/AeroViz/plot/meteorology/hysplit.py b/AeroViz/plot/meteorology/hysplit.py new file mode 100644 index 0000000..395fe5b --- /dev/null +++ b/AeroViz/plot/meteorology/hysplit.py @@ -0,0 +1,90 @@ +from pathlib import Path + +import cartopy.crs as ccrs +import cartopy.feature as cfeature +import matplotlib.pyplot as plt +import pandas as pd + +from AeroViz.plot.utils import set_figure + +# Hybrid Single-Particle Lagrangian Integrated Trajectory (HYSPLIT) model + + +__all__ = ['hysplit'] + +# 設置默認文件路徑 +DEFAULT_FILE = Path(__file__).parent.parent.parent / 'data' / 'hysplit_example_data.txt' + + +def read_hysplit_data(file: Path): + data = pd.read_csv(file, skiprows=8, sep=r'\s+', names=range(0, 12), engine='python') + data = data.reset_index(drop=False) + data.columns = ['category', 'name', 'year', 'month', 'day', 'hour', 'minute', 'count', 'backward', 'lat', 'lon', + 'height', 'pressure'] + + time_cols = ['year', 'month', 'day', 'hour', 'minute'] + + data['time'] = pd.to_datetime(data[time_cols].astype(str).agg(''.join, axis=1), format='%y%m%d%H%M') + + data = data.drop(columns=time_cols) + + data = data[['time'] + [col for col in data.columns if col != 'time']] + + return data + + +@set_figure +def hysplit(file: Path = DEFAULT_FILE): + 
data = read_hysplit_data(file) + + # 創建地圖 + fig, ax = plt.subplots(figsize=(4, 5), subplot_kw={'projection': ccrs.PlateCarree()}) + + # 設置地圖範圍 + ax.set_extent([116, 126, 17, 30], crs=ccrs.PlateCarree()) + + # 添加自然地理特徵 + ax.add_feature(cfeature.LAND) + ax.add_feature(cfeature.OCEAN) + ax.add_feature(cfeature.COASTLINE) + ax.add_feature(cfeature.BORDERS, linestyle=':') + + # 添加經緯度網格 + ax.gridlines(draw_labels=True, dms=True, x_inline=False, y_inline=False) + + # 定義四種顏色 + colors = ['red', 'blue', 'green', 'purple'] + + # 繪製四條軌跡線 + group = data.groupby('category') + for i, (name, _data) in enumerate(group): + trajectory = _data + ax.plot(trajectory['lon'], trajectory['lat'], color=colors[i], + linewidth=2, transform=ccrs.Geodetic(), + label=f'Trajectory {name}') + + # 添加起點和終點標記 + # ax.plot(trajectory['lon'].iloc[-1], trajectory['lat'].iloc[-1], 'o', + # color=colors[i], markersize=4, transform=ccrs.Geodetic()) + # ax.plot(trajectory['lon'].iloc[0], trajectory['lat'].iloc[0], 's', + # color=colors[i], markersize=4, transform=ccrs.Geodetic()) + + ax.legend(loc='upper right') + # 添加色標 + # cbar = plt.colorbar(scatter, ax=ax, shrink=0.6, pad=0.12) + # cbar.set_label('Height (m)') + + # 添加標題 + plt.title("HYSPLIT model", pad=12) + + plt.tight_layout() + + # 保存地圖 + plt.savefig('backward_hysplit.png', dpi=300, bbox_inches='tight') + + # 顯示地圖(可選) + plt.show() + + +if __name__ == "__main__": + hysplit() # 請替換為您的實際檔案路徑 diff --git a/AeroViz/plot/meteorology/meteorology.py b/AeroViz/plot/meteorology/meteorology.py deleted file mode 100644 index 4b408ac..0000000 --- a/AeroViz/plot/meteorology/meteorology.py +++ /dev/null @@ -1,317 +0,0 @@ -import math -from typing import Literal - -import matplotlib.colors as plc -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import seaborn as sns -import windrose -from matplotlib.pyplot import Figure, Axes -from pandas import DataFrame, Series -from scipy.ndimage import gaussian_filter - -from AeroViz.plot.utils import * - -__all__ = ['wind_tms', - 'wind_rose', - 'CBPF' - ] - - -@set_figure(fs=6) -def wind_tms(df: DataFrame, - WS: Series | str, - WD: Series | str, - **kwargs - ) -> tuple[Figure, Axes]: - def drawArrow(A, B, ax: plt.Axes): # 畫箭頭 - _ax = ax.twinx() - if A[0] == B[0] and A[1] == B[1]: # 靜風畫點 - _ax.plot(A[0], A[1], 'ko') - else: - _ax.annotate("", xy=(B[0], B[1]), xytext=(A[0], A[1]), arrowprops=dict(arrowstyle="->")) - - _ax.spines['left'].set_visible(False) - _ax.spines['right'].set_visible(False) - _ax.spines['top'].set_visible(False) - _ax.spines['bottom'].set_visible(False) - _ax.set_xlim(0, ) - _ax.set_ylim(0, 5) - _ax.get_yaxis().set_visible(False) - _ax.set_aspect('equal') # x轴y轴等比例 - - _ax.tick_params(axis='x', rotation=90) - ax.tick_params(axis='x', rotation=90) - plt.tight_layout() - - fig, ax = plt.subplots(figsize=(8, 2)) - uniform_data = [WS] - colors = ['lightskyblue', 'darkturquoise', 'lime', 'greenyellow', 'orangered', 'red'] - clrmap = plc.LinearSegmentedColormap.from_list("mycmap", colors) # 自定义色标 - sns.heatmap(uniform_data, square=True, annot=True, fmt=".2f", linewidths=.5, cmap=clrmap, - yticklabels=['Wind speed (m/s)'], xticklabels=kwargs.get('xticklabels', None), cbar=False, vmin=0, - vmax=5, ax=ax) - ax.set_xticklabels(ax.get_xticklabels(), rotation=90) - ax.set_yticklabels(ax.get_yticklabels(), rotation=0) - ax.spines['bottom'].set_position(('data', 1)) # 移动x轴 - - for idx, (x, value) in enumerate(WD.items()): - if not pd.isna(value): - a = np.array([0.5 + 0.5 * np.sin(value / 180 * np.pi) + idx, 3.5 + 0.5 * 
np.cos(value / 180 * np.pi)]) - b = np.array([0.5 - 0.5 * np.sin(value / 180 * np.pi) + idx, 3.5 - 0.5 * np.cos(value / 180 * np.pi)]) - drawArrow(a, b, ax) - else: - a = np.array([0.5 + idx, 3.5]) - drawArrow(a, a, ax) - - plt.show() - - return fig, ax - - -@set_figure(figsize=(4.3, 4)) -def wind_rose(df: DataFrame, - WS: Series | str, - WD: Series | str, - val: Series | str | None = None, - typ: Literal['bar', 'scatter'] = 'scatter', - rlabel_pos: float = 30, - **kwargs - ) -> tuple[Figure, Axes]: - # conditional bivariate probability function (cbpf) python - # https://davidcarslaw.github.io/openair/reference/polarPlot.html - # https://github.com/davidcarslaw/openair/blob/master/R/polarPlot.R - windrose.WindroseAxes._info = 'WindroseAxes' - - df = df.dropna(subset=[WS, WD] + ([val] if val is not None else [])) - - radius = df[WS].to_numpy() - theta = df[WD].to_numpy() - radian = np.radians(theta) - values = df[val].to_numpy() if val is not None else None - - # In this case, the windrose is a simple frequency diagram, - # the function automatically calculates the radians of the given wind direction. - if typ == 'bar': - fig, ax = plt.subplots(figsize=(5.5, 4), subplot_kw={'projection': 'windrose'}) - fig.subplots_adjust(left=0) - - ax.bar(theta, radius, bins=[0, 1, 2, 3], normed=True, colors=['#0F1035', '#365486', '#7FC7D9', '#DCF2F1']) - ax.set( - ylim=(0, 30), - yticks=[0, 15, 30], - yticklabels=['', '15 %', '30 %'], - rlabel_position=rlabel_pos - ) - ax.set_thetagrids(angles=[0, 45, 90, 135, 180, 225, 270, 315], - labels=["E", "NE", "N", "NW", "W", "SW", "S", "SE"]) - - ax.legend(units='m/s', bbox_to_anchor=[1.1, 0.5], loc='center left', ncol=1) - - # In this case, the windrose is a scatter plot, - # in contrary, this function does not calculate the radians, so user have to input the radian. 
- else: - fig, ax = plt.subplots(figsize=(5, 4), subplot_kw={'projection': 'windrose'}) - fig.subplots_adjust(left=0) - - scatter = ax.scatter(radian, radius, s=15, c=values, vmax=np.quantile(values, 0.90), edgecolors='none', - cmap='jet', alpha=0.8) - ax.set( - ylim=(0, 7), - yticks=[1, 3, 5, 7], - yticklabels=['1 m/s', '3 m/s', '5 m/s', '7 m/s'], - rlabel_position=rlabel_pos, - theta_direction=-1, - theta_zero_location='N', - ) - ax.set_thetagrids(angles=[0, 45, 90, 135, 180, 225, 270, 315], - labels=["N", "NE", "E", "SE", "S", "SW", "W", "NW"]) - - plt.colorbar(scatter, ax=ax, label=Unit(val), pad=0.1, fraction=0.04) - - plt.show() - - return fig, ax - - -@set_figure(figsize=(4.3, 4)) -def CBPF(df: DataFrame, - WS: Series | str, - WD: Series | str, - val: Series | str | None = None, - percentile: list | float | int | None = None, - max_ws: float | None = 5, - resolution: int = 100, - sigma: float | tuple = 2, - rlabel_pos: float = 30, - bottom_text: str | bool | None = None, - **kwargs - ) -> tuple[Figure, Axes]: - # conditional bivariate probability function (cbpf) python - # https://davidcarslaw.github.io/openair/reference/polarPlot.html - # https://github.com/davidcarslaw/openair/blob/master/R/polarPlot.R - - df = df.dropna(subset=[WS, WD] + ([val] if val is not None else [])).copy() - - df['u'] = df[WS].to_numpy() * np.sin(np.radians(df[WD].to_numpy())) - df['v'] = df[WS].to_numpy() * np.cos(np.radians(df[WD].to_numpy())) - - u_bins = np.linspace(df.u.min(), df.u.max(), resolution) - v_bins = np.linspace(df.v.min(), df.v.max(), resolution) - - # 使用 u_group 和 v_group 進行分組 - df['u_group'] = pd.cut(df['u'], u_bins) - df['v_group'] = pd.cut(df['v'], v_bins) - grouped = df.groupby(['u_group', 'v_group'], observed=False) - - X, Y = np.meshgrid(u_bins, v_bins) - - # Note: - # The CBPF is the ratio between the number of points in each cell and the total number of points. - # So, it is not equal to the probability density function (PDF) of the wind speed and wind direction. - - if percentile is None: - histogram = (grouped[val].count() / grouped[val].count().sum()).unstack().values.T - # histogram, v_edges, u_edges = np.histogram2d(df.v, df.u, bins=(v_bins, u_bins)) - # histogram = histogram / histogram.sum() - histogram = np.where(histogram == 0, np.nan, histogram) - bottom_text = rf'$PDF\ plot$' - - else: - if not all(0 <= p <= 100 for p in (percentile if isinstance(percentile, list) else [percentile])): - raise ValueError("Percentile must be between 0 and 100") - - if isinstance(percentile, (float, int)): - bottom_text = rf'$CPF:\ >{int(percentile)}^{{th}}$' - thershold = df[val].quantile(percentile / 100) - cond = lambda x: (x >= thershold).sum() - - elif isinstance(percentile, list) and len(percentile) == 1: - # Extract the single element from the list - single_percentile = percentile[0] - bottom_text = rf'$CPF:\ >{int(single_percentile)}^{{th}}$' - threshold = df[val].quantile(single_percentile / 100) - cond = lambda x: (x >= threshold).sum() - - else: - bottom_text = rf'$CPF:\ {int(percentile[0])}^{{th}}\ to\ {int(percentile[1])}^{{th}}$' - thershold_small, thershold_large = df[val].quantile([percentile[0] / 100, percentile[1] / 100]) - cond = lambda x: ((x >= thershold_small) & (x < thershold_large)).sum() - - histogram = (grouped[val].apply(cond) / grouped[val].count()).unstack().values.T - - # if np.isnan(histogram).all(): - # raise "CBPF_array contains only NaN values." 
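# --- Illustrative sketch (toy data, assumed column names) -------------------
# The Note above states that a CBPF cell value is a count ratio, not a PDF.
# A minimal pandas-only sketch of that conditional step, mirroring the
# `grouped[val].apply(cond) / grouped[val].count()` line used by CBPF():
# for each (u, v) cell, CPF = N(value >= 75th percentile) / N(cell).
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
demo = pd.DataFrame({'ws': rng.uniform(0, 5, 1000),     # wind speed (m/s)
                     'wd': rng.uniform(0, 360, 1000),   # wind direction (deg)
                     'val': rng.gamma(2, 10, 1000)})    # pollutant concentration
demo['u'] = demo['ws'] * np.sin(np.radians(demo['wd']))
demo['v'] = demo['ws'] * np.cos(np.radians(demo['wd']))

threshold = demo['val'].quantile(0.75)
cells = demo.groupby([pd.cut(demo['u'], 20), pd.cut(demo['v'], 20)], observed=False)
cpf = cells['val'].apply(lambda x: (x >= threshold).sum()) / cells['val'].count()
# Empty cells divide by zero and give NaN, as in the CBPF() implementation.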
- # else: - # print(f"\nHistogram contains NaN before masking: {np.isnan(histogram).sum()}") - - histogram_filled = np.nan_to_num(histogram, nan=0) # 將 NaN 替換為 0 - - filtered_histogram = gaussian_filter(histogram_filled, sigma=sigma) - filtered_histogram[np.isnan(histogram)] = np.nan - - def is_within_circle(center_row, center_col, row, col, radius): - return np.sqrt((center_row - row) ** 2 + (center_col - col) ** 2) <= radius - - def remove_lonely_point(filtered_histogram, radius=3): - rows, cols = filtered_histogram.shape - data_positions = np.where(~np.isnan(filtered_histogram)) - - for row, col in zip(*data_positions): - valid_data_count = 0 - for i in range(max(0, row - radius), min(rows, row + radius + 1)): - for j in range(max(0, col - radius), min(cols, col + radius + 1)): - if (i, j) != (row, col) and is_within_circle(row, col, i, j, radius): - if not np.isnan(filtered_histogram[i, j]): - valid_data_count += 1 - - if valid_data_count <= 13: - filtered_histogram[row, col] = np.nan - - return filtered_histogram - - def fill_nan_with_mean(filtered_histogram, radius=3): - rows, cols = filtered_histogram.shape - nan_positions = np.where(np.isnan(filtered_histogram)) - - for row, col in zip(*nan_positions): - surrounding_values = [] - surrounding_values_within_one = [] - nan_count = 0 - - for i in range(max(0, row - radius), min(rows, row + radius + 1)): - for j in range(max(0, col - radius), min(cols, col + radius + 1)): - if (i, j) != (row, col) and is_within_circle(row, col, i, j, radius): - if np.isnan(filtered_histogram[i, j]): - nan_count += 1 - else: - surrounding_values.append(filtered_histogram[i, j]) - - for i in range(max(0, row - 2), min(rows, row + 2 + 1)): - for j in range(max(0, col - 2), min(cols, col + 2 + 1)): - if (i, j) != (row, col) and is_within_circle(row, col, i, j, 2): - if np.isnan(filtered_histogram[i, j]): - pass - else: - surrounding_values_within_one.append(filtered_histogram[i, j]) - - if nan_count < 13 and surrounding_values_within_one: - filtered_histogram[row, col] = np.mean(surrounding_values) - - return filtered_histogram - - # Apply the function to your data - filtered_histogram = remove_lonely_point(filtered_histogram) - filtered_histogram = fill_nan_with_mean(filtered_histogram) - - # plot - fig, ax = plt.subplots() - fig.subplots_adjust(left=0) - - surf = ax.pcolormesh(X, Y, filtered_histogram, shading='auto', cmap='jet', antialiased=True) - - max_ws = max_ws or np.concatenate((abs(df.u), abs(df.v))).max() # Get the maximum value of the wind speed - - radius_lst = np.arange(1, math.ceil(max_ws) + 1) # Create a list of radius - - for i, radius in enumerate(radius_lst): - circle = plt.Circle((0, 0), radius, fill=False, color='gray', linewidth=1, linestyle='--', alpha=0.5) - ax.add_artist(circle) - - for angle, label in zip(range(0, 360, 90), ["E", "N", "W", "S"]): - radian = np.radians(angle) - line_x, line_y = radius * np.cos(radian), radius * np.sin(radian) - - if i + 2 == len(radius_lst): # Add wind direction line and direction label at the edge of the circle - ax.plot([0, line_x * 1.05], [0, line_y * 1.05], color='k', linestyle='-', linewidth=1, alpha=0.5) - ax.text(line_x * 1.15, line_y * 1.15, label, ha='center', va='center') - - ax.text(radius * np.cos(np.radians(rlabel_pos)), radius * np.sin(np.radians(rlabel_pos)), - str(radius) + ' m/s', ha='center', va='center', fontsize=8) - - for radius in range(math.ceil(max_ws) + 1, 10): - circle = plt.Circle((0, 0), radius, fill=False, color='gray', linewidth=1, linestyle='--', alpha=0.5) - 
ax.add_artist(circle) - - ax.set(xlim=(-max_ws * 1.02, max_ws * 1.02), - ylim=(-max_ws * 1.02, max_ws * 1.02), - xticks=[], - yticks=[], - xticklabels=[], - yticklabels=[], - aspect='equal', - ) - if bottom_text: - ax.text(0.50, -0.05, bottom_text, fontweight='bold', fontsize=8, va='center', ha='center', - transform=ax.transAxes) - - ax.text(0.5, 1.05, Unit(val), fontweight='bold', fontsize=12, va='center', ha='center', transform=ax.transAxes) - - cbar = plt.colorbar(surf, ax=ax, label='Frequency', pad=0.01, fraction=0.04) - cbar.ax.yaxis.label.set_fontsize(8) - cbar.ax.tick_params(labelsize=8) - - plt.show() - - return fig, ax diff --git a/AeroViz/plot/meteorology/wind_rose.py b/AeroViz/plot/meteorology/wind_rose.py new file mode 100644 index 0000000..6ffc58b --- /dev/null +++ b/AeroViz/plot/meteorology/wind_rose.py @@ -0,0 +1,77 @@ +from typing import Literal + +import matplotlib.pyplot as plt +import numpy as np +import windrose +from matplotlib.pyplot import Figure, Axes +from pandas import DataFrame, Series + +from AeroViz.plot.utils import * + +__all__ = ['wind_rose'] + + +@set_figure(figsize=(4.3, 4)) +def wind_rose(df: DataFrame, + WS: Series | str, + WD: Series | str, + val: Series | str | None = None, + typ: Literal['bar', 'scatter'] = 'scatter', + rlabel_pos: float = 30, + **kwargs + ) -> tuple[Figure, Axes]: + # conditional bivariate probability function (cbpf) python + # https://davidcarslaw.github.io/openair/reference/polarPlot.html + # https://github.com/davidcarslaw/openair/blob/master/R/polarPlot.R + windrose.WindroseAxes._info = 'WindroseAxes' + + df = df.dropna(subset=[WS, WD] + ([val] if val is not None else [])) + + radius = df[WS].to_numpy() + theta = df[WD].to_numpy() + radian = np.radians(theta) + values = df[val].to_numpy() if val is not None else None + + # In this case, the windrose is a simple frequency diagram, + # the function automatically calculates the radians of the given wind direction. + if typ == 'bar': + fig, ax = plt.subplots(figsize=(5.5, 4), subplot_kw={'projection': 'windrose'}) + fig.subplots_adjust(left=0) + + ax.bar(theta, radius, bins=[0, 1, 2, 3], normed=True, colors=['#0F1035', '#365486', '#7FC7D9', '#DCF2F1']) + ax.set( + ylim=(0, 30), + yticks=[0, 15, 30], + yticklabels=['', '15 %', '30 %'], + rlabel_position=rlabel_pos + ) + ax.set_thetagrids(angles=[0, 45, 90, 135, 180, 225, 270, 315], + labels=["E", "NE", "N", "NW", "W", "SW", "S", "SE"]) + + ax.legend(units='m/s', bbox_to_anchor=[1.1, 0.5], loc='center left', ncol=1) + + # In this case, the windrose is a scatter plot, + # in contrary, this function does not calculate the radians, so user have to input the radian. 
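# --- Illustrative usage sketch (assumed column names and value key) ---------
# The scatter branch below colours each (direction, speed) sample by `val`.
# Wind direction is supplied in degrees; the plot uses the
# `radian = np.radians(theta)` array computed above, so the caller does not
# convert anything. Importing from the module file itself; the package
# __init__ is assumed to re-export wind_rose as well.
import numpy as np
import pandas as pd
from AeroViz.plot.meteorology.wind_rose import wind_rose

rng = np.random.default_rng(0)
demo = pd.DataFrame({'WS': rng.uniform(0, 7, 500),     # wind speed (m/s)
                     'WD': rng.uniform(0, 360, 500),   # wind direction (deg)
                     'PM2.5': rng.gamma(2, 10, 500)})  # colour-scale values
fig, ax = wind_rose(demo, WS='WS', WD='WD', val='PM2.5', typ='scatter')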
+ else: + fig, ax = plt.subplots(figsize=(5, 4), subplot_kw={'projection': 'windrose'}) + fig.subplots_adjust(left=0) + + scatter = ax.scatter(radian, radius, s=15, c=values, vmax=np.quantile(values, 0.90), edgecolors='none', + cmap='jet', alpha=0.8) + ax.set( + ylim=(0, 7), + yticks=[1, 3, 5, 7], + yticklabels=['1 m/s', '3 m/s', '5 m/s', '7 m/s'], + rlabel_position=rlabel_pos, + theta_direction=-1, + theta_zero_location='N', + title=kwargs.get('title', None) + ) + ax.set_thetagrids(angles=[0, 45, 90, 135, 180, 225, 270, 315], + labels=["N", "NE", "E", "SE", "S", "SW", "W", "NW"]) + + plt.colorbar(scatter, ax=ax, label=Unit(val), pad=0.1, fraction=0.04) + + plt.show() + + return fig, ax diff --git a/AeroViz/plot/optical/__init__.py b/AeroViz/plot/optical/__init__.py index 18aab38..0378d74 100644 --- a/AeroViz/plot/optical/__init__.py +++ b/AeroViz/plot/optical/__init__.py @@ -1,2 +1 @@ -from .aethalometer import * from .optical import * diff --git a/AeroViz/plot/optical/aethalometer.py b/AeroViz/plot/optical/aethalometer.py deleted file mode 100644 index 4325c91..0000000 --- a/AeroViz/plot/optical/aethalometer.py +++ /dev/null @@ -1,77 +0,0 @@ -import matplotlib.pyplot as plt -import numpy as np -from pandas import date_range - -from AeroViz.plot.utils import * - -__all__ = ['plot_MA350', - 'plot_MA3502', - 'plot_day_night'] - - -@set_figure(figsize=(15, 5)) -def plot_MA350(df, **kwargs): - fig, ax = plt.subplots() - - # ax.scatter(df.index, df['UV BCc'], marker='o', c='purple', alpha=0.5, label='UV BCc') - # ax.scatter(df.index, df['Blue BCc'], c='b', alpha=0.5, label='Blue BCc') - # ax.scatter(df.index, df['Green BCc'], c='g', alpha=0.5, label='Green BCc') - # ax.scatter(df.index, df['Red BCc'], c='r', alpha=0.5, label='Red BCc') - mean, std = round(df.mean(), 2), round(df.std(), 2) - - label1 = rf'$MA350-0171\ :\;{mean["MA350_0171 IR BCc"]}\;\pm\;{std["MA350_0171 IR BCc"]}\;(ng/m^3)$' - label2 = rf'$MA350-0176\ :\;{mean["MA350_0176 IR BCc"]}\;\pm\;{std["MA350_0176 IR BCc"]}\;(ng/m^3)$' - label3 = rf'$BC-1054\ :\;{mean["BC1054 IR BCc"]}\;\pm\;{std["BC1054 IR BCc"]}\;(ng/m^3)$' - ax.scatter(df.index, df['MA350_0171 IR BCc'], s=10, ls='-', marker='o', c='#a3b18a', alpha=0.5, label=label1) - ax.scatter(df.index, df['MA350_0176 IR BCc'], s=10, ls='-', marker='o', c='#3a5a40', alpha=0.5, label=label2) - ax.scatter(df.index, df['BC1054 IR BCc'], s=10, ls='-', marker='o', c='g', alpha=0.5, label=label3) - ax.legend(prop={'weight': 'bold'}, loc='upper left') - - st_tm, fn_tm = df.index[0], df.index[-1] - tick_time = date_range(st_tm, fn_tm, freq=kwargs.get('freq', '10d')) - - ax.set(xlabel=kwargs.get('xlabel', ''), - ylabel=kwargs.get('ylabel', r'$BC\ (ng/m^3)$'), - xticks=kwargs.get('xticks', tick_time), - xticklabels=kwargs.get('xticklabels', [_tm.strftime("%F") for _tm in tick_time]), - xlim=kwargs.get('xlim', (st_tm, fn_tm)), - ylim=kwargs.get('ylim', (0, None)), - ) - - -@set_figure -def plot_MA3502(df): - fig, ax = plt.subplots() - - bins = np.array([375, 470, 528, 625, 880]) - vals = df.dropna().iloc[:, -5:].values - - ax.boxplot(vals, positions=bins, widths=20, - showfliers=False, showmeans=True, meanline=True, patch_artist=True, - boxprops=dict(facecolor='#f2c872', alpha=.7), - meanprops=dict(color='#000000', ls='none'), - medianprops=dict(ls='-', color='#000000')) - - ax.set(xlim=(355, 900), - ylim=(0, None), - xlabel=r'$\lambda\ (nm)$', - ylabel=r'$Absorption\ (1/Mm)$', ) - - -@set_figure(figsize=(6, 5)) -def plot_day_night(df): - # Group by hour of day and calculate mean - df_grouped 
= df.groupby(df.index.hour).mean() - - # Create figure and plot - fig, ax = plt.subplots() - ax.plot(df_grouped.index, df_grouped['MA350_0171 IR BCc'], marker='o', c='k', alpha=0.5, label='MA350-0171') - ax.plot(df_grouped.index, df_grouped['MA350_0176 IR BCc'], marker='o', c='r', alpha=0.5, label='MA350-0176') - ax.plot(df_grouped.index, df_grouped['BC1054 IR BCc'], marker='o', c='b', alpha=0.5, label='BC-1054') - - ax.set(xlim=(0, 23), - xlabel='Hour of Day', - ylabel=r'$BC\ (ng/m^3)$', - title=f'Diurnal pattern', ) - - ax.legend() diff --git a/AeroViz/plot/optical/optical.py b/AeroViz/plot/optical/optical.py index b7bd041..7b606be 100644 --- a/AeroViz/plot/optical/optical.py +++ b/AeroViz/plot/optical/optical.py @@ -3,386 +3,386 @@ import matplotlib.pyplot as plt import numpy as np -# from PyMieScatt import ScatteringFunction from matplotlib.pyplot import Figure, Axes +from AeroViz.dataProcess.Optical.PyMieScatt_update import ScatteringFunction +from AeroViz.dataProcess.Optical.mie_theory import Mie_Q, Mie_MEE, Mie_PESD from AeroViz.plot.utils import * -from AeroViz.process.method.mie_theory import Mie_Q, Mie_MEE, Mie_PESD __all__ = ['Q_plot', - 'RI_couple', - 'RRI_2D', - # 'scattering_phase', - 'response_surface', - ] + 'RI_couple', + 'RRI_2D', + 'scattering_phase', + 'response_surface', + ] mapping_dic = {'AS': {'m': 1.53 + 0j, 'density': 1.73, 'label': fr'$NH_{4}NO_{3}$', 'color': '#A65E58'}, - 'AN': {'m': 1.55 + 0j, 'density': 1.77, 'label': fr'$(NH_{4})_{2}SO_{4}$', 'color': '#A5BF6B'}, - 'OM': {'m': 1.54 + 0j, 'density': 1.40, 'label': 'OM', 'color': '#F2BF5E'}, - 'Soil': {'m': 1.56 + 0.01j, 'density': 2.60, 'label': 'Soil', 'color': '#3F83BF'}, - 'SS': {'m': 1.54 + 0j, 'density': 1.90, 'label': 'SS', 'color': '#B777C2'}, - 'BC': {'m': 1.80 + 0.54j, 'density': 1.50, 'label': 'BC', 'color': '#D1CFCB'}, - 'Water': {'m': 1.333 + 0j, 'density': 1.00, 'label': 'Water', 'color': '#96c8e6'}} + 'AN': {'m': 1.55 + 0j, 'density': 1.77, 'label': fr'$(NH_{4})_{2}SO_{4}$', 'color': '#A5BF6B'}, + 'OM': {'m': 1.54 + 0j, 'density': 1.40, 'label': 'OM', 'color': '#F2BF5E'}, + 'Soil': {'m': 1.56 + 0.01j, 'density': 2.60, 'label': 'Soil', 'color': '#3F83BF'}, + 'SS': {'m': 1.54 + 0j, 'density': 1.90, 'label': 'SS', 'color': '#B777C2'}, + 'BC': {'m': 1.80 + 0.54j, 'density': 1.50, 'label': 'BC', 'color': '#D1CFCB'}, + 'Water': {'m': 1.333 + 0j, 'density': 1.00, 'label': 'Water', 'color': '#96c8e6'}} @set_figure def Q_plot(species: Literal["AS", "AN", "OM", "Soil", "SS", "BC", "Water"] | list[ - Literal["AS", "AN", "OM", "Soil", "SS", "BC", "Water"]], - x: Literal["dp", "sp"] = 'dp', - y: Literal["Q", "MEE"] = "Q", - mode: Literal["ext", "sca", "abs"] = 'ext', - **kwargs) -> tuple[Figure, Axes]: - """ - Generate a plot showing optical efficiency or mass optical efficiency for different particle species. + Literal["AS", "AN", "OM", "Soil", "SS", "BC", "Water"]], + x: Literal["dp", "sp"] = 'dp', + y: Literal["Q", "MEE"] = "Q", + mode: Literal["ext", "sca", "abs"] = 'ext', + **kwargs) -> tuple[Figure, Axes]: + """ + Generate a plot showing optical efficiency or mass optical efficiency for different particle species. - Parameters - ---------- - species : Union[Literal["AS", "AN", "OM", "Soil", "SS", "BC", "Water"], list[Literal["AS", "AN", "OM", "Soil", "SS", "BC", "Water"]]] - The particle species or list of particle species to plot. Valid species include 'AS' (Ammonium Sulfate), - 'AN' (Ammonium Nitrate), 'OM' (Organic Matter), 'Soil', 'SS' (Sea Salt), 'BC' (Black Carbon), and 'Water'. 
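# --- Illustrative sketch of the underlying Mie_Q call -----------------------
# Q_plot() wraps Mie_Q(m, wavelength_nm, dp_nm); judging from how its result
# is indexed below (e.g. [0]/[1]/[2] for ext/sca/abs), the return stacks the
# three efficiency curves. A minimal check for a purely scattering species
# (n = 1.53, k = 0) at 550 nm:
import numpy as np
from AeroViz.dataProcess.Optical.mie_theory import Mie_Q

dp = np.geomspace(10, 10000, 200)          # diameters in nm
Q = Mie_Q(1.53 + 0j, 550, dp)
Q_ext, Q_sca, Q_abs = Q[0], Q[1], Q[2]
print(Q_ext.max(), Q_abs.max())            # Q_abs stays ~0 when k = 0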
+ Parameters + ---------- + species : Union[Literal["AS", "AN", "OM", "Soil", "SS", "BC", "Water"], list[Literal["AS", "AN", "OM", "Soil", "SS", "BC", "Water"]]] + The particle species or list of particle species to plot. Valid species include 'AS' (Ammonium Sulfate), + 'AN' (Ammonium Nitrate), 'OM' (Organic Matter), 'Soil', 'SS' (Sea Salt), 'BC' (Black Carbon), and 'Water'. - x : Literal["dp", "sp"], optional - The x-axis parameter. 'dp' represents particle diameter, and 'sp' represents size parameter (alpha). - Default is 'dp'. + x : Literal["dp", "sp"], optional + The x-axis parameter. 'dp' represents particle diameter, and 'sp' represents size parameter (alpha). + Default is 'dp'. - y : Literal["Q", "MEE"], optional - The y-axis parameter. 'Q' represents optical efficiency (Q_ext, Q_sca, Q_abs), and 'MEE' represents - mass optical efficiency (MEE, MSE, MAE). Default is 'Q'. + y : Literal["Q", "MEE"], optional + The y-axis parameter. 'Q' represents optical efficiency (Q_ext, Q_sca, Q_abs), and 'MEE' represents + mass optical efficiency (MEE, MSE, MAE). Default is 'Q'. - mode : Literal["ext", "sca", "abs"], optional - The mode of efficiency to plot. 'ext' for extinction efficiency, 'sca' for scattering efficiency, - and 'abs' for absorption efficiency. Default is 'ext'. + mode : Literal["ext", "sca", "abs"], optional + The mode of efficiency to plot. 'ext' for extinction efficiency, 'sca' for scattering efficiency, + and 'abs' for absorption efficiency. Default is 'ext'. - **kwargs - Additional keyword arguments to pass to the plot function. + **kwargs + Additional keyword arguments to pass to the plot function. - Returns - ------- - ax : Axes - Matplotlib Axes object containing the generated plot. + Returns + ------- + ax : Axes + Matplotlib Axes object containing the generated plot. 
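# --- Worked check of the Q vs. MEE relation (standard Mie algebra) ----------
# y='Q' and y='MEE' differ only by a size/density factor: dividing the
# extinction cross section (pi/4 * dp^2 * Q_ext) by the particle mass
# (pi/6 * dp^3 * rho) gives MEE = 3 * Q_ext / (2 * rho * dp). Whether Mie_MEE
# applies exactly this unit convention is an assumption; the magnitude check:
rho = 1.73e3                               # 1.73 g/cm^3 expressed in kg/m^3
dp = 500e-9                                # 500 nm expressed in m
Q_ext = 3.0                                # typical mid-visible extinction efficiency
MEE = 3 * Q_ext / (2 * rho * dp) / 1000    # m^2/kg -> m^2/g
print(round(MEE, 1))                       # ~5.2 m^2/g, a plausible mass efficiency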
- Examples - -------- - Example usage of the Q_plot function: + Examples + -------- + Example usage of the Q_plot function: - >>> Q_plot('AS', x='dp', y='Q', mode='ext') - >>> Q_plot(['AS', 'AN'], x='sp', y='MEE') - """ - dp = np.geomspace(10, 10000, 2000) + >>> Q_plot('AS', x='dp', y='Q', mode='ext') + >>> Q_plot(['AS', 'AN'], x='sp', y='MEE') + """ + dp = np.geomspace(10, 10000, 2000) - mode_mapping = {'ext': 0, 'sca': 1, 'abs': 2} + mode_mapping = {'ext': 0, 'sca': 1, 'abs': 2} - xlabel_mapping = {'dp': 'Particle Diameter (nm)', - 'sp': 'Size parameter (\\alpha)'} + xlabel_mapping = {'dp': 'Particle Diameter (nm)', + 'sp': 'Size parameter (\\alpha)'} - ylabel_mapping = {'Q': {'ext': r'$Extinction\ efficiency\ (Q_{ext})$', - 'sca': r'$Scattering\ efficiency\ (Q_{sca})$', - 'abs': r'$Absorption\ efficiency\ (Q_{abs})$'}, - 'MEE': {'ext': r'$MEE\ (m^{2}/g)$', - 'sca': r'$MSE\ (m^{2}/g)$', - 'abs': r'$MAE\ (m^{2}/g)$'}} + ylabel_mapping = {'Q': {'ext': r'$Extinction\ efficiency\ (Q_{ext})$', + 'sca': r'$Scattering\ efficiency\ (Q_{sca})$', + 'abs': r'$Absorption\ efficiency\ (Q_{abs})$'}, + 'MEE': {'ext': r'$MEE\ (m^{2}/g)$', + 'sca': r'$MSE\ (m^{2}/g)$', + 'abs': r'$MAE\ (m^{2}/g)$'}} - typ = mode_mapping.get(mode, None) - xlabel = xlabel_mapping.get(x, None) - ylabel = ylabel_mapping.get(y, None).get(mode, None) + typ = mode_mapping.get(mode, None) + xlabel = xlabel_mapping.get(x, None) + ylabel = ylabel_mapping.get(y, None).get(mode, None) - fig, ax = plt.subplots() + fig, ax = plt.subplots() - if x == "sp": - size_para = math.pi * dp.copy() / 550 - dp_ = size_para + if x == "sp": + size_para = math.pi * dp.copy() / 550 + dp_ = size_para - else: - plt.semilogx() - dp_ = dp.copy() + else: + plt.semilogx() + dp_ = dp.copy() - if isinstance(species, list): - for i, specie in enumerate(species): - label = mapping_dic[specie].get('label', None) - color = mapping_dic[specie].get('color', None) + if isinstance(species, list): + for i, specie in enumerate(species): + label = mapping_dic[specie].get('label', None) + color = mapping_dic[specie].get('color', None) - mapping_dic[specie]['Q'] = Mie_Q(mapping_dic[specie]['m'], 550, dp) - mapping_dic[specie]['MEE'] = Mie_MEE(mapping_dic[specie]['m'], 550, dp, mapping_dic[specie]['density']) + mapping_dic[specie]['Q'] = Mie_Q(mapping_dic[specie]['m'], 550, dp) + mapping_dic[specie]['MEE'] = Mie_MEE(mapping_dic[specie]['m'], 550, dp, mapping_dic[specie]['density']) - plt.plot(dp_, mapping_dic[specie][f'{y}'][typ], color=color, label=label, alpha=1, lw=2) + plt.plot(dp_, mapping_dic[specie][f'{y}'][typ], color=color, label=label, alpha=1, lw=2) - else: - legend_label = {'Q': [r'$\bf Q_{{ext}}$', r'$\bf Q_{{scat}}$', r'$\bf Q_{{abs}}$'], - 'MEE': [r'$\bf MEE$', r'$\bf MSE$', r'$\bf MAE$']} + else: + legend_label = {'Q': [r'$\bf Q_{{ext}}$', r'$\bf Q_{{scat}}$', r'$\bf Q_{{abs}}$'], + 'MEE': [r'$\bf MEE$', r'$\bf MSE$', r'$\bf MAE$']} - ylabel_mapping = {'Q': r'$\bf Optical\ efficiency\ (Q_{{ext, sca, abs}})$', - 'MEE': r'$\bf Mass\ Optical\ Efficiency\ (m^2/g)$'} + ylabel_mapping = {'Q': r'$\bf Optical\ efficiency\ (Q_{{ext, sca, abs}})$', + 'MEE': r'$\bf Mass\ Optical\ Efficiency\ (m^2/g)$'} - legend = legend_label.get(y, None) - ylabel = ylabel_mapping.get(y, None) + legend = legend_label.get(y, None) + ylabel = ylabel_mapping.get(y, None) - mapping_dic[species]['Q'] = Mie_Q(mapping_dic[species]['m'], 550, dp) - mapping_dic[species]['MEE'] = Mie_MEE(mapping_dic[species]['m'], 550, dp, mapping_dic[species]['density']) + mapping_dic[species]['Q'] = 
Mie_Q(mapping_dic[species]['m'], 550, dp) + mapping_dic[species]['MEE'] = Mie_MEE(mapping_dic[species]['m'], 550, dp, mapping_dic[species]['density']) - plt.plot(dp_, mapping_dic[species][f'{y}'][0], color='b', label=legend[0]) - plt.plot(dp_, mapping_dic[species][f'{y}'][1], color='g', label=legend[1]) - plt.plot(dp_, mapping_dic[species][f'{y}'][2], color='r', label=legend[2]) - plt.text(0.04, 0.92, mapping_dic[species]['label'], transform=ax.transAxes, weight='bold') + plt.plot(dp_, mapping_dic[species][f'{y}'][0], color='b', label=legend[0]) + plt.plot(dp_, mapping_dic[species][f'{y}'][1], color='g', label=legend[1]) + plt.plot(dp_, mapping_dic[species][f'{y}'][2], color='r', label=legend[2]) + plt.text(0.04, 0.92, mapping_dic[species]['label'], transform=ax.transAxes, weight='bold') - ax.set(xlim=(dp.min(), dp.max()), ylim=(0, None), xlabel=xlabel, ylabel=ylabel) - ax.grid(color='k', axis='x', which='major', linestyle='dashdot', linewidth=0.4, alpha=0.4) - ax.legend(loc='best', prop={'weight': 'bold'}) + ax.set(xlim=(dp.min(), dp.max()), ylim=(0, None), xlabel=xlabel, ylabel=ylabel) + ax.grid(color='k', axis='x', which='major', linestyle='dashdot', linewidth=0.4, alpha=0.4) + ax.legend(loc='best', prop={'weight': 'bold'}) - # fig.savefig(PATH_MAIN/f'Q_{species}') - plt.show() + # fig.savefig(PATH_MAIN/f'Q_{species}') + plt.show() - return fig, ax + return fig, ax @set_figure(figsize=(9, 4)) def RI_couple(**kwargs) -> tuple[Figure, Axes]: - """ - Generate a plot to test the influence of imaginary parts on scattering and absorption efficiencies. + """ + Generate a plot to test the influence of imaginary parts on scattering and absorption efficiencies. - Parameters - ---------- - **kwargs - Additional keyword arguments to pass to the plot function. + Parameters + ---------- + **kwargs + Additional keyword arguments to pass to the plot function. - Returns - ------- - ax : Axes - Matplotlib Axes object containing the generated plot. + Returns + ------- + ax : Axes + Matplotlib Axes object containing the generated plot. 
- Examples - -------- - Example usage of the IJ_couple function: + Examples + -------- + Example usage of the IJ_couple function: - >>> ax = RI_couple() - """ - dp = np.geomspace(10, 10000, 5000) + >>> ax = RI_couple() + """ + dp = np.geomspace(10, 10000, 5000) - a = Mie_Q(1.50 + 0.01j, 550, dp) - b = Mie_Q(1.50 + 0.1j, 550, dp) - c = Mie_Q(1.50 + 0.5j, 550, dp) + a = Mie_Q(1.50 + 0.01j, 550, dp) + b = Mie_Q(1.50 + 0.1j, 550, dp) + c = Mie_Q(1.50 + 0.5j, 550, dp) - fig, ax = plt.subplots(1, 2) - plt.subplots_adjust(right=0.9, wspace=0.4) - (ax1, ax2) = ax - size_para = math.pi * dp / 550 + fig, ax = plt.subplots(1, 2) + plt.subplots_adjust(right=0.9, wspace=0.4) + (ax1, ax2) = ax + size_para = math.pi * dp / 550 - ax1.plot(size_para, a[1], 'k-', alpha=1, label=r'$\bf\ k\ =\ 0.01$') - ax1.plot(size_para, b[1], 'b-', alpha=1, label=r'$\bf\ k\ =\ 0.10$') - ax1.plot(size_para, c[1], 'g-', alpha=1, label=r'$\bf\ k\ =\ 0.50$') - ax1.legend() + ax1.plot(size_para, a[1], 'k-', alpha=1, label=r'$\bf\ k\ =\ 0.01$') + ax1.plot(size_para, b[1], 'b-', alpha=1, label=r'$\bf\ k\ =\ 0.10$') + ax1.plot(size_para, c[1], 'g-', alpha=1, label=r'$\bf\ k\ =\ 0.50$') + ax1.legend() - ax1.set_xlim(0, size_para[-1]) - ax1.set_ylim(0, None) - ax1.set_xlabel(r'$\bf Size\ parameter\ (\alpha)$') - ax1.set_ylabel(r'$\bf Scattering\ efficiency\ (Q_{{scat}})$') + ax1.set_xlim(0, size_para[-1]) + ax1.set_ylim(0, None) + ax1.set_xlabel(r'$\bf Size\ parameter\ (\alpha)$') + ax1.set_ylabel(r'$\bf Scattering\ efficiency\ (Q_{{scat}})$') - ax2.plot(size_para, a[2], 'k-', alpha=1, label=r'$\bf\ k\ =\ 0.01$') - ax2.plot(size_para, b[2], 'b-', alpha=1, label=r'$\bf\ k\ =\ 0.10$') - ax2.plot(size_para, c[2], 'g-', alpha=1, label=r'$\bf\ k\ =\ 0.50$') - ax2.legend() + ax2.plot(size_para, a[2], 'k-', alpha=1, label=r'$\bf\ k\ =\ 0.01$') + ax2.plot(size_para, b[2], 'b-', alpha=1, label=r'$\bf\ k\ =\ 0.10$') + ax2.plot(size_para, c[2], 'g-', alpha=1, label=r'$\bf\ k\ =\ 0.50$') + ax2.legend() - ax2.set_xlim(0, size_para[-1]) - ax2.set_ylim(0, None) - ax2.set_xlabel(r'$\bf Size\ parameter\ (\alpha)$') - ax2.set_ylabel(r'$\bf Absorption\ efficiency\ (Q_{{abs}})$') + ax2.set_xlim(0, size_para[-1]) + ax2.set_ylim(0, None) + ax2.set_xlabel(r'$\bf Size\ parameter\ (\alpha)$') + ax2.set_ylabel(r'$\bf Absorption\ efficiency\ (Q_{{abs}})$') - fig.suptitle(r'$\bf n\ =\ 1.50 $') - # fig.savefig(PATH_MAIN/f'IJ_couple') + fig.suptitle(r'$\bf n\ =\ 1.50 $') + # fig.savefig(PATH_MAIN/f'IJ_couple') - plt.show() + plt.show() - return fig, ax + return fig, ax @set_figure def RRI_2D(mode: Literal["ext", "sca", "abs"] = 'ext', - **kwargs) -> tuple[Figure, Axes]: - """ - Generate a 2D plot of scattering efficiency (Q) against real and imaginary parts of the refractive index. - - Parameters - ---------- - mode : {'ext', 'sca', 'abs'}, optional - The mode of scattering efficiency to plot: - - 'ext' for extinction efficiency (Q_ext) - - 'sca' for scattering efficiency (Q_sca) - - 'abs' for absorption efficiency (Q_abs) - Default is 'ext'. - - **kwargs - Additional keyword arguments to pass to the plot function. - - Returns - ------- - ax : Axes - Matplotlib Axes object containing the generated 2D plot. 
- - Examples - -------- - Example usage of the RRI_2D function: - - >>> RRI_2D(mode='sca', xlabel='Real Part (n)', ylabel='Imaginary Part (k)', title='Scattering Efficiency 2D Plot') - """ - mode_mapping = {'ext': 0, 'sca': 1, 'abs': 2} - - typ = mode_mapping.get(mode, None) - - for dp in [400, 550, 700]: - RRI = np.linspace(1.3, 2, 100) - IRI = np.linspace(0, 0.7, 100) - arr = np.zeros((RRI.size, IRI.size)) - - for i, I_RI in enumerate(IRI): - for j, R_RI in enumerate(RRI): - arr[i, j] = Mie_Q(R_RI + 1j * I_RI, 550, dp)[typ] - - fig, ax = plt.subplots() - plt.title(fr'$\bf dp\ = {dp}\ nm$', ) - plt.xlabel(r'$\bf Real\ part\ (n)$', ) - plt.ylabel(r'$\bf Imaginary\ part\ (k)$', ) - - im = plt.imshow(arr, extent=(1.3, 2, 0, 0.7), cmap='jet', origin='lower') - color_bar = plt.colorbar(im, extend='both') - color_bar.set_label(label=fr'$\bf Q_{{{mode}}}$') - - # fig.savefig(PATH_MAIN/f'RRI_{mode}_{dp}') - - plt.show() - - return fig, ax - - -# @set_figure -# def scattering_phase(m: complex = 1.55 + 0.01j, -# wave: float = 600, -# dp: float = 200) -> tuple[Figure, Axes]: -# """ -# Generate a polar plot to visualize the scattering phase function. -# -# Parameters -# ---------- -# m : complex, optional -# The complex refractive index of the scattering medium. Default is 1.55 + 0.01j. -# wave : float, optional -# The wavelength of the incident light in nanometers. Default is 600 nm. -# dp : float, optional -# The particle diameter in nanometers. Default is 200 nm. -# -# Returns -# ------- -# ax : Axes -# Matplotlib Axes object containing the generated polar plot. -# -# Examples -# -------- -# Example usage of the scattering_phase function: -# -# >>> ax = scattering_phase(m=1.55 + 0.01j, wave=600, dp=200) -# """ -# theta, _SL, _SR, _SU = ScatteringFunction(m, wave, dp) -# -# SL = np.append(_SL, _SL[::-1]) -# SR = np.append(_SR, _SR[::-1]) -# SU = np.append(_SU, _SU[::-1]) -# -# angles = ['0', '60', '120', '180', '240', '300'] -# -# fig, ax = plt.subplots(subplot_kw={'projection': 'polar'}) -# -# theta = np.linspace(0, 2 * np.pi, len(SL)) -# -# plt.thetagrids(range(0, 360, int(360 / len(angles))), angles) -# -# plt.plot(theta, SL, '-', linewidth=2, color='#115162', label='SL') -# plt.fill(theta, SL, '#afe0f5', alpha=0.5) -# plt.plot(theta, SR, '-', linewidth=2, color='#7FAE80', label='SR') -# plt.fill(theta, SR, '#b5e6c5', alpha=0.5) -# plt.plot(theta, SU, '-', linewidth=2, color='#621129', label='SU') -# plt.fill(theta, SU, '#f5afbd', alpha=0.5) -# -# plt.legend(loc='best', bbox_to_anchor=(1, 0, 0.2, 1), prop={'weight': 'bold'}) -# plt.title(r'$\bf Scattering\ phase\ function$') -# -# plt.show() -# return fig, ax -# + **kwargs) -> tuple[Figure, Axes]: + """ + Generate a 2D plot of scattering efficiency (Q) against real and imaginary parts of the refractive index. + + Parameters + ---------- + mode : {'ext', 'sca', 'abs'}, optional + The mode of scattering efficiency to plot: + - 'ext' for extinction efficiency (Q_ext) + - 'sca' for scattering efficiency (Q_sca) + - 'abs' for absorption efficiency (Q_abs) + Default is 'ext'. + + **kwargs + Additional keyword arguments to pass to the plot function. + + Returns + ------- + ax : Axes + Matplotlib Axes object containing the generated 2D plot. 
+ + Examples + -------- + Example usage of the RRI_2D function: + + >>> RRI_2D(mode='sca', xlabel='Real Part (n)', ylabel='Imaginary Part (k)', title='Scattering Efficiency 2D Plot') + """ + mode_mapping = {'ext': 0, 'sca': 1, 'abs': 2} + + typ = mode_mapping.get(mode, None) + + for dp in [400, 550, 700]: + RRI = np.linspace(1.3, 2, 100) + IRI = np.linspace(0, 0.7, 100) + arr = np.zeros((RRI.size, IRI.size)) + + for i, I_RI in enumerate(IRI): + for j, R_RI in enumerate(RRI): + arr[i, j] = Mie_Q(R_RI + 1j * I_RI, 550, dp)[typ] + + fig, ax = plt.subplots() + plt.title(fr'$\bf dp\ = {dp}\ nm$', ) + plt.xlabel(r'$\bf Real\ part\ (n)$', ) + plt.ylabel(r'$\bf Imaginary\ part\ (k)$', ) + + im = plt.imshow(arr, extent=(1.3, 2, 0, 0.7), cmap='jet', origin='lower') + color_bar = plt.colorbar(im, extend='both') + color_bar.set_label(label=fr'$\bf Q_{{{mode}}}$') + + # fig.savefig(PATH_MAIN/f'RRI_{mode}_{dp}') + + plt.show() + + return fig, ax + + +@set_figure +def scattering_phase(m: complex = 1.55 + 0.01j, + wave: float = 600, + dp: float = 200) -> tuple[Figure, Axes]: + """ + Generate a polar plot to visualize the scattering phase function. + + Parameters + ---------- + m : complex, optional + The complex refractive index of the scattering medium. Default is 1.55 + 0.01j. + wave : float, optional + The wavelength of the incident light in nanometers. Default is 600 nm. + dp : float, optional + The particle diameter in nanometers. Default is 200 nm. + + Returns + ------- + ax : Axes + Matplotlib Axes object containing the generated polar plot. + + Examples + -------- + Example usage of the scattering_phase function: + + >>> ax = scattering_phase(m=1.55 + 0.01j, wave=600, dp=200) + """ + theta, _SL, _SR, _SU = ScatteringFunction(m, wave, dp) + + SL = np.append(_SL, _SL[::-1]) + SR = np.append(_SR, _SR[::-1]) + SU = np.append(_SU, _SU[::-1]) + + angles = ['0', '60', '120', '180', '240', '300'] + + fig, ax = plt.subplots(subplot_kw={'projection': 'polar'}) + + theta = np.linspace(0, 2 * np.pi, len(SL)) + + plt.thetagrids(range(0, 360, int(360 / len(angles))), angles) + + plt.plot(theta, SL, '-', linewidth=2, color='#115162', label='SL') + plt.fill(theta, SL, '#afe0f5', alpha=0.5) + plt.plot(theta, SR, '-', linewidth=2, color='#7FAE80', label='SR') + plt.fill(theta, SR, '#b5e6c5', alpha=0.5) + plt.plot(theta, SU, '-', linewidth=2, color='#621129', label='SU') + plt.fill(theta, SU, '#f5afbd', alpha=0.5) + + plt.legend(loc='best', bbox_to_anchor=(1, 0, 0.2, 1), prop={'weight': 'bold'}) + plt.title(r'$\bf Scattering\ phase\ function$') + + plt.show() + return fig, ax + @set_figure def response_surface(real_range=(1.33, 1.7), - gmd_range=(10, 500), - num=50, - **kwargs) -> tuple[Figure, Axes]: - """ - Generate a response surface plot for sensitivity tests of extinction based on Mie scattering. + gmd_range=(10, 500), + num=50, + **kwargs) -> tuple[Figure, Axes]: + """ + Generate a response surface plot for sensitivity tests of extinction based on Mie scattering. - Parameters - ---------- - real_range : tuple, optional - The range of real part (refractive index) values for sensitivity testing. Default is (1.33, 1.7). + Parameters + ---------- + real_range : tuple, optional + The range of real part (refractive index) values for sensitivity testing. Default is (1.33, 1.7). - gmd_range : tuple, optional - The range of geometric mean diameter (GMD) values for sensitivity testing. Default is (60, 400). + gmd_range : tuple, optional + The range of geometric mean diameter (GMD) values for sensitivity testing. 
Default is (60, 400). - num : int, optional - The number of points to generate within the specified ranges. Default is 50. + num : int, optional + The number of points to generate within the specified ranges. Default is 50. - **kwargs - Additional keyword arguments to pass to the plot function. + **kwargs + Additional keyword arguments to pass to the plot function. - Returns - ------- - ax : Axes - Matplotlib Axes object containing the generated response surface plot. + Returns + ------- + ax : Axes + Matplotlib Axes object containing the generated response surface plot. - Examples - -------- - Example usage of the response_surface function: + Examples + -------- + Example usage of the response_surface function: - >>> response_surface(real_range=(1.4, 1.6), gmd_range=(100, 300), num=30, xlabel='Real Part (n)', - ... ylabel='GMD (nm)', zlabel='Extinction (1/Mm)', title='Sensitivity Tests of Extinction') - """ + >>> response_surface(real_range=(1.4, 1.6), gmd_range=(100, 300), num=30, xlabel='Real Part (n)', + ... ylabel='GMD (nm)', zlabel='Extinction (1/Mm)', title='Sensitivity Tests of Extinction') + """ - def function(RI, GMD): - Z = np.zeros_like(RI) # 使用 np.zeros_like 可以確保 Z 和 RI 具有相同的形狀 + def function(RI, GMD): + Z = np.zeros_like(RI) # 使用 np.zeros_like 可以確保 Z 和 RI 具有相同的形狀 - for i in range(RI.shape[0]): - for j in range(RI.shape[1]): - _RI, _GMD = RI[i, j], GMD[i, j] - Bext, Bsca, Babs = Mie_PESD(m=_RI, lognormal=True, geoMean=_GMD, geoStdDev=2.) - Z[i, j] = np.sum(Bext) + for i in range(RI.shape[0]): + for j in range(RI.shape[1]): + _RI, _GMD = RI[i, j], GMD[i, j] + Bext, Bsca, Babs = Mie_PESD(m=_RI, lognormal=True, geoMean=_GMD, geoStdDev=2.) + Z[i, j] = np.sum(Bext) - return Z + return Z - # 假設 RI、GSD、GMD - RI = np.linspace(real_range[0], real_range[1], num) - GMD = np.linspace(gmd_range[0], gmd_range[1], num) + # 假設 RI、GSD、GMD + RI = np.linspace(real_range[0], real_range[1], num) + GMD = np.linspace(gmd_range[0], gmd_range[1], num) - # 建立三維 meshgrid - real, gmd = np.meshgrid(RI, GMD, indexing='xy') + # 建立三維 meshgrid + real, gmd = np.meshgrid(RI, GMD, indexing='xy') - # Result - ext = function(real, gmd) + # Result + ext = function(real, gmd) - # plot - fig, ax = plt.subplots(subplot_kw={"projection": "3d"}) - ax.plot_surface(real, gmd, ext, rstride=1, cstride=1, cmap=plt.get_cmap('jet'), edgecolor='none') + # plot + fig, ax = plt.subplots(subplot_kw={"projection": "3d"}) + ax.plot_surface(real, gmd, ext, rstride=1, cstride=1, cmap=plt.get_cmap('jet'), edgecolor='none') - ax.set(xlabel='Real part (n)', ylabel='GMD (nm)', zlabel=Unit('Extinction'), - title='Sensitive tests of Extinction') + ax.set(xlabel='Real part (n)', ylabel='GMD (nm)', zlabel=Unit('Extinction'), + title='Sensitive tests of extinction') - ax.zaxis.get_offset_text().set_visible(False) - exponent = math.floor(math.log10(np.max(ext))) - ax.text(ax.get_xlim()[1] * 1.01, ax.get_ylim()[1], ax.get_zlim()[1] * 1.1, s=fr'${{\times}}\ 10^{exponent}$') - ax.ticklabel_format(style='sci', axis='z', scilimits=(0, 0), useOffset=False) + ax.zaxis.get_offset_text().set_visible(False) + exponent = math.floor(math.log10(np.max(ext))) + ax.text(ax.get_xlim()[1] * 1.01, ax.get_ylim()[1], ax.get_zlim()[1] * 1.1, s=fr'${{\times}}\ 10^{exponent}$') + ax.ticklabel_format(style='sci', axis='z', scilimits=(0, 0), useOffset=False) - plt.show() + plt.show() - return fig, ax + return fig, ax if __name__ == '__main__': - Q_plot(['AS', 'AN', 'OM', 'Soil', 'SS', 'BC'], x='dp', y='MEE') - Q_plot(['AS', 'AN', 'OM', 'Soil', 'SS', 'BC'], x='dp', 
y='Q') + Q_plot(['AS', 'AN', 'OM', 'Soil', 'SS', 'BC'], x='dp', y='MEE') + Q_plot(['AS', 'AN', 'OM', 'Soil', 'SS', 'BC'], x='dp', y='Q') - # RI_couple() - # response_surface() + RI_couple() + response_surface() diff --git a/AeroViz/plot/pie.py b/AeroViz/plot/pie.py new file mode 100644 index 0000000..2f72b6e --- /dev/null +++ b/AeroViz/plot/pie.py @@ -0,0 +1,210 @@ +from typing import Literal + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from matplotlib.pyplot import Figure, Axes +from pandas import DataFrame + +from AeroViz.plot.utils import * + +__all__ = [ + 'pie', + 'donuts' +] + + +@set_figure(fw='bold') +def pie(data_set: DataFrame | dict, + labels: list[str], + unit: str, + style: Literal["pie", 'donut'], + ax: Axes | None = None, + symbol: bool = True, + **kwargs) -> tuple[Figure, Axes]: + """ + Create a pie or donut chart based on the provided data. + + Parameters + ---------- + data_set : pd.DataFrame | dict + A pandas DataFrame or dictionary mapping category names to a list of species. + If a DataFrame is provided, the index represents the categories, and each column contains species data. + If a dictionary is provided, it maps category names to lists of species data. + It is assumed that all lists or DataFrame columns contain the same number of entries as the *labels* list. + labels : list of str + The labels for each category. + unit : str + The unit to display in the center of the donut chart. + style : Literal["pie", 'donut'] + The style of the chart, either 'pie' for a standard pie chart or 'donut' for a donut chart. + ax : plt.Axes or None, optional + The Axes object to plot the chart onto. If None, a new figure and Axes will be created. + symbol : bool, optional + Whether to display values for each species in the chart. + **kwargs + Additional keyword arguments to be passed to the plotting function. + + Returns + ------- + matplotlib.axes.Axes + The Axes object containing the violin plot. + + Notes + ----- + - If *data_set* is a dictionary, it should contain lists of species that correspond to each category in *labels*. + - The length of each list in *data_set* or the number of columns in the DataFrame should match the length of the *labels* list. 
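# --- Illustrative DataFrame usage (assumed values and unit key) -------------
# The dict form is shown in the Examples below; the DataFrame form expects
# rows = categories (one pie per row) and columns = species. 'PM25' is an
# assumed key of AeroViz's units.json; substitute whichever unit key you use.
import pandas as pd
from AeroViz.plot.pie import pie

df = pd.DataFrame({'Species 1': [10, 15],
                   'Species 2': [20, 25],
                   'Species 3': [30, 35]},
                  index=['Category 1', 'Category 2'])
fig, ax = pie(df, labels=df.columns.tolist(), unit='PM25', style='donut')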
+ + Examples + -------- + >>> data_set = {'Category 1': [10, 20, 30], 'Category 2': [15, 25, 35]} + >>> labels = ['Species 1', 'Species 2', 'Species 3'] + >>> pie(data_set, labels, unit='kg', style='pie', symbol=True) + """ + if isinstance(data_set, DataFrame): + category_names = list(data_set.index) + data = data_set.to_numpy() + + pies, species = data.shape + + elif isinstance(data_set, dict): + category_names = list(data_set.keys()) + data = np.array(list(data_set.values())) + + pies, species = data.shape + + else: + raise ValueError('data_set must be a DataFrame or a dictionary.') + + colors = kwargs.get('colors') or (Color.colors1 if species == 6 else Color.getColor(num=species)) + + radius = 4 + width = 4 if style == 'pie' else 1 + + text = [''] * pies if style == 'pie' else [Unit(unit) + '\n\n' + + '{:.2f} ± {:.2f}'.format(x, s) + for x, s in zip(data.sum(axis=1), data.std(axis=1))] + pct_distance = 0.6 if style == 'pie' else 0.88 + + fig, ax = plt.subplots(1, pies, figsize=((pies * 2) + 1, 2)) if ax is None else (ax.get_figure(), ax) + + if pies == 1: + ax = [ax] + + for i in range(pies): + ax[i].pie(data[i], labels=None, colors=colors, textprops=None, + autopct=lambda pct: auto_label_pct(pct, symbol=symbol, include_pct=True), + pctdistance=pct_distance, radius=radius, wedgeprops=dict(width=width, edgecolor='w')) + + ax[i].pie(data[i], labels=None, colors=colors, textprops=None, + autopct=lambda pct: auto_label_pct(pct, symbol=symbol, ignore='outer', include_pct=True), + pctdistance=1.3, radius=radius, wedgeprops=dict(width=width, edgecolor='w')) + ax[i].axis('equal') + ax[i].text(0, 0, text[i], ha='center', va='center') + + if kwargs.get('title') is None: + ax[i].set_title(category_names[i]) + + else: + if len(kwargs.get('title')) == pies: + title = kwargs.get('title') + else: + raise ValueError('The length of the title list must match the number of pies.') + + ax[i].set_title(title[i]) + + ax[-1].legend(labels, loc='center left', prop={'size': 8, 'weight': 'normal'}, bbox_to_anchor=(1, 0, 1.15, 1)) + + # fig.savefig(f"pie_{style}_{title}") + + plt.show() + + return fig, ax + + +@set_figure(fw='bold') +def donuts(data_set: DataFrame | dict, + labels: list[str], + unit: str, + ax: Axes | None = None, + symbol=True, + **kwargs) -> tuple[Figure, Axes]: + """ + Plot a donut chart based on the data set. + + Parameters + ---------- + data_set : pd.DataFrame | dict + A pandas DataFrame or a dictionary mapping category names to a list of species. + If a DataFrame is provided, the index represents the categories, and each column contains species data. + If a dictionary is provided, it maps category names to lists of species data. + It is assumed that all lists or DataFrame columns contain the same number of entries as the *labels* list. + labels : list of str + The category labels. + unit : str + The unit to be displayed in the center of the donut chart. + ax : matplotlib.axes.Axes, optional + The axes to plot on. If None, the current axes will be used (default). + symbol : bool, optional + Whether to display values for each species (default is True). + **kwargs : dict, optional + Additional keyword arguments to pass to the matplotlib pie chart function. + + Returns + ------- + matplotlib.axes.Axes + The axes containing the donut chart. 
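# --- Illustrative usage sketch (donuts has no Examples section) --------------
# The body below draws data[0] as the inner ring, data[1] as the middle ring
# and data[2] as the outer ring, annotated as Clean / Transition / Event, so
# the three categories should be supplied in that order. The values and the
# 'PM25' unit key are assumptions for the demo.
import pandas as pd
from AeroViz.plot.pie import donuts

df = pd.DataFrame({'Species 1': [10, 15, 20],
                   'Species 2': [20, 25, 30],
                   'Species 3': [30, 35, 40]},
                  index=['Clean', 'Transition', 'Event'])
fig, ax = donuts(df, labels=df.columns.tolist(), unit='PM25', title='Period comparison')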
+ """ + + if isinstance(data_set, DataFrame): + category_names = list(data_set.index) + data = data_set.to_numpy() + + pies, species = data.shape + + elif isinstance(data_set, dict): + category_names = list(data_set.keys()) + data = np.array(list(data_set.values())) + + pies, species = data.shape + + else: + raise ValueError('data_set must be a DataFrame or a dictionary.') + + colors1 = kwargs.get('colors') or (Color.colors1 if species == 6 else Color.getColor(num=species)) + colors2 = Color.adjust_opacity(colors1, 0.8) + colors3 = Color.adjust_opacity(colors1, 0.6) + + fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) + + ax.pie(data[2], labels=None, colors=colors1, textprops=None, + autopct=lambda pct: auto_label_pct(pct, symbol=symbol, include_pct=True), + pctdistance=0.9, radius=14, wedgeprops=dict(width=3, edgecolor='w')) + + ax.pie(data[1], labels=None, colors=colors2, textprops=None, + autopct=lambda pct: auto_label_pct(pct, symbol=symbol, include_pct=True), + pctdistance=0.85, radius=11, wedgeprops=dict(width=3, edgecolor='w')) + + ax.pie(data[0], labels=None, colors=colors3, textprops=None, + autopct=lambda pct: auto_label_pct(pct, symbol=symbol, include_pct=True), + pctdistance=0.80, radius=8, wedgeprops=dict(width=3, edgecolor='w')) + + text = (Unit(f'{unit}') + '\n\n' + + 'Event : ' + "{:.2f}".format(np.sum(data[2])) + '\n' + + 'Transition : ' + "{:.2f}".format(np.sum(data[1])) + '\n' + + 'Clean : ' + "{:.2f}".format(np.sum(data[0]))) + + ax.text(0, 0, text, ha='center', va='center') + ax.axis('equal') + + ax.set_title(kwargs.get('title', '')) + + ax.legend(labels, loc='center', prop={'size': 8}, title_fontproperties={'weight': 'bold'}, + title=f'Outer : {category_names[2]}' + '\n' + f'Middle : {category_names[1]}' + '\n' + f'Inner : {category_names[0]}', + bbox_to_anchor=(0.8, 0, 0.5, 1)) + + # fig.savefig(f"donuts_{title}") + + plt.show() + + return fig, ax diff --git a/AeroViz/plot/radar.py b/AeroViz/plot/radar.py new file mode 100644 index 0000000..f4b37a6 --- /dev/null +++ b/AeroViz/plot/radar.py @@ -0,0 +1,184 @@ +import matplotlib.pyplot as plt +import numpy as np +from matplotlib.patches import Circle, RegularPolygon +from matplotlib.path import Path +from matplotlib.projections import register_projection +from matplotlib.projections.polar import PolarAxes +from matplotlib.spines import Spine +from matplotlib.transforms import Affine2D + +from AeroViz.plot.utils import * + +__all__ = ['radar'] + + +def radar_factory(num_vars, frame='circle'): + """ + Create a radar chart with `num_vars` axes. + + This function creates a RadarAxes projection and registers it. + + Parameters + ---------- + num_vars : int + Number of variables for radar chart. + frame : {'circle', 'polygon'} + Shape of frame surrounding axes. + + """ + # calculate evenly-spaced axis angles + theta = np.linspace(0, 2 * np.pi, num_vars, endpoint=False) + + class RadarTransform(PolarAxes.PolarTransform): + + def transform_path_non_affine(self, path): + # Paths with non-unit interpolation steps correspond to gridlines, + # in which case we force interpolation (to defeat PolarTransform's + # autoconversion to circular arcs). 
+ if path._interpolation_steps > 1: + path = path.interpolated(num_vars) + return Path(self.transform(path.vertices), path.codes) + + class RadarAxes(PolarAxes): + + name = 'radar' + PolarTransform = RadarTransform + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # rotate plot such that the first axis is at the top + self.set_theta_zero_location('N') + + def fill(self, *args, closed=True, **kwargs): + """Override fill so that line is closed by default""" + return super().fill(closed=closed, *args, **kwargs) + + def plot(self, *args, **kwargs): + """Override plot so that line is closed by default""" + lines = super().plot(*args, **kwargs) + for line in lines: + self._close_line(line) + + def _close_line(self, line): + x, y = line.get_data() + if x[0] != x[-1]: + x = np.append(x, x[0]) + y = np.append(y, y[0]) + line.set_data(x, y) + + def set_varlabels(self, labels): + self.set_thetagrids(np.degrees(theta), labels) + + @staticmethod + def _gen_axes_patch(): + # The Axes patch must be centered at (0.5, 0.5) and of radius 0.5 + # in axes coordinates. + if frame == 'circle': + return Circle((0.5, 0.5), 0.5) + elif frame == 'polygon': + return RegularPolygon((0.5, 0.5), num_vars, + radius=.5, edgecolor="k") + else: + raise ValueError("Unknown value for 'frame': %s" % frame) + + def _gen_axes_spines(self): + if frame == 'circle': + return super()._gen_axes_spines() + elif frame == 'polygon': + # spine_type must be 'left'/'right'/'top'/'bottom'/'circle'. + spine = Spine(axes=self, + spine_type='circle', + path=Path.unit_regular_polygon(num_vars)) + # unit_regular_polygon gives a polygon of radius 1 centered at + # (0, 0) but we want a polygon of radius 0.5 centered at (0.5, + # 0.5) in axes coordinates. + spine.set_transform(Affine2D().scale(.5).translate(.5, .5) + + self.transAxes) + return {'polar': spine} + else: + raise ValueError("Unknown value for 'frame': %s" % frame) + + register_projection(RadarAxes) + return theta + + +@set_figure(figsize=(3, 3)) +def radar(data, labels=None, legend_labels=None, **kwargs) -> tuple[plt.Figure, plt.Axes]: + """ + Creates a radar chart based on the provided data. + + Parameters + ---------- + data : list of list + A 2D list where each inner list represents a factor, and each element + within the inner lists represents a value for a species. + Shape: (n_factors, n_species) + Example: [[0.88, 0.01, 0.03, ...], [0.07, 0.95, 0.04, ...], ...] + + labels : list, optional + A list of strings representing the names of species (variables). + If provided, it should have the same length as the number of elements + in each inner list of `data`. + Example: ['Sulfate', 'Nitrate', 'EC', 'OC1', 'OC2', 'OP', 'CO', 'O3'] + + legend_labels : list, optional + A list of strings for labeling each factor in the legend. + If provided, it should have the same length as the number of inner lists in `data`. + + **kwargs : dict + Additional keyword arguments to be passed to the plotting function. + This may include 'title' for setting the chart title. + + Returns + ------- + tuple[plt.Figure, plt.Axes] + A tuple containing the Figure and Axes objects of the created plot. 
+ + Example + ------- + >>> data = [[0.88, 0.01, 0.03, 0.03, 0.00, 0.06, 0.01, 0.00], + >>> [0.07, 0.95, 0.04, 0.05, 0.00, 0.02, 0.01, 0.00], + >>> [0.01, 0.02, 0.85, 0.19, 0.05, 0.10, 0.00, 0.00], + >>> [0.02, 0.01, 0.07, 0.01, 0.21, 0.12, 0.98, 0.00], + >>> [0.01, 0.01, 0.02, 0.71, 0.74, 0.70, 0.30, 0.20]] + >>> labels = ['Sulfate', 'Nitrate', 'EC', 'OC1', 'OC2', 'OP', 'CO', 'O3'] + >>> fig, ax = radar(data, labels=labels, title='Basecase') + + Note + ---- + The first dimension of `data` represents each factor, while the second + dimension represents each species. + """ + theta = radar_factory(np.array(data).shape[1], frame='polygon') + + fig, ax = plt.subplots(subplot_kw=dict(projection='radar')) + fig.subplots_adjust(wspace=0.25, hspace=0.20, top=0.80, bottom=0.05, right=0.80) + + colors = ['b', 'r', 'g', 'm', 'y'] + + # Plot the four cases from the example data on separate axes + for d, color in zip(data, colors): + ax.plot(theta, d, color=color) + ax.fill(theta, d, facecolor=color, alpha=0.25, label='_nolegend_') + + ax.set_varlabels(labels) + ax.set_rgrids([0.2, 0.4, 0.6, 0.8]) + ax.set(title=kwargs.get('title', '')) + + # add legend relative to top-left plot + legend_labels = legend_labels or ('Factor 1', 'Factor 2', 'Factor 3', 'Factor 4', 'Factor 5') + legend = ax.legend(legend_labels, loc=(0.95, 0.95), labelspacing=0.1) + + plt.show() + + return fig, ax + + +if __name__ == '__main__': + data = [[0.88, 0.01, 0.03, 0.03, 0.00, 0.06, 0.01, 0.00], + [0.07, 0.95, 0.04, 0.05, 0.00, 0.02, 0.01, 0.00], + [0.01, 0.02, 0.85, 0.19, 0.05, 0.10, 0.00, 0.00], + [0.02, 0.01, 0.07, 0.01, 0.21, 0.12, 0.98, 0.00], + [0.01, 0.01, 0.02, 0.71, 0.74, 0.70, 0.30, 0.20]] + + fig, ax = radar(data=data, labels=['Sulfate', 'Nitrate', 'EC', 'OC1', 'OC2', 'OP', 'CO', 'O3'], title='Basecase') diff --git a/AeroViz/plot/regression.py b/AeroViz/plot/regression.py new file mode 100644 index 0000000..1d4a672 --- /dev/null +++ b/AeroViz/plot/regression.py @@ -0,0 +1,196 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from matplotlib.pyplot import Figure, Axes + +from AeroViz.plot.utils import * + +__all__ = [ + 'linear_regression', + 'multiple_linear_regression', +] + + +@set_figure +def linear_regression(df: pd.DataFrame, + x: str | list[str], + y: str | list[str], + labels: str | list[str] = None, + ax: Axes | None = None, + diagonal=False, + positive: bool = True, + fit_intercept: bool = True, + **kwargs + ) -> tuple[Figure, Axes]: + """ + Create a scatter plot with regression lines for the given data. + + Parameters + ---------- + df : pd.DataFrame + Input DataFrame containing the data. + x : str or list of str + Column name(s) for the x-axis variable(s). If a list, only the first element is used. + y : str or list of str + Column name(s) for the y-axis variable(s). + labels : str or list of str, optional + Labels for the y-axis variable(s). If None, column names are used as labels. Default is None. + ax : Axes, optional + Matplotlib Axes object to use for the plot. If None, a new subplot is created. Default is None. + diagonal : bool, optional + If True, a diagonal line (1:1 line) is added to the plot. Default is False. + positive : bool, optional + Whether to constrain the regression coefficients to be positive. Default is True. + fit_intercept: bool, optional + Whether to calculate the intercept for this model. Default is True. + **kwargs + Additional keyword arguments for plot customization. + + Returns + ------- + fig : Figure + The matplotlib Figure object. 
+ ax : Axes + The matplotlib Axes object with the scatter plot. + + Notes + ----- + - The function creates a scatter plot with optional regression lines. + - The regression line is fitted for each y variable. + - Customization options are provided via **kwargs. + + Example + ------- + >>> linear_regression(df, x='X', y=['Y1', 'Y2'], labels=['Label1', 'Label2'], + ... diagonal=True, xlim=(0, 10), ylim=(0, 20), + ... xlabel="X-axis", ylabel="Y-axis", title="Scatter Plot with Regressions") + """ + fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) + + if not isinstance(x, str): + x = x[0] + + if not isinstance(y, list): + y = [y] + + if labels is None: + labels = y + + df = df.dropna(subset=[x, *y]) + x_array = df[[x]].to_numpy() + + color_cycle = Color.linecolor + + handles, text_list = [], [] + + for i, y_var in enumerate(y): + y_array = df[[y_var]].to_numpy() + + color = color_cycle[i % len(color_cycle)] + + scatter = ax.scatter(x_array, y_array, s=25, color=color['face'], edgecolors=color['edge'], alpha=0.8, + label=labels[i]) + handles.append(scatter) + + text, y_predict, slope = linear_regression_base(x_array, y_array, + columns=labels[i], + positive=positive, + fit_intercept=fit_intercept) + + text_list.append(f'{labels[i]}:\n{text}') + plt.plot(x_array, y_predict, linewidth=3, color=color['line'], alpha=1, zorder=3) + + ax.set(xlim=kwargs.get('xlim'), ylim=kwargs.get('ylim'), xlabel=Unit(x), ylabel=Unit(y[0]), + title=kwargs.get('title')) + + # Add regression info to the legend + leg = plt.legend(handles=handles, labels=text_list, loc='upper left', prop={'weight': 'bold'}) + + for text, color in zip(leg.get_texts(), [color['line'] for color in color_cycle]): + text.set_color(color) + + if diagonal: + ax.axline((0, 0), slope=1., color='k', lw=2, ls='--', alpha=0.5, label='1:1') + plt.text(0.97, 0.97, r'$\bf 1:1\ Line$', color='k', ha='right', va='top', transform=ax.transAxes) + + plt.show() + + return fig, ax + + +@set_figure +def multiple_linear_regression(df: pd.DataFrame, + x: str | list[str], + y: str | list[str], + labels: str | list[str] = None, + ax: Axes | None = None, + diagonal=False, + positive: bool = True, + fit_intercept: bool = True, + **kwargs + ) -> tuple[Figure, Axes]: + """ + Perform multiple linear regression analysis and plot the results. + + Parameters + ---------- + df : pd.DataFrame + Input DataFrame containing the data. + x : str or list of str + Column name(s) for the independent variable(s). Can be a single string or a list of strings. + y : str or list of str + Column name(s) for the dependent variable(s). Can be a single string or a list of strings. + labels : str or list of str, optional + Labels for the dependent variable(s). If None, column names are used as labels. Default is None. + ax : Axes, optional + Matplotlib Axes object to use for the plot. If None, a new subplot is created. Default is None. + diagonal : bool, optional + Whether to include a diagonal line (1:1 line) in the plot. Default is False. + positive : bool, optional + Whether to constrain the regression coefficients to be positive. Default is True. + fit_intercept: bool, optional + Whether to calculate the intercept for this model. Default is True. + **kwargs + Additional keyword arguments for plot customization. + + Returns + ------- + tuple[Figure, Axes] + The Figure and Axes containing the regression plot. + + Notes + ----- + This function performs multiple linear regression analysis using the input DataFrame. 
+ It supports multiple independent variables and can plot the regression results. + + Example + ------- + >>> multiple_linear_regression(df, x=['X1', 'X2'], y='Y', labels=['Y1', 'Y2'], + ... diagonal=True, fit_intercept=True, + ... xlabel="X-axis", ylabel="Y-axis", title="Multiple Linear Regression Plot") + """ + fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) + + if not isinstance(x, list): + x = [x] + + if not isinstance(y, str): + y = y[0] + + if labels is None: + labels = x + + df = df[[*x, y]].dropna() + x_array = df[[*x]].to_numpy() + y_array = df[[y]].to_numpy() + + text, y_predict, coefficients = linear_regression_base(x_array, y_array, + columns=labels, + positive=positive, + fit_intercept=fit_intercept) + + df = pd.DataFrame(np.concatenate([y_array, y_predict], axis=1), columns=['y_actual', 'y_predict']) + + linear_regression(df, x='y_actual', y='y_predict', ax=ax, regression=True, diagonal=diagonal) + + return fig, ax diff --git a/AeroViz/plot/scatter.py b/AeroViz/plot/scatter.py new file mode 100644 index 0000000..a32d1e5 --- /dev/null +++ b/AeroViz/plot/scatter.py @@ -0,0 +1,174 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +from matplotlib.colors import Normalize +from matplotlib.pyplot import Figure, Axes +from matplotlib.ticker import ScalarFormatter + +from AeroViz.plot.utils import * + +__all__ = ['scatter'] + + +def check_empty(*arrays): + for i, arr in enumerate(arrays): + if arr.size == 0: + raise ValueError(f"Array is empty!") + + +@set_figure +def scatter(df: pd.DataFrame, + x: str, + y: str, + c: str | None = None, + color: str | None = '#7a97c9', + s: str | None = None, + cmap='jet', + regression=False, + regression_line_color: str | None = sns.xkcd_rgb["denim blue"], + diagonal=False, + ax: Axes | None = None, + **kwargs + ) -> tuple[Figure, Axes]: + """ + Creates a scatter plot with optional color and size encoding. + + Parameters + ---------- + df : pd.DataFrame + The DataFrame containing the data to plot. + x : str + The column name for the x-axis values. + y : str + The column name for the y-axis values. + c : str, optional + The column name for c encoding. Default is None. + color : str, optional + The column name for color encoding. Default is None. + s : str, optional + The column name for size encoding. Default is None. + cmap : str, optional + The colormap to use for the color encoding. Default is 'jet'. + regression : bool, optional + If True, fits and plots a linear regression line. Default is False. + regression_line_color : str, optional + The color of the regression line. Default is 'sns.xkcd_rgb["denim blue"]'. + diagonal : bool, optional + If True, plots a 1:1 diagonal line. Default is False. + ax : Axes, optional + The matplotlib Axes to plot on. If not provided, a new figure and axes are created. + **kwargs : Any + Additional keyword arguments passed to customize the plot, such as `fig_kws` for figure creation and `xlabel`, + `ylabel`, `xlim`, `ylim`, `title` for axis labeling and limits. + + Returns + ------- + fig : Figure + The matplotlib Figure object. + ax : Axes + The matplotlib Axes object with the scatter plot. + + Notes + ----- + - If both `c` and `s` are provided, the scatter plot will encode data points using both color and size. + - If only `c` is provided, data points will be color-coded according to the values in the `c` column. + - If only `s` is provided, data points will be sized according to the values in the `s` column. 
+ - If neither `c` nor `s` is provided, a basic scatter plot is created. + - The `regression` option will add a linear regression line and display the equation on the plot. + - The `diagonal` option will add a 1:1 reference line to the plot. + + Examples + -------- + >>> import pandas as pd + >>> from AeroViz.plot import scatter + >>> df = pd.DataFrame({ + >>> 'x': [1, 2, 3, 4], + >>> 'y': [1.1, 2.0, 2.9, 4.1], + >>> 'color': [10, 20, 30, 40], + >>> 'size': [100, 200, 300, 400] + >>> }) + >>> fig, ax = scatter(df, x='x', y='y', c='color', s='size', regression=True, diagonal=True) + """ + fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) + + if c is not None and s is not None: + df_ = df.dropna(subset=[x, y, c, s]).copy() + x_data, y_data, c_data, s_data = df_[x].to_numpy(), df_[y].to_numpy(), df_[c].to_numpy(), df_[s].to_numpy() + check_empty(x_data, y_data, c_data, s_data) + + scatter = ax.scatter(x_data, y_data, c=c_data, + norm=Normalize(vmin=np.percentile(c_data, 10), vmax=np.percentile(c_data, 90)), + cmap=cmap, s=50 * (s_data / s_data.max()) ** 1.5, alpha=0.7, edgecolors=None) + colorbar = True + + dot = np.linspace(s_data.min(), s_data.max(), 6).round(-1) + + for dott in dot[1:-1]: + plt.scatter([], [], c='k', alpha=0.8, s=50 * (dott / s_data.max()) ** 1.5, label='{:.0f}'.format(dott)) + + plt.legend(title=Unit(s)) + + elif c is not None: + df_ = df.dropna(subset=[x, y, c]).copy() + x_data, y_data, c_data = df_[x].to_numpy(), df_[y].to_numpy(), df_[c].to_numpy() + check_empty(x_data, y_data, c_data) + + scatter = ax.scatter(x_data, y_data, c=c_data, vmin=c_data.min(), vmax=np.percentile(c_data, 90), cmap=cmap, + alpha=0.7, + edgecolors=None) + colorbar = True + + elif s is not None: + df_ = df.dropna(subset=[x, y, s]).copy() + x_data, y_data, s_data = df_[x].to_numpy(), df_[y].to_numpy(), df_[s].to_numpy() + check_empty(x_data, y_data, s_data) + + scatter = ax.scatter(x_data, y_data, s=50 * (s_data / s_data.max()) ** 1.5, color=color, alpha=0.5, + edgecolors='white') + colorbar = False + + # dealing + dot = np.linspace(s_data.min(), s_data.max(), 6).round(-1) + + for dott in dot[1:-1]: + plt.scatter([], [], c='k', alpha=0.8, s=50 * (dott / s_data.max()) ** 1.5, label='{:.0f}'.format(dott)) + + plt.legend(title=Unit(s)) + + else: + df_ = df.dropna(subset=[x, y]).copy() + x_data, y_data = df_[x].to_numpy(), df_[y].to_numpy() + check_empty(x_data, y_data) + + scatter = ax.scatter(x_data, y_data, s=30, color=color, alpha=0.5, edgecolors='white') + colorbar = False + + ax.set(xlim=kwargs.get('xlim', (x_data.min(), x_data.max())), + ylim=kwargs.get('ylim', (y_data.min(), y_data.max())), + xlabel=kwargs.get('xlabel', Unit(x)), + ylabel=kwargs.get('ylabel', Unit(y)), + title=kwargs.get('title', '')) + + ax.xaxis.set_major_formatter(ScalarFormatter()) + ax.yaxis.set_major_formatter(ScalarFormatter()) + + if colorbar: + plt.colorbar(scatter, extend='both', label=Unit(c)) + + if regression: + text, y_predict, slope = linear_regression_base(x_data, y_data) + ax.plot(x_data, y_predict, linewidth=3, color=regression_line_color, alpha=1, zorder=3) + plt.text(0.05, 0.95, text, fontdict={'weight': 'bold'}, color=regression_line_color, + ha='left', va='top', transform=ax.transAxes) + + if diagonal: + ax.axline((0, 0), slope=1., color='k', lw=2, ls='--', alpha=0.5, label='1:1') + + data_range = min(ax.get_xlim()[1] - ax.get_xlim()[0], ax.get_ylim()[1] - ax.get_ylim()[0]) + plt.text(0.9 * data_range, 0.9 * data_range, r'$\bf 1:1\ Line$', color='k', ha='left', 
va='bottom', + bbox=dict(facecolor='white', edgecolor='none', alpha=0.1, pad=3)) + + plt.show() + + return fig, ax diff --git a/AeroViz/plot/templates/__init__.py b/AeroViz/plot/templates/__init__.py index 91682fe..0b8042b 100644 --- a/AeroViz/plot/templates/__init__.py +++ b/AeroViz/plot/templates/__init__.py @@ -1,8 +1,6 @@ +from .ammonium_rich import ammonium_rich from .contour import * from .corr_matrix import corr_matrix from .diurnal_pattern import * from .koschmieder import * -from .metal_heatmap import metal_heatmaps, process_data -from .regression import * -from .scatter import * -from .templates import * +from .metal_heatmap import metal_heatmaps, process_data_with_two_df diff --git a/AeroViz/plot/templates/ammonium_rich.py b/AeroViz/plot/templates/ammonium_rich.py new file mode 100644 index 0000000..3855a67 --- /dev/null +++ b/AeroViz/plot/templates/ammonium_rich.py @@ -0,0 +1,34 @@ +import matplotlib.pyplot as plt +from matplotlib.pyplot import Figure, Axes +from pandas import DataFrame + +from AeroViz.plot.utils import set_figure, Unit + + +@set_figure(figsize=(5, 4)) +def ammonium_rich(df: DataFrame, + **kwargs + ) -> tuple[Figure, Axes]: + df = df[['NH4+', 'SO42-', 'NO3-', 'PM2.5']].dropna().copy().div([18, 96, 62, 1]) + df['required_ammonium'] = df['NO3-'] + 2 * df['SO42-'] + + fig, ax = plt.subplots() + + scatter = ax.scatter(df['required_ammonium'].to_numpy(), df['NH4+'].to_numpy(), c=df['PM2.5'].to_numpy(), + vmin=0, vmax=70, cmap='jet', marker='o', s=10, alpha=1) + + ax.axline((0, 0), slope=1., color='k', lw=2, ls='--', alpha=0.5, label='1:1') + plt.text(0.97, 0.97, r'$\bf 1:1\ Line$', color='k', ha='right', va='top', transform=ax.transAxes) + + ax.set(xlim=(0, 1.2), + ylim=(0, 1.2), + xlabel=r'$\bf NO_{3}^{-}\ +\ 2\ \times\ SO_{4}^{2-}\ (mole\ m^{-3})$', + ylabel=r'$\bf NH_{4}^{+}\ (mole\ m^{-3})$', + title=kwargs.get('title', '')) + + color_bar = plt.colorbar(scatter, label=Unit('PM2.5'), extend='both') + + # fig.savefig(f'Ammonium_rich_{title}') + plt.show() + + return fig, ax diff --git a/AeroViz/plot/templates/contour.py b/AeroViz/plot/templates/contour.py index 5520eb9..8354392 100644 --- a/AeroViz/plot/templates/contour.py +++ b/AeroViz/plot/templates/contour.py @@ -10,38 +10,38 @@ @set_figure def contour(df, ax: Axes | None = None, **kwargs) -> tuple[Figure, Axes]: - fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) + fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) - npoints = 1000 - xreg = np.linspace(df.PM25.min(), df.PM25.max(), 83) - yreg = np.linspace(df.gRH.min(), df.gRH.max(), 34) - X, Y = np.meshgrid(xreg, yreg) + npoints = 1000 + xreg = np.linspace(df.PM25.min(), df.PM25.max(), 83) + yreg = np.linspace(df.gRH.min(), df.gRH.max(), 34) + X, Y = np.meshgrid(xreg, yreg) - d_f = df.copy() - df['gRH'] = d_f['gRH'].round(2) - df['PM25'] = d_f['PM25'].round(2) + d_f = df.copy() + df['gRH'] = d_f['gRH'].round(2) + df['PM25'] = d_f['PM25'].round(2) - def func(data, *params): - return params[0] * data ** (params[1]) + def func(data, *params): + return params[0] * data ** (params[1]) - initial_guess = [1.0, 1.0] + initial_guess = [1.0, 1.0] - fit_df = df[['PM25', 'gRH', 'Extinction']].dropna() - popt, pcov = curve_fit(func, xdata=(fit_df['PM25'] * fit_df['gRH']), ydata=fit_df['Extinction'], p0=initial_guess, - maxfev=2000000, method='trf') + fit_df = df[['PM25', 'gRH', 'Extinction']].dropna() + popt, pcov = curve_fit(func, xdata=(fit_df['PM25'] * fit_df['gRH']), 
ydata=fit_df['Extinction'], p0=initial_guess, + maxfev=2000000, method='trf') - x, y = df.PM25, df.gRH + x, y = df.PM25, df.gRH - # pcolor = ax.pcolormesh(X, Y, (X * 4.5 * Y ** (1 / 3)), cmap='jet', shading='auto', vmin=0, vmax=843, alpha=0.8) - Z = func(X * Y, *popt) - cont = ax.contour(X, Y, Z, colors='black', levels=5, vmin=0, vmax=Z.max()) - conf = ax.contourf(X, Y, Z, cmap='YlGnBu', levels=100, vmin=0, vmax=Z.max()) - ax.clabel(cont, colors=['black'], fmt=lambda s: f"{s:.0f} 1/Mm") - ax.set(xlabel=Unit('PM25'), ylabel=Unit('gRH'), xlim=(x.min(), x.max()), ylim=(y.min(), y.max())) + # pcolor = ax.pcolormesh(X, Y, (X * 4.5 * Y ** (1 / 3)), cmap='jet', shading='auto', vmin=0, vmax=843, alpha=0.8) + Z = func(X * Y, *popt) + cont = ax.contour(X, Y, Z, colors='black', levels=5, vmin=0, vmax=Z.max()) + conf = ax.contourf(X, Y, Z, cmap='YlGnBu', levels=100, vmin=0, vmax=Z.max()) + ax.clabel(cont, colors=['black'], fmt=lambda s: f"{s:.0f} 1/Mm") + ax.set(xlabel=Unit('PM25'), ylabel=Unit('gRH'), xlim=(x.min(), x.max()), ylim=(y.min(), y.max())) - color_bar = plt.colorbar(conf, pad=0.02, fraction=0.05, label='Extinction (1/Mm)') - color_bar.ax.set_xticklabels(color_bar.ax.get_xticks().astype(int)) + color_bar = plt.colorbar(conf, pad=0.02, fraction=0.05, label='Extinction (1/Mm)') + color_bar.ax.set_xticklabels(color_bar.ax.get_xticks().astype(int)) - plt.show() + plt.show() - return fig, ax + return fig, ax diff --git a/AeroViz/plot/templates/corr_matrix.py b/AeroViz/plot/templates/corr_matrix.py index 20855ee..fb2d929 100644 --- a/AeroViz/plot/templates/corr_matrix.py +++ b/AeroViz/plot/templates/corr_matrix.py @@ -12,97 +12,90 @@ __all__ = ['corr_matrix'] -@set_figure(fs=8) +@set_figure def corr_matrix(data: pd.DataFrame, - cmap: str = "RdBu", - ax: Axes | None = None, - **kwargs) -> tuple[Figure, Axes]: - fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) - - columns = ['Extinction', 'Scattering', 'Absorption', 'PM1', 'PM25', 'PM10', 'PBLH', 'VC', - 'AT', 'RH', 'WS', 'NO', 'NO2', 'NOx', 'O3', 'Benzene', 'Toluene', - 'SO2', 'CO', 'THC', 'CH4', 'NMHC', 'NH3', 'HCl', 'HNO2', 'HNO3', - 'Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', 'Cl-', 'NO2-', 'NO3-', 'SO42-', ] - - df = data[columns] - - _corr = df.corr() - corr = pd.melt(_corr.reset_index(), - id_vars='index') # Unpivot the dataframe, so we can get pair of arrays for x and y - corr.columns = ['x', 'y', 'value'] - - p_values = _corr.apply(lambda col1: _corr.apply(lambda col2: pearsonr(col1, col2)[1])) - p_values = p_values.mask(p_values > 0.05) - p_values = pd.melt(p_values.reset_index(), id_vars='index').dropna() - p_values.columns = ['x', 'y', 'value'] - - # Mapping from column names to integer coordinates - x_labels = [v for v in sorted(corr['x'].unique())] - y_labels = [v for v in sorted(corr['y'].unique())] - x_to_num = {p[1]: p[0] for p in enumerate(x_labels)} - y_to_num = {p[1]: p[0] for p in enumerate(y_labels)} - - # Show column labels on the axes - ax.set_xticks([x_to_num[v] for v in x_labels]) - ax.set_xticklabels(x_labels, rotation=90, horizontalalignment='center') - ax.set_yticks([y_to_num[v] for v in y_labels]) - ax.set_yticklabels(y_labels) - - # ax.tick_params(axis='both', which='major', direction='out', top=True, left=True) - - ax.grid(False, 'major') - ax.grid(True, 'minor') - ax.set_xticks([t + 0.5 for t in ax.get_xticks()], minor=True) - ax.set_yticks([t + 0.5 for t in ax.get_yticks()], minor=True) - - ax.set_xlim([-0.5, max([v for v in x_to_num.values()]) + 0.5]) - ax.set_ylim([-0.5, 
max([v for v in y_to_num.values()]) + 0.5]) - - n_colors = 256 # Use 256 colors for the diverging color palette - palette = sns.color_palette(cmap, n_colors=n_colors) # Create the palette - - # Range of values that will be mapped to the palette, i.e. min and max possible correlation - color_min, color_max = [-1, 1] - - def value_to_color(val): - val_position = float((val - color_min)) / (color_max - color_min) - ind = int(val_position * (n_colors - 1)) # target index in the color palette - return palette[ind] - - point = ax.scatter( - x=corr['x'].map(x_to_num), - y=corr['y'].map(y_to_num), - s=corr['value'].abs() * 70, - c=corr['value'].apply(value_to_color), # Vector of square color values, mapped to color palette - marker='s', - label='$R^{2}$' - ) - - axes_image = plt.cm.ScalarMappable(cmap=colormaps[cmap]) - - cax = inset_axes(ax, width="5%", - height="100%", - loc='lower left', - bbox_to_anchor=(1.02, 0., 1, 1), - bbox_transform=ax.transAxes, - borderpad=0) - - cbar = plt.colorbar(mappable=axes_image, cax=cax, label=r'$R^{2}$') - - cbar.set_ticks([0, 0.25, 0.5, 0.75, 1]) - cbar.set_ticklabels(np.linspace(-1, 1, 5)) - - point2 = ax.scatter( - x=p_values['x'].map(x_to_num), - y=p_values['y'].map(y_to_num), - s=10, - marker='*', - color='k', - label='p < 0.05' - ) - - ax.legend(handles=[point2], labels=['p < 0.05'], bbox_to_anchor=(0.05, 1, 0.1, 0.05)) - - plt.show() - - return fig, ax + cmap: str = "RdBu", + ax: Axes | None = None, + **kwargs + ) -> tuple[Figure, Axes]: + fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) + + _corr = data.corr() + corr = pd.melt(_corr.reset_index(), id_vars='index') + corr.columns = ['x', 'y', 'value'] + + p_values = _corr.apply(lambda col1: _corr.apply(lambda col2: pearsonr(col1, col2)[1])) + p_values = p_values.mask(p_values > 0.05) + p_values = pd.melt(p_values.reset_index(), id_vars='index').dropna() + p_values.columns = ['x', 'y', 'value'] + + # Mapping from column names to integer coordinates + x_labels = [v for v in sorted(corr['x'].unique())] + y_labels = [v for v in sorted(corr['y'].unique())] + x_to_num = {p[1]: p[0] for p in enumerate(x_labels)} + y_to_num = {p[1]: p[0] for p in enumerate(y_labels)} + + # Show column labels on the axes + ax.set_xticks([x_to_num[v] for v in x_labels]) + ax.set_xticklabels(x_labels, rotation=90, horizontalalignment='center') + ax.set_yticks([y_to_num[v] for v in y_labels]) + ax.set_yticklabels(y_labels) + + # ax.tick_params(axis='both', which='major', direction='out', top=True, left=True) + + ax.grid(False, 'major') + ax.grid(True, 'minor') + ax.set_xticks([t + 0.5 for t in ax.get_xticks()], minor=True) + ax.set_yticks([t + 0.5 for t in ax.get_yticks()], minor=True) + + ax.set_xlim([-0.5, max([v for v in x_to_num.values()]) + 0.5]) + ax.set_ylim([-0.5, max([v for v in y_to_num.values()]) + 0.5]) + + n_colors = 256 # Use 256 colors for the diverging color palette + palette = sns.color_palette(cmap, n_colors=n_colors) # Create the palette + + # Range of values that will be mapped to the palette, i.e. 
min and max possible correlation + color_min, color_max = [-1, 1] + + def value_to_color(val): + val_position = float((val - color_min)) / (color_max - color_min) + ind = int(val_position * (n_colors - 1)) # target index in the color palette + return palette[ind] + + point = ax.scatter( + x=corr['x'].map(x_to_num), + y=corr['y'].map(y_to_num), + s=corr['value'].abs() * 70, + c=corr['value'].apply(value_to_color), # Vector of square color values, mapped to color palette + marker='s', + label='$R^{2}$' + ) + + axes_image = plt.cm.ScalarMappable(cmap=colormaps[cmap]) + + cax = inset_axes(ax, width="5%", + height="100%", + loc='lower left', + bbox_to_anchor=(1.02, 0., 1, 1), + bbox_transform=ax.transAxes, + borderpad=0) + + cbar = plt.colorbar(mappable=axes_image, cax=cax, label=r'$R^{2}$') + + cbar.set_ticks([0, 0.25, 0.5, 0.75, 1]) + cbar.set_ticklabels(np.linspace(-1, 1, 5)) + + point2 = ax.scatter( + x=p_values['x'].map(x_to_num), + y=p_values['y'].map(y_to_num), + s=10, + marker='*', + color='k', + label='p < 0.05' + ) + + ax.legend(handles=[point2], labels=['p < 0.05'], bbox_to_anchor=(0.05, 1, 0.1, 0.05)) + + plt.show() + + return fig, ax diff --git a/AeroViz/plot/templates/diurnal_pattern.py b/AeroViz/plot/templates/diurnal_pattern.py index 6c4a7fb..de214eb 100644 --- a/AeroViz/plot/templates/diurnal_pattern.py +++ b/AeroViz/plot/templates/diurnal_pattern.py @@ -1,42 +1,61 @@ import matplotlib.pyplot as plt -import pandas as pd from matplotlib.pyplot import Figure, Axes from matplotlib.ticker import AutoMinorLocator +from pandas import DataFrame from AeroViz.plot.utils import * __all__ = ['diurnal_pattern'] -@set_figure(figsize=(4, 4), fs=8) -def diurnal_pattern(data_set: pd.DataFrame, - data_std: pd.DataFrame, - y: str | list[str], - std_area=0.5, - ax: Axes | None = None, - **kwargs) -> tuple[Figure, Axes]: - fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) - - Hour = range(0, 24) - - mean = data_set[y] - std = data_std[y] * std_area - - # Plot Diurnal pattern - ax.plot(Hour, mean, 'blue') - ax.fill_between(Hour, y1=mean + std, y2=mean - std, alpha=0.5, color='blue', edgecolor=None) - - ax.set(xlabel=kwargs.get('xlabel', 'Hours'), - ylabel=kwargs.get('ylabel', Unit(y)), - xlim=kwargs.get('xlim', (0, 23)), - ylim=kwargs.get('ylim', (None, None)), - xticks=kwargs.get('xticks', [0, 4, 8, 12, 16, 20])) - - ax.tick_params(axis='both', which='major') - ax.tick_params(axis='x', which='minor') - ax.xaxis.set_minor_locator(AutoMinorLocator()) - ax.ticklabel_format(axis='y', style='sci', scilimits=(-2, 3), useMathText=True) - - plt.show() - - return fig, ax +@set_figure +def diurnal_pattern(df: DataFrame, + y: str | list[str], + std_area: float = 0.5, + ax: Axes | None = None, + **kwargs + ) -> tuple[Figure, Axes]: + if 'hour' not in df.columns and 'Hour' not in df.columns: + df['Hour'] = df.index.hour + + Hour = range(0, 24) + mean = df.groupby('Hour')[y].mean() + std = df.groupby('Hour')[y].std() * std_area + + fig, ax = plt.subplots() if ax is None else (ax.get_figure(), ax) + + # Plot Diurnal pattern + ax.plot(Hour, mean, 'blue', zorder=3) + ax.fill_between(Hour, y1=mean + std, y2=mean - std, alpha=0.2, color='blue', edgecolor=None, zorder=2) + + # Plot Boxplot for each hour + bp = ax.boxplot([df[df['Hour'] == h][y].dropna() for h in Hour], + positions=Hour, + widths=0.5, + patch_artist=True, + showfliers=False, + zorder=1) + + # Customize boxplot colors + for element in ['boxes', 'whiskers', 'fliers', 'means', 'medians', 'caps']: + 
plt.setp(bp[element], color='gray') + + for patch in bp['boxes']: + patch.set(facecolor='lightgray', alpha=0.5) + + ax.set(xlabel=kwargs.get('xlabel', 'Hours'), + ylabel=kwargs.get('ylabel', Unit(y)), + xlim=kwargs.get('xlim', (-0.5, 23.5)), + ylim=kwargs.get('ylim', (None, None)), + xticks=kwargs.get('xticks', range(0, 24, 4)), + xticklabels=kwargs.get('xticklabels', range(0, 24, 4))) + + ax.tick_params(axis='both', which='major') + ax.tick_params(axis='x', which='minor') + ax.xaxis.set_minor_locator(AutoMinorLocator()) + ax.ticklabel_format(axis='y', style='sci', scilimits=(-2, 3), useMathText=True) + + plt.tight_layout() + plt.show() + + return fig, ax \ No newline at end of file diff --git a/AeroViz/plot/templates/event_evolution.py b/AeroViz/plot/templates/event_evolution.py deleted file mode 100644 index 0d700ed..0000000 --- a/AeroViz/plot/templates/event_evolution.py +++ /dev/null @@ -1,65 +0,0 @@ -from os.path import join as pth - -import matplotlib.pyplot as plt -import numpy as np -from pandas import date_range, read_csv - -from AeroViz.plot.utils import * - -# TODO: - -# read csv file -blh = read_csv(pth('事件分析.csv'), parse_dates=['Time'], index_col='Time') - - -@set_figure(figsize=(12, 5)) -def event_evolution(_df, **kwargs): - print(f'Plot : {_df.month[0]}') - - st_tm, fn_tm = _df.index[0], _df.index[-1] - tick_time = date_range(st_tm, fn_tm, freq='1d') # set tick - - # seperate day and night - _df_day = _df.between_time('6:00', '17:00').reindex(date_range(st_tm, fn_tm, freq='1h')) - _df_night = _df.between_time('18:00', '5:00').reindex(date_range(st_tm, fn_tm, freq='1h')) - - ## plot - fig, ax = plt.subplots() - - ## plot background - shade_value, _ = np.meshgrid(_df['PM2.5'], np.arange((1., 2500), 100)) - ax.pcolormesh(_df.index, np.arange((1., 2500), 100), shade_value, cmap='binary', vmin=0, vmax=300, - shading='auto') - - ## plot day and night - ld = ax.scatter(_df.index[0:], _df_day['Ext'], s=50, c='#73b9ff', label='Day Ext', marker='o', alpha=.7) - ln = ax.scatter(_df.index[0:], _df_night['Ext'], s=50, c='#00238c', label='Night Ext', marker='o', alpha=.7) - - ax2 = ax.twinx() - # ld, = ax2.plot(_df_day['VC'],c='#FF9797',label='day 06:00~18:00') - # ln, = ax2.plot(_df_night['VC'],c='#FF0000',label='night 18:00~06:00') - ld2 = ax2.scatter(_df.index, _df_day['VC'], s=50, c='#FF9797', label='Day VC', marker='o', alpha=.5) - ln2 = ax2.scatter(_df.index, _df_night['VC'], s=50, c='#FF0000', label='Night VC', marker='o', alpha=.5) - - # add legend on the first axes - ax.legend(handles=[ld, ln, ld2, ln2], framealpha=0, prop={'weight': 'bold'}, loc='upper left') - - # add xlabel, ylabel, suptitle - ax.set(xlabel='Date', - ylabel='Ext (1/Mm)', - xlim=(st_tm, fn_tm), - ylim=(1., 600), - xticks=tick_time, - xticklabels=[_tm.strftime("%F %H:00") for _tm in tick_time]) - - ax2.set(ylabel=r'$VC (m^{2}/s)$', - ylim=(1., 2500)) - - fig.suptitle(f'Event evolution ({st_tm.strftime("%F")}_{fn_tm.strftime("%F")})') - - # save figure - fig.savefig(pth(f"event_evolution_{st_tm.strftime("%F")}_{fn_tm.strftime("%F")}")) - - -if __name__ == '__main__': - event_evolution(blh) diff --git a/AeroViz/plot/templates/koschmieder.py b/AeroViz/plot/templates/koschmieder.py index e0f4fc7..256e107 100644 --- a/AeroViz/plot/templates/koschmieder.py +++ b/AeroViz/plot/templates/koschmieder.py @@ -1,4 +1,3 @@ -from typing import Literal import matplotlib.pyplot as plt import numpy as np @@ -11,146 +10,86 @@ __all__ = ['koschmieder'] -@set_figure(fs=12) +@set_figure(figsize=(2.4, 3)) def koschmieder(df: 
pd.DataFrame, - y: Literal['Vis_Naked', 'Vis_LPV'], - function: Literal['log', 'reciprocal'] = 'log', - ax: Axes | None = None, - **kwargs) -> tuple[Figure, Axes]: - # x = Visibility, y = Extinction, log-log fit!! - def _log_fit(x, y, func=lambda x, a: -x + a): - x_log = np.log(x) - y_log = np.log(y) - - popt, pcov = curve_fit(func, x_log, y_log) + vis: str, + ext: list[str], + ax: Axes | None = None, + **kwargs + ) -> tuple[Figure, Axes]: + """ + Plot Koschmieder relationship between Visibility and Extinction. - residuals = y_log - func(x_log, *popt) - ss_res = np.sum(residuals ** 2) - ss_total = np.sum((y_log - np.mean(y_log)) ** 2) - r_squared = 1 - (ss_res / ss_total) - print(f'Const_Log = {popt[0].round(3)}') - print(f'Const = {np.exp(popt)[0].round(3)}') - print(f'R^2 = {r_squared.round(3)}') - return np.exp(popt)[0], pcov - - def _reciprocal_fit(x, y, func=lambda x, a, b: a / (x ** b)): - popt, pcov = curve_fit(func, x, y) + x = Visibility, y = Extinction, log-log fit!! + """ + def _log_fit(x, y, func=lambda x, a: -x + a): + x_log, y_log = np.log(x), np.log(y) + popt, pcov = curve_fit(func, x_log, y_log) - residuals = y - func(x, *popt) - ss_res = np.sum(residuals ** 2) - ss_total = np.sum((y - np.mean(y)) ** 2) - r_squared = 1 - (ss_res / ss_total) - print(f'Const = {popt.round(3)}') - print(f' R^2 = {r_squared.round(3)}') - return popt, pcov + return np.exp(popt)[0], pcov - fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) + fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) - _df1 = df[['Extinction', 'ExtinctionByGas', y]].dropna().copy() - _df2 = df[['total_ext_dry', 'ExtinctionByGas', y]].dropna().copy() + boxcolors = ['#a5bf6b', '#3f83bf'] + scattercolor = ['green', 'blue'] + arts = [] + labels = [] - x_data1 = _df1[y] - y_data1 = _df1['Extinction'] + _df1['ExtinctionByGas'] + for i, ext_col in enumerate(ext): + _df = df[[ext_col, vis]].dropna().copy() + x_data = _df[vis] + y_data = _df[ext_col] - x_data2 = _df2[y] - y_data2 = _df2['total_ext_dry'] + _df2['ExtinctionByGas'] + bins = np.linspace(0, 50, 25) + wid = (bins + (bins[1] - bins[0]) / 2)[0:-1] - para_coeff = [] - boxcolors = ['#3f83bf', '#a5bf6b'] + _df[f'{vis}_bins'] = pd.cut(x_data, bins=bins, labels=wid) - for i, (df_, x_data, y_data) in enumerate(zip([_df1, _df2], [x_data1, x_data2], [y_data1, y_data2])): - df_['Total_Ext'] = y_data + grouped = _df.groupby(f'{vis}_bins', observed=False) - if y == 'Vis_Naked': - df_grp = df_.groupby(f'{y}') + vis_labels, vals, median_vals = [], [], [] + for _, subdf in grouped: + if len(subdf[ext_col].dropna()) > 3: + vis_labels.append(subdf[vis].mean()) + vals.append(subdf[ext_col].dropna().values) + median_vals.append(subdf[ext_col].mean()) - vals, median_vals, vis = [], [], [] - for j, (name, subdf) in enumerate(df_grp): - if len(subdf['Total_Ext'].dropna()) > 20: - vis.append('{:.0f}'.format(name)) - vals.append(subdf['Total_Ext'].dropna().values) - median_vals.append(subdf['Total_Ext'].dropna().median()) + plt.boxplot(vals, labels=vis_labels, positions=np.array(vis_labels, dtype='float'), + widths=(bins[1] - bins[0]) / 2.5, + showfliers=False, showmeans=True, meanline=False, patch_artist=True, + boxprops=dict(facecolor=boxcolors[i], alpha=.7), + meanprops=dict(marker='o', markerfacecolor='white', markeredgecolor='k', markersize=4), + medianprops=dict(color='#000000', ls='-')) - plt.boxplot(vals, labels=vis, positions=np.array(vis, dtype='int'), widths=0.4, - showfliers=False, showmeans=True, 
meanline=False, patch_artist=True, - boxprops=dict(facecolor=boxcolors[i], alpha=.7), - meanprops=dict(marker='o', markerfacecolor='white', markeredgecolor='k', markersize=4), - medianprops=dict(color='#000000', ls='-')) + plt.scatter(x_data, y_data, marker='.', s=10, facecolor='white', edgecolor=boxcolors[i], alpha=0.1) - plt.scatter(x_data, y_data, marker='.', s=10, facecolor='white', edgecolor=boxcolors[i], alpha=0.1) + # fit curve + coeff, _ = _log_fit(np.array(vis_labels, dtype='float'), np.array(median_vals, dtype='float')) - if y == 'Vis_LPV': - bins = np.linspace(0, 70, 36) - wid = (bins + (bins[1] - bins[0]) / 2)[0:-1] + # Plot lines (ref & Measurement) + x_fit = np.linspace(0.1, 50, 1000) - df_[f'{x_data.name}' + '_bins'] = pd.cut(x=x_data, bins=bins, labels=wid) + func = lambda x, a: a / x + line, = ax.plot(x_fit, func(x_fit, coeff), c=scattercolor[i], lw=3, + label=f'Vis (km) = {round(coeff)} / Ext') - grouped = df_.groupby(f'{x_data.name}' + '_bins', observed=False) + arts.append(line) + if 'dry' in ext_col: + labels.append(f'Vis (km) = {round(coeff)} / Ext (dry)') + else: + labels.append(f'Vis (km) = {round(coeff)} / Ext (amb)') - vals, median_vals, vis = [], [], [] - for j, (name, subdf) in enumerate(grouped): - if len(subdf['Total_Ext'].dropna()) > 20: - vis.append('{:.1f}'.format(name)) - vals.append(subdf['Total_Ext'].dropna().values) - median_vals.append(subdf['Total_Ext'].dropna().mean()) + ax.legend(handles=arts, labels=labels, loc='upper right', prop=dict(weight='bold'), bbox_to_anchor=(0.99, 0.99)) - plt.boxplot(vals, labels=vis, positions=np.array(vis, dtype='float'), widths=(bins[1] - bins[0]) / 2.5, - showfliers=False, showmeans=True, meanline=False, patch_artist=True, - boxprops=dict(facecolor=boxcolors[i], alpha=.7), - meanprops=dict(marker='o', markerfacecolor='white', markeredgecolor='k', markersize=4), - medianprops=dict(color='#000000', ls='-')) + ax.set(xlabel=kwargs.get('xlabel', 'Visibility (km)'), + ylabel=kwargs.get('ylabel', 'Extinction (1/Mm)'), + title=kwargs.get('title', 'Koschmieder relationship'), + xlim=kwargs.get('xlim', (0, 30)), + ylim=kwargs.get('ylim', (0, 800)) + ) - plt.scatter(x_data, y_data, marker='.', s=10, facecolor='white', edgecolor=boxcolors[i], alpha=0.1) + plt.xticks(ticks=np.array(range(0, 31, 5)), labels=np.array(range(0, 31, 5))) - # fit curve - _x = np.array(vis, dtype='float') - _y = np.array(median_vals, dtype='float') + plt.show() - if function == 'log': - func = lambda x, a: a / x - coeff, pcov = _log_fit(_x, _y) - - else: - func = lambda x, a, b: a / (x ** b) - coeff, pcov = _reciprocal_fit(_x, _y) - - para_coeff.append(coeff) - - # Plot lines (ref & Measurement) - x_fit = np.linspace(0.1, 70, 1000) - - if function == 'log': - line1, = ax.plot(x_fit, func(x_fit, para_coeff[0]), c='b', lw=3) - line2, = ax.plot(x_fit, func(x_fit, para_coeff[1]), c='g', lw=3) - - labels = ['Vis (km) = ' + f'{round(para_coeff[0])}' + ' / Ext (Dry Extinction)', - 'Vis (km) = ' + f'{round(para_coeff[1])}' + ' / Ext (Amb Extinction)'] - - else: - x_fit = np.linspace(0.1, 70, 1000) - line1, = ax.plot(x_fit, func(x_fit, *para_coeff[0]), c='b', lw=3) - line2, = ax.plot(x_fit, func(x_fit, *para_coeff[1]), c='g', lw=3) - - labels = [f'Ext = ' + '{:.0f} / Vis ^ {:.3f}'.format(*para_coeff[0]) + ' (Dry Extinction)', - f'Ext = ' + '{:.0f} / Vis ^ {:.3f}'.format(*para_coeff[1]) + ' (Amb Extinction)'] - - plt.legend(handles=[line1, line2], labels=labels, loc='upper right', prop=dict(size=10, weight='bold'), - bbox_to_anchor=(0.99, 0.99)) - - 
plt.xticks(ticks=np.array(range(0, 51, 5)), labels=np.array(range(0, 51, 5))) - plt.xlim(0, 50) - plt.ylim(0, 700) - plt.title(r'$\bf Koschmieder\ relationship$') - plt.xlabel(f'{y} (km)') - plt.ylabel(r'$\bf Extinction\ coefficient\ (1/Mm)$') - - plt.show() - - return fig, ax - - -if __name__ == '__main__': - from AeroViz.tools import DataBase - - koschmieder(DataBase(), 'Vis_LPV', 'log') - # koschmieder(DataBase, 'Vis_Naked', 'reciprocal') + return fig, ax diff --git a/AeroViz/plot/templates/metal_heatmap.py b/AeroViz/plot/templates/metal_heatmap.py index 0293a74..422cfe8 100644 --- a/AeroViz/plot/templates/metal_heatmap.py +++ b/AeroViz/plot/templates/metal_heatmap.py @@ -2,56 +2,154 @@ import numpy as np import seaborn as sns from matplotlib.pyplot import Figure, Axes -from pandas import DataFrame, date_range +from pandas import DataFrame, date_range, concat from sklearn.preprocessing import StandardScaler from AeroViz.plot.utils import * +__all__ = ['metal_heatmaps', 'process_data_with_two_df'] -def process_data(df): - # detected_limit = 0.0001 - df = df.where(df >= 0.0001, np.nan) - # Normalize the data - df = DataFrame(StandardScaler().fit_transform(df), index=df.index, columns=df.columns) - # Remove outliers - df = df[(np.abs(df) < 6)] - # Interpolate the missing values - df = df.interpolate(method='linear') - # Smooth the data - df = df.rolling(window=3, min_periods=1).mean() - return df +def process_data(df, detected_limit=True, outlier_threshold=5, smoothing_window=6, fill_method='MDL'): + # Fill missing values based on the specified method + df = fill_missing_values(df.copy(), method=fill_method) + + # Normalize the data + df = normalize_data(df) + + # Remove outliers + df = remove_outliers(df, threshold=outlier_threshold) + + # Interpolate missing values + df = df.interpolate(method='linear') + + # Smooth the data + df = smooth_data(df, window=smoothing_window) + + return df + + +def process_data_with_two_df(df, df2, outlier_threshold=5, smoothing_window=6, fill_method='MDL'): + # Shift the first DataFrame by 30 minutes + df = df.shift(freq='30min') + + # Fill missing values for both DataFrames + df = fill_missing_values(df.copy(), method=fill_method) + df2 = fill_missing_values(df2.copy(), method=fill_method) + + # Normalize both DataFrames together + df, df2 = normalize_and_split(df, df2) + + # Shift the first DataFrame back by 30 minutes + df = df.shift(freq='-30min') + + # Remove outliers for both DataFrames + df = remove_outliers(df, threshold=outlier_threshold) + df2 = remove_outliers(df2, threshold=outlier_threshold) + + # Interpolate missing values + df = df.interpolate(method='linear') + df2 = df2.interpolate(method='linear') + + # Smooth the data + df = smooth_data(df, window=smoothing_window) + df2 = smooth_data(df2, window=smoothing_window) + + return df, df2 + + +def fill_missing_values(df, method='MDL'): + if method == 'interpolate': + return df.interpolate(method='linear') + else: + return fill_with_mdl(df) + + +def fill_with_mdl(df): + # Minimum detection limit (MDL) dictionary + MDL = { + 'Al': 100, 'Si': 18, 'P': 5.2, 'S': 3.2, + 'Cl': 1.7, 'K': 1.2, 'Ca': 0.3, 'Ti': 1.6, + 'V': 0.12, 'Cr': 0.12, 'Mn': 0.14, 'Fe': 0.17, + 'Co': 0.14, 'Ni': 0.096, 'Cu': 0.079, 'Zn': 0.067, + 'Ga': 0.059, 'Ge': 0.056, 'As': 0.063, 'Se': 0.081, + 'Br': 0.1, 'Rb': 0.19, 'Sr': 0.22, 'Y': 0.28, + 'Zr': 0.33, 'Nb': 0.41, 'Mo': 0.48, 'Pd': 2.2, + 'Ag': 1.9, 'Cd': 2.5, 'In': 3.1, 'Sn': 4.1, + 'Sb': 5.2, 'Te': 0.6, 'I': 0.49, 'Cs': 0.37, + 'Ba': 0.39, 'La': 0.36, 'Ce': 0.3, 'Pt': 0.12, 
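+        # (mapping continues below) readings under an element's MDL are not dropped;
+        # they are replaced with 5/6 * MDL in the loop that follows this dictionary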
+ 'Au': 0.1, 'Hg': 0.12, 'Tl': 0.12, 'Pb': 0.13, + 'Bi': 0.13 + } + + # Replace values below MDL with 5/6 * MDL + for element, threshold in MDL.items(): + if element in df.columns: + df.loc[:, element] = df[element].where(df[element] >= threshold, 5 / 6 * threshold) + + return df + + +def normalize_data(df): + # Standardize the data (z-score normalization) + return DataFrame(StandardScaler().fit_transform(df), index=df.index, columns=df.columns) + + +def remove_outliers(df, threshold=5): + # Remove rows where any column value exceeds the threshold + return df[(np.abs(df) < threshold)] + + +def smooth_data(df, window=6): + # Apply rolling mean to smooth the data + return df.rolling(window=window, min_periods=1).mean() + + +def normalize_and_split(df, df2): + # Concatenate DataFrames for combined normalization + combined_df = concat([df, df2]) + normalized_combined_df = normalize_data(combined_df) + + # Split the normalized DataFrame back into df and df2 + df = normalized_combined_df.loc[df.index] + df2 = normalized_combined_df.loc[df2.index] + + return df, df2 @set_figure(figsize=(12, 3), fs=6) -def metal_heatmaps(df, major_freq='24h', minor_freq='12h', ax: Axes | None = None, title=None, **kwargs - ) -> tuple[Figure, Axes]: - items = ['Al', 'Zr', 'Si', 'Ca', 'Ti', 'Mn', 'Fe', 'V', 'Cl', 'K', - 'Sr', 'Ba', 'Bi', 'Pd', 'Sn', 'Cr', 'W', 'Cu', 'Zn', - 'As', 'Co', 'Se', 'Br', 'Cd', 'Sb', 'In', 'Pb', 'Ni'] +def metal_heatmaps(df, + process=True, + major_freq='24h', + minor_freq='12h', + cmap='jet', + ax: Axes | None = None, + **kwargs + ) -> tuple[Figure, Axes]: + if process: + df = process_data(df) - df = df[items] + fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) - fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) + sns.heatmap(df.T, vmin=None, vmax=3, cmap=cmap, xticklabels=False, yticklabels=True, + cbar_kws={'label': 'Z score', "pad": 0.02}) + ax.grid(color='gray', linestyle='-', linewidth=0.3) - sns.heatmap(df.T, vmax=3, cmap="jet", xticklabels=False, yticklabels=True, - cbar_kws={'label': 'Z score'}) - ax.grid(color='gray', linestyle='-', linewidth=0.3) - # Set x-tick positions and labels - major_tick = date_range(start=df.index[0], end=df.index[-1], freq=major_freq) - minor_tick = date_range(start=df.index[0], end=df.index[-1], freq=minor_freq) + # Set x-tick positions and labels + major_tick = date_range(start=df.index[0], end=df.index[-1], freq=major_freq) + minor_tick = date_range(start=df.index[0], end=df.index[-1], freq=minor_freq) - # Set the major and minor ticks - ax.set_xticks(ticks=[df.index.get_loc(t) for t in major_tick]) - ax.set_xticks(ticks=[df.index.get_loc(t) for t in minor_tick], minor=True) - ax.set_xticklabels(major_tick.strftime('%F')) - ax.tick_params(axis='y', rotation=0) + # Set the major and minor ticks + ax.set_xticks(ticks=[df.index.get_loc(t) for t in major_tick]) + ax.set_xticks(ticks=[df.index.get_loc(t) for t in minor_tick], minor=True) + ax.set_xticklabels(major_tick.strftime('%F')) + ax.tick_params(axis='y', rotation=0) - ax.set_title(f"{title}", fontsize=10) - ax.set(xlabel='', - ylabel='', - ) + ax.set(xlabel='', + ylabel='', + title=kwargs.get('title', None) + ) - plt.show() + plt.show() - return fig, ax + return fig, ax diff --git a/AeroViz/plot/templates/regression.py b/AeroViz/plot/templates/regression.py deleted file mode 100644 index b4ca1b9..0000000 --- a/AeroViz/plot/templates/regression.py +++ /dev/null @@ -1,256 +0,0 @@ -import matplotlib.pyplot as plt 
-import numpy as np -import pandas as pd -from matplotlib.pyplot import Figure, Axes -from sklearn.linear_model import LinearRegression -from tabulate import tabulate - -from AeroViz.plot.utils import * - -__all__ = [ - 'linear_regression', - 'multiple_linear_regression', -] - - -def _linear_regression(x_array: np.ndarray, - y_array: np.ndarray, - columns: str | list[str] | None = None, - positive: bool = True, - fit_intercept: bool = True): - if len(x_array.shape) > 1 and x_array.shape[1] >= 2: - model = LinearRegression(positive=positive, fit_intercept=fit_intercept).fit(x_array, y_array) - - coefficients = model.coef_[0].round(3) - intercept = model.intercept_[0].round(3) if fit_intercept else 'None' - r_square = model.score(x_array, y_array).round(3) - y_predict = model.predict(x_array) - - equation = ' + '.join([f'{coeff:.3f} * {col}' for coeff, col in zip(coefficients, columns)]) - equation = equation.replace(' + 0.000 * Const', '') # Remove terms with coefficient 0 - - text = 'y = ' + str(equation) + '\n' + r'$\bf R^2 = $' + str(r_square) - tab = tabulate([[*coefficients, intercept, r_square]], headers=[*columns, 'intercept', 'R^2'], floatfmt=".3f", - tablefmt="fancy_grid") - print('\n' + tab) - - return text, y_predict, coefficients - - else: - x_array = x_array.reshape(-1, 1) - y_array = y_array.reshape(-1, 1) - - model = LinearRegression(positive=positive, fit_intercept=fit_intercept).fit(x_array, y_array) - - slope = model.coef_[0][0].round(3) - intercept = model.intercept_[0].round(3) if fit_intercept else 'None' - r_square = model.score(x_array, y_array).round(3) - y_predict = model.predict(x_array) - - text = np.poly1d([slope, intercept]) - text = 'y = ' + str(text).replace('\n', "") + '\n' + r'$\bf R^2 = $' + str(r_square) - - tab = tabulate([[slope, intercept, r_square]], headers=['slope', 'intercept', 'R^2'], floatfmt=".3f", - tablefmt="fancy_grid") - print('\n' + tab) - - return text, y_predict, slope - - -@set_figure -def linear_regression(df: pd.DataFrame, - x: str | list[str], - y: str | list[str], - labels: str | list[str] = None, - ax: Axes | None = None, - diagonal=False, - positive: bool = True, - fit_intercept: bool = True, - **kwargs - ) -> tuple[Figure, Axes]: - """ - Create a scatter plot with multiple regression lines for the given data. - - Parameters - ---------- - df : DataFrame - Input DataFrame containing the data. - - x : str or list of str - Column name(s) for the x-axis variable(s). - - y : str or list of str - Column name(s) for the y-axis variable(s). - - labels : str or list of str, optional - Labels for the y-axis variable(s). If None, column names are used as labels. Default is None. - - ax : AxesSubplot, optional - Matplotlib AxesSubplot to use for the plot. If None, a new subplot is created. Default is None. - - diagonal : bool, optional - If True, a diagonal line (1:1 line) is added to the plot. Default is False. - - positive : bool, optional - Whether to let coefficient positive. Default is True. - - fit_intercept: bool, optional - Whether to fit intercept. Default is True. - - **kwargs - Additional keyword arguments to customize the plot. - - Returns - ------- - AxesSubplot - Matplotlib AxesSubplot containing the scatter plot. - - Notes - ----- - - The function creates a scatter plot with the option to include multiple regression lines. - - If regression is True, regression lines are fitted for each y variable. - - Additional customization can be done using the **kwargs. 
- - Example - ------- - >>> linear_regression(df, x='X', y=['Y1', 'Y2'], labels=['Label1', 'Label2'], - ... regression=True, diagonal=True, xlim=(0, 10), ylim=(0, 20), - ... xlabel="X-axis", ylabel="Y-axis", title="Scatter Plot with Regressions") - """ - fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) - - if not isinstance(x, str): - x = x[0] - - if not isinstance(y, list): - y = [y] - - if labels is None: - labels = y - - df = df.dropna(subset=[x, *y]) - x_array = df[[x]].to_numpy() - - color_cycle = Color.linecolor - - handles, text_list = [], [] - - for i, y_var in enumerate(y): - y_array = df[[y_var]].to_numpy() - - color = color_cycle[i % len(color_cycle)] - - scatter = ax.scatter(x_array, y_array, s=25, color=color['face'], edgecolors=color['edge'], alpha=0.8, - label=labels[i]) - handles.append(scatter) - - text, y_predict, slope = _linear_regression(x_array, y_array, - columns=labels[i], - positive=positive, - fit_intercept=fit_intercept) - - text_list.append(f'{labels[i]}: {text}') - plt.plot(x_array, y_predict, linewidth=3, color=color['line'], alpha=1, zorder=3) - - ax.set(xlim=kwargs.get('xlim'), ylim=kwargs.get('ylim'), xlabel=Unit(x), ylabel=Unit(y[0]), - title=kwargs.get('title')) - - # Add regression info to the legend - leg = plt.legend(handles=handles, labels=text_list, loc='upper left', prop={'weight': 'bold', 'size': 10}) - - for text, color in zip(leg.get_texts(), [color['line'] for color in color_cycle]): - text.set_color(color) - - if diagonal: - ax.axline((0, 0), slope=1., color='k', lw=2, ls='--', alpha=0.5, label='1:1') - plt.text(0.97, 0.97, r'$\bf 1:1\ Line$', color='k', ha='right', va='top', transform=ax.transAxes) - - plt.show() - - return fig, ax - - -@set_figure -def multiple_linear_regression(df: pd.DataFrame, - x: str | list[str], - y: str | list[str], - labels: str | list[str] = None, - ax: Axes | None = None, - diagonal=False, - positive: bool = True, - fit_intercept: bool = True, - **kwargs - ) -> tuple[Figure, Axes]: - """ - Perform multiple linear regression analysis and plot the results. - - Parameters - ---------- - df : pandas.DataFrame - Input DataFrame containing the data. - - x : str or list of str - Column name(s) for the independent variable(s). Can be a single string or a list of strings. - - y : str or list of str - Column name(s) for the dependent variable(s). Can be a single string or a list of strings. - - labels : str or list of str, optional - Labels for the dependent variable(s). If None, column names are used as labels. Default is None. - - ax : matplotlib.axes.Axes or None, optional - Matplotlib Axes object to use for the plot. If None, a new subplot is created. Default is None. - - diagonal : bool, optional - Whether to include a diagonal line (1:1 line) in the plot. Default is False. - - positive : bool, optional - Whether to let coefficient positive. Default is True. - - fit_intercept: bool, optional - Whether to fit intercept. Default is True. - - **kwargs - Additional keyword arguments to customize the plot. - - Returns - ------- - matplotlib.axes.Axes - Matplotlib Axes object containing the regression plot. - - Notes - ----- - This function performs multiple linear regression analysis using the input DataFrame. - It supports multiple independent variables and can plot the regression results. - - Example - ------- - >>> multiple_linear_regression(df, x=['X1', 'X2'], y='Y', labels=['Y1', 'Y2'], - ... diagonal=True, add_constant=True, - ... 
xlabel="X-axis", ylabel="Y-axis", title="Multiple Linear Regression Plot") - """ - fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) - - if not isinstance(x, list): - x = [x] - - if not isinstance(y, str): - y = y[0] - - if labels is None: - labels = x - - df = df[[*x, y]].dropna() - x_array = df[[*x]].to_numpy() - y_array = df[[y]].to_numpy() - - text, y_predict, coefficients = _linear_regression(x_array, y_array, - columns=labels, - positive=positive, - fit_intercept=fit_intercept) - - df = pd.DataFrame(np.concatenate([y_array, y_predict], axis=1), columns=['y_actual', 'y_predict']) - - linear_regression(df, x='y_actual', y='y_predict', ax=ax, regression=True, diagonal=diagonal) - - return fig, ax diff --git a/AeroViz/plot/templates/scatter.py b/AeroViz/plot/templates/scatter.py deleted file mode 100644 index 0bc9254..0000000 --- a/AeroViz/plot/templates/scatter.py +++ /dev/null @@ -1,130 +0,0 @@ -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import seaborn as sns -from matplotlib.colors import Normalize -from matplotlib.pyplot import Figure, Axes -from matplotlib.ticker import ScalarFormatter - -from AeroViz.plot.templates.regression import _linear_regression -from AeroViz.plot.utils import * - -__all__ = ['scatter'] - - -@set_figure -def scatter(df: pd.DataFrame, - x: str, - y: str, - c: str | None = None, - s: str | None = None, - cmap='jet', - regression=False, - diagonal=False, - box=False, - ax: Axes | None = None, - **kwargs) -> tuple[Figure, Axes]: - fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) - - if c is not None and s is not None: - df_ = df.dropna(subset=[x, y, c, s]).copy() - x_data, y_data, c_data, s_data = df_[x].to_numpy(), df_[y].to_numpy(), df_[c].to_numpy(), df_[s].to_numpy() - - scatter = ax.scatter(x_data, y_data, c=c_data, - norm=Normalize(vmin=np.percentile(c_data, 10), vmax=np.percentile(c_data, 90)), - cmap=cmap, s=50 * (s_data / s_data.max()) ** 1.5, alpha=0.7, edgecolors=None) - colorbar = True - - dot = np.linspace(s_data.min(), s_data.max(), 6).round(-1) - - for dott in dot[1:-1]: - plt.scatter([], [], c='k', alpha=0.8, s=50 * (dott / s_data.max()) ** 1.5, label='{:.0f}'.format(dott)) - - plt.legend(title=Unit(s)) - - elif c is not None: - df_ = df.dropna(subset=[x, y, c]).copy() - x_data, y_data, c_data = df_[x].to_numpy(), df_[y].to_numpy(), df_[c].to_numpy() - - scatter = ax.scatter(x_data, y_data, c=c_data, vmin=c_data.min(), vmax=np.percentile(c_data, 90), cmap=cmap, - alpha=0.7, - edgecolors=None) - colorbar = True - - elif s is not None: - df_ = df.dropna(subset=[x, y, s]).copy() - x_data, y_data, s_data = df_[x].to_numpy(), df_[y].to_numpy(), df_[s].to_numpy() - - scatter = ax.scatter(x_data, y_data, s=50 * (s_data / s_data.max()) ** 1.5, color='#7a97c9', alpha=0.7, - edgecolors='white') - colorbar = False - - # dealing - dot = np.linspace(s_data.min(), s_data.max(), 6).round(-1) - - for dott in dot[1:-1]: - plt.scatter([], [], c='k', alpha=0.8, s=50 * (dott / s_data.max()) ** 1.5, label='{:.0f}'.format(dott)) - - plt.legend(title=Unit(s)) - - else: - df_ = df.dropna(subset=[x, y]).copy() - x_data, y_data = df_[x].to_numpy(), df_[y].to_numpy() - - scatter = ax.scatter(x_data, y_data, s=30, color='#7a97c9', alpha=0.7, edgecolors='white') - colorbar = False - - xlim = kwargs.get('xlim', (x_data.min(), x_data.max())) - ylim = kwargs.get('ylim', (y_data.min(), y_data.max())) - xlabel = kwargs.get('xlabel', Unit(x)) - ylabel = 
kwargs.get('ylabel', Unit(y)) - title = kwargs.get('title', '') - ax.set(xlim=xlim, ylim=ylim, xlabel=xlabel, ylabel=ylabel, title=title) - - # color_bar - if colorbar: - color_bar = plt.colorbar(scatter, extend='both') - color_bar.set_label(label=Unit(c), size=14) - - if regression: - text, y_predict, slope = _linear_regression(x_data, y_data) - plt.plot(x_data, y_predict, linewidth=3, color=sns.xkcd_rgb["denim blue"], alpha=1, zorder=3) - - plt.text(0.05, 0.95, f'{text}', fontdict={'weight': 'bold'}, color=sns.xkcd_rgb["denim blue"], - ha='left', va='top', transform=ax.transAxes) - - if diagonal: - ax.axline((0, 0), slope=1., color='k', lw=2, ls='--', alpha=0.5, label='1:1') - plt.text(0.91, 0.97, r'$\bf 1:1\ Line$', color='k', ha='right', va='top', transform=ax.transAxes) - - if box: - bins = np.linspace(x_data.min(), x_data.max(), 11, endpoint=True) - wid = (bins + (bins[1] - bins[0]) / 2)[0:-1] - - df[x + '_bin'] = pd.cut(x=x_data, bins=bins, labels=wid) - - group = x + '_bin' - column = y - grouped = df.groupby(group, observed=False) - - names, vals = [], [] - - for i, (name, subdf) in enumerate(grouped): - names.append('{:.0f}'.format(name)) - vals.append(subdf[column].dropna().values) - - plt.boxplot(vals, labels=names, positions=wid, widths=(bins[1] - bins[0]) / 3, - showfliers=False, showmeans=True, meanline=True, patch_artist=True, - boxprops=dict(facecolor='#f2c872', alpha=.7), - meanprops=dict(color='#000000', ls='none'), - medianprops=dict(ls='-', color='#000000')) - - plt.xlim(x_data.min(), x_data.max()) - ax.set_xticks(bins, labels=bins.astype(int)) - - ax.xaxis.set_major_formatter(ScalarFormatter()) - ax.yaxis.set_major_formatter(ScalarFormatter()) - - plt.show() - - return fig, ax diff --git a/AeroViz/plot/templates/templates.py b/AeroViz/plot/templates/templates.py deleted file mode 100644 index e0b536d..0000000 --- a/AeroViz/plot/templates/templates.py +++ /dev/null @@ -1,398 +0,0 @@ -from typing import Literal - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import seaborn as sns -from matplotlib.pyplot import Figure, Axes -from pandas import DataFrame - -from AeroViz.plot.utils import * - -__all__ = [ - 'pie', - 'donuts', - 'violin', - 'bar', -] - - -def _auto_label_pct(pct, - symbol: bool = True, - include_pct: bool = False, - ignore: Literal["inner", "outer"] = 'inner', - value: float = 2): - if not symbol: - return '' - cond = pct <= value if ignore == 'inner' else pct > value - label = '' if cond else '{:.1f}'.format(pct) - return '' if label == '' else label + '%' if include_pct else label - - -@set_figure(fs=8, fw='bold') -def pie(data_set: DataFrame | dict, - labels: list[str], - unit: str, - style: Literal["pie", 'donut'], - ax: Axes | None = None, - symbol: bool = True, - **kwargs) -> tuple[Figure, Axes]: - """ - Create a pie or donut chart based on the provided data. - - Parameters - ---------- - data_set : pd.DataFrame | dict - A pandas DataFrame or dictionary mapping category names to a list of species. - If a DataFrame is provided, the index represents the categories, and each column contains species data. - If a dictionary is provided, it maps category names to lists of species data. - It is assumed that all lists or DataFrame columns contain the same number of entries as the *labels* list. - labels : list of str - The labels for each category. - unit : str - The unit to display in the center of the donut chart. 
- style : Literal["pie", 'donut'] - The style of the chart, either 'pie' for a standard pie chart or 'donut' for a donut chart. - ax : plt.Axes or None, optional - The Axes object to plot the chart onto. If None, a new figure and Axes will be created. - symbol : bool, optional - Whether to display values for each species in the chart. - **kwargs - Additional keyword arguments to be passed to the plotting function. - - Returns - ------- - matplotlib.axes.Axes - The Axes object containing the violin plot. - - Notes - ----- - - If *data_set* is a dictionary, it should contain lists of species that correspond to each category in *labels*. - - The length of each list in *data_set* or the number of columns in the DataFrame should match the length of the *labels* list. - - Examples - -------- - >>> data_set = {'Category 1': [10, 20, 30], 'Category 2': [15, 25, 35]} - >>> labels = ['Species 1', 'Species 2', 'Species 3'] - >>> pie(data_set, labels, unit='kg', style='pie', symbol=True) - """ - if isinstance(data_set, DataFrame): - category_names = list(data_set.index) - data = data_set.to_numpy() - - pies, species = data.shape - - elif isinstance(data_set, dict): - category_names = list(data_set.keys()) - data = np.array(list(data_set.values())) - - pies, species = data.shape - - else: - raise ValueError('data_set must be a DataFrame or a dictionary.') - - colors = kwargs.get('colors') or (Color.colors1 if species == 6 else Color.getColor(num=species)) - - radius = 4 - width = 4 if style == 'pie' else 1 - - text = [''] * pies if style == 'pie' else [Unit(unit) + '\n\n' + '{:.2f}'.format(x) for x in data.sum(axis=1)] - pct_distance = 0.6 if style == 'pie' else 0.88 - - fig, ax = plt.subplots(1, pies, figsize=((pies * 2) + 1, 2)) if ax is None else (ax.get_figure(), ax) - - if pies == 1: - ax = [ax] - - for i in range(pies): - ax[i].pie(data[i], labels=None, colors=colors, textprops=None, - autopct=lambda pct: _auto_label_pct(pct, symbol=symbol, include_pct=True), - pctdistance=pct_distance, radius=radius, wedgeprops=dict(width=width, edgecolor='w')) - - ax[i].pie(data[i], labels=None, colors=colors, textprops=None, - autopct=lambda pct: _auto_label_pct(pct, symbol=symbol, ignore='outer', include_pct=True), - pctdistance=1.3, radius=radius, wedgeprops=dict(width=width, edgecolor='w')) - ax[i].axis('equal') - ax[i].text(0, 0, text[i], ha='center', va='center') - ax[i].set_title(category_names[i]) - - ax[-1].legend(labels, loc='center left', prop={'weight': 'bold'}, bbox_to_anchor=(1, 0, 1.15, 1)) - - # fig.savefig(f"pie_{style}_{title}") - - plt.show() - - return fig, ax - - -@set_figure(fs=8, fw='bold') -def donuts(data_set: DataFrame | dict, - labels: list[str], - unit: str, - ax: Axes | None = None, - symbol=True, - **kwargs) -> tuple[Figure, Axes]: - """ - Plot a donut chart based on the data set. - - Parameters - ---------- - data_set : pd.DataFrame | dict - A pandas DataFrame or a dictionary mapping category names to a list of species. - If a DataFrame is provided, the index represents the categories, and each column contains species data. - If a dictionary is provided, it maps category names to lists of species data. - It is assumed that all lists or DataFrame columns contain the same number of entries as the *labels* list. - labels : list of str - The category labels. - unit : str - The unit to be displayed in the center of the donut chart. - ax : matplotlib.axes.Axes, optional - The axes to plot on. If None, the current axes will be used (default). 
- symbol : bool, optional - Whether to display values for each species (default is True). - **kwargs : dict, optional - Additional keyword arguments to pass to the matplotlib pie chart function. - - Returns - ------- - matplotlib.axes.Axes - The axes containing the donut chart. - """ - - if isinstance(data_set, DataFrame): - category_names = list(data_set.index) - data = data_set.to_numpy() - - pies, species = data.shape - - elif isinstance(data_set, dict): - category_names = list(data_set.keys()) - data = np.array(list(data_set.values())) - - pies, species = data.shape - - else: - raise ValueError('data_set must be a DataFrame or a dictionary.') - - colors1 = kwargs.get('colors') or (Color.colors1 if species == 6 else Color.getColor(num=species)) - colors2 = Color.adjust_opacity(colors1, 0.8) - colors3 = Color.adjust_opacity(colors1, 0.6) - - fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) - - ax.pie(data[2], labels=None, colors=colors1, textprops=None, - autopct=lambda pct: _auto_label_pct(pct, symbol=symbol, include_pct=True), - pctdistance=0.9, radius=14, wedgeprops=dict(width=3, edgecolor='w')) - - ax.pie(data[1], labels=None, colors=colors2, textprops=None, - autopct=lambda pct: _auto_label_pct(pct, symbol=symbol, include_pct=True), - pctdistance=0.85, radius=11, wedgeprops=dict(width=3, edgecolor='w')) - - ax.pie(data[0], labels=None, colors=colors3, textprops=None, - autopct=lambda pct: _auto_label_pct(pct, symbol=symbol, include_pct=True), - pctdistance=0.80, radius=8, wedgeprops=dict(width=3, edgecolor='w')) - - text = (Unit(f'{unit}') + '\n\n' + - 'Event : ' + "{:.2f}".format(np.sum(data[2])) + '\n' + - 'Transition : ' + "{:.2f}".format(np.sum(data[1])) + '\n' + - 'Clean : ' + "{:.2f}".format(np.sum(data[0]))) - - ax.text(0, 0, text, ha='center', va='center') - ax.axis('equal') - - ax.set_title(kwargs.get('title', '')) - - ax.legend(labels, loc='center', prop={'weight': 'bold'}, title_fontproperties={'weight': 'bold'}, - title=f'Outer : {category_names[2]}' + '\n' + f'Middle : {category_names[1]}' + '\n' + f'Inner : {category_names[0]}', - bbox_to_anchor=(0.8, 0, 0.5, 1)) - - # fig.savefig(f"donuts_{title}") - - plt.show() - - return fig, ax - - -@set_figure(figsize=(5, 4)) -def bar(data_set: DataFrame | dict, - data_std: DataFrame | None, - labels: list[str], - unit: str, - style: Literal["stacked", "dispersed"] = "dispersed", - orientation: Literal["va", "ha"] = 'va', - ax: Axes | None = None, - symbol=True, - **kwargs - ) -> tuple[Figure, Axes]: - """ - Parameters - ---------- - data_set : pd.DataFrame or dict - A mapping from category names to a list of species mean or a DataFrame with columns as categories and values as means. - data_std : pd.DataFrame or None - A DataFrame with standard deviations corresponding to data_set, or None if standard deviations are not provided. - labels : list of str - The species names. - unit : str - The unit for the values. - style : {'stacked', 'dispersed'}, default 'dispersed' - Whether to display the bars stacked or dispersed. - orientation : {'va', 'ha'}, default 'va' - The orientation of the bars, 'va' for vertical and 'ha' for horizontal. - ax : plt.Axes or None, default None - The Axes object to plot on. If None, a new figure and Axes are created. - symbol : bool, default True - Whether to display values for each bar. - kwargs : dict - Additional keyword arguments passed to the barplot function. - - Returns - ------- - matplotlib.Axes - The Axes object containing the plot. 
- - """ - # data process - data = data_set.values - - if data_std is None: - data_std = np.zeros(data.shape) - else: - data_std = data_std.values - - groups, species = data.shape - groups_arr = np.arange(groups) - species_arr = np.arange(species) - - total = np.array([data.sum(axis=1), ] * species).T - - pct_data = data / total * 100 - data_cum = pct_data.cumsum(axis=1) - - # figure info - category_names = kwargs.get('ticks') or list(data_set.index) - title = kwargs.get('title', '') - colors = kwargs.get('colors') or (Color.colors1 if species == 6 else Color.getColor(num=species)) - - fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) - - if style == "stacked": - for i in range(species): - widths = pct_data[:, i] - starts = data_cum[:, i] - pct_data[:, i] - - if orientation == 'va': - _ = ax.bar(groups_arr, widths, bottom=starts, width=0.7, color=colors[i], label=labels[i], - edgecolor=None, capsize=None) - if orientation == 'ha': - _ = ax.barh(groups_arr, widths, left=starts, height=0.7, color=colors[i], label=labels[i], - edgecolor=None, capsize=None) - if symbol: - ax.bar_label(_, fmt=_auto_label_pct, label_type='center', padding=0, fontsize=10, weight='bold') - - if style == "dispersed": - width = 0.1 - block = width / 4 - - for i in range(species): - val = data[:, i] - std = (0,) * groups, data_std[:, i] - if orientation == 'va': - _ = ax.bar(groups_arr + (i + 1) * (width + block), val, yerr=std, width=width, color=colors[i], - edgecolor=None, capsize=None) - if orientation == 'ha': - _ = ax.barh(groups_arr + (i + 1) * (width + block), val, xerr=std, height=width, color=colors[i], - edgecolor=None, capsize=None) - if symbol: - ax.bar_label(_, fmt=_auto_label_pct, label_type='center', padding=0, fontsize=8, weight='bold') - - if orientation == 'va': - xticks = groups_arr + (species / 2 + 0.5) * (width + block) if style == "dispersed" else groups_arr - ax.set_xticks(xticks, category_names, weight='bold') - ax.set_ylabel(Unit(unit) if style == "dispersed" else '$Contribution (\\%)$') - ax.set_ylim(0, None if style == "dispersed" else 100) - ax.legend(labels, bbox_to_anchor=(1, 1), loc='upper left', prop={'size': 12}) - - if orientation == 'ha': - ax.invert_yaxis() - yticks = groups_arr + 3.5 * (width + block) if style == "dispersed" else groups_arr - ax.set_yticks(yticks, category_names, weight='bold') - ax.set_xlabel(Unit(unit) if style == "dispersed" else '$Contribution (\\%)$') - ax.set_xlim(0, None if style == "dispersed" else 100) - ax.legend(labels, bbox_to_anchor=(1, 1), loc='upper left', prop={'size': 12}) - - # fig.savefig(f"Barplot_{title}") - - plt.show() - - return fig, ax - - -@set_figure -def violin(data_set: DataFrame | dict, - unit: str, - ax: Axes | None = None, - **kwargs - ) -> tuple[Figure, Axes]: - """ - Generate a violin plot for multiple data sets. - - Parameters - ---------- - data_set : pd.DataFrame or dict - A mapping from category names to pandas DataFrames containing the data. - unit : str - The unit for the data being plotted. - ax : matplotlib.axes.Axes, optional - The Axes object to draw the plot onto. If not provided, a new figure will be created. - **kwargs : dict - Additional keyword arguments to be passed to the violinplot function. - - Returns - ------- - matplotlib.axes.Axes - The Axes object containing the violin plot. 
- - """ - fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) - - data = data_set.to_numpy() - - data = data[~np.isnan(data).any(axis=1)] - - grps = data.shape[1] - - width = 0.6 - block = width / 2 - x_position = np.arange(grps) - - plt.boxplot(data, positions=x_position, widths=0.15, - showfliers=False, showmeans=True, meanline=False, patch_artist=True, - capprops=dict(linewidth=0), - whiskerprops=dict(linewidth=1.5, color='k', alpha=1), - boxprops=dict(linewidth=1.5, color='k', facecolor='#4778D3', alpha=1), - meanprops=dict(marker='o', markeredgecolor='black', markerfacecolor='white', markersize=6), - medianprops=dict(linewidth=1.5, ls='-', color='k', alpha=1)) - - sns.violinplot(data=data, density_norm='area', color='#4778D3', inner=None) - - for violin, alpha in zip(ax.collections[:], [0.5] * len(ax.collections[:])): - violin.set_alpha(alpha) - violin.set_edgecolor(None) - - plt.scatter(x_position, data.mean(), marker='o', facecolor='white', edgecolor='k', s=10) - - xlim = kwargs.get('xlim') or (x_position[0] - (width / 2 + block), x_position[-1] + (width / 2 + block)) - ylim = kwargs.get('ylim') or (0, None) - xlabel = kwargs.get('xlabel') or '' - ylabel = kwargs.get('ylabel') or Unit(unit) - xticks = kwargs.get('xticks') or [x.replace('-', '\n') for x in list(data_set.keys())] - - ax.set(xlim=xlim, ylim=ylim, xlabel=xlabel, ylabel=ylabel, title=kwargs.get('title')) - ax.set_xticks(x_position, xticks, fontweight='bold', fontsize=12) - - # fig.savefig(f'Violin_{unit}') - - plt.show() - - return fig, ax diff --git a/AeroViz/plot/timeseries/__init__.py b/AeroViz/plot/timeseries/__init__.py index d4ff955..49bd5e5 100644 --- a/AeroViz/plot/timeseries/__init__.py +++ b/AeroViz/plot/timeseries/__init__.py @@ -1 +1,2 @@ +from .template import * from .timeseries import * diff --git a/AeroViz/plot/timeseries/template.py b/AeroViz/plot/timeseries/template.py new file mode 100644 index 0000000..8357c8c --- /dev/null +++ b/AeroViz/plot/timeseries/template.py @@ -0,0 +1,47 @@ +import matplotlib.pyplot as plt +from matplotlib.pyplot import Figure, Axes +from pandas import DataFrame + +from AeroViz.plot.timeseries.timeseries import timeseries + + +def timeseries_template(df: DataFrame) -> tuple[Figure, Axes]: + fig, ax = plt.subplots(5, 1, figsize=(len(df.index) * 0.01, 4)) + (ax1, ax2, ax3, ax4, ax5) = ax + + timeseries(df, + y=['Extinction', 'Scattering', 'Absorption'], + rolling=30, + ax=ax1, + ylabel='Coefficient', + ylim=[0., None], + set_xaxis_visible=False, + legend_ncol=3, + ) + + # Temp, RH + timeseries(df, + y='AT', + y2='RH', + rolling=30, + ax=ax2, + ax_plot_kws=dict(color='r'), + ax2_plot_kws=dict(color='b'), + ylim=[10, 30], + ylim2=[20, 100], + set_xaxis_visible=False, + legend_ncol=2, + ) + + timeseries(df, y='WS', color='WD', style='scatter', ax=ax3, scatter_kws=dict(cmap='hsv'), + cbar_kws=dict(ticks=[0, 90, 180, 270, 360]), + ylim=[0, None], set_xaxis_visible=False) + + timeseries(df, y='VC', color='PBLH', style='bar', ax=ax4, bar_kws=dict(cmap='Blues'), set_xaxis_visible=False, + ylim=[0, 5000]) + + timeseries(df, y='PM2.5', color='PM1/PM25', style='scatter', ax=ax5, ylim=[0, None]) + + plt.show() + + return fig, ax diff --git a/AeroViz/plot/timeseries/timeseries.py b/AeroViz/plot/timeseries/timeseries.py index b0a6c97..b8f2e0e 100644 --- a/AeroViz/plot/timeseries/timeseries.py +++ b/AeroViz/plot/timeseries/timeseries.py @@ -1,38 +1,40 @@ -from datetime import datetime from typing import Literal import matplotlib.pyplot as plt +import 
numpy as np from matplotlib.cm import ScalarMappable from matplotlib.pyplot import Figure, Axes +from mpl_toolkits.axes_grid1 import make_axes_locatable from mpl_toolkits.axes_grid1.inset_locator import inset_axes -from pandas import DataFrame, date_range, Timestamp +from pandas import DataFrame, date_range, Timedelta from AeroViz.plot.utils import * -__all__ = ['timeseries', 'timeseries_template'] +__all__ = ['timeseries', 'timeseries_stacked'] + default_bar_kws = dict( - width=0.0417, - edgecolor=None, - linewidth=0, - cmap='jet', + width=0.0417, + edgecolor=None, + linewidth=0, + cmap='jet', ) default_scatter_kws = dict( - marker='o', - s=5, - edgecolor=None, - linewidths=0.3, - alpha=0.9, - cmap='jet', + marker='o', + s=5, + edgecolor=None, + linewidths=0.3, + alpha=0.9, + cmap='jet', ) default_insert_kws = dict( - width="1.5%", - height="100%", - loc='lower left', - bbox_to_anchor=(1.01, 0, 1.2, 1), - borderpad=0 + width="1.5%", + height="100%", + loc='lower left', + bbox_to_anchor=(1.01, 0, 1.2, 1), + borderpad=0 ) default_plot_kws = dict() @@ -41,262 +43,360 @@ def _scatter(ax, df, _y, _c, scatter_kws, cbar_kws, inset_kws): - if _c is None or _c not in df.columns: - scatter_kws.pop('cmap') - ax.scatter(df.index, df[_y], **scatter_kws) - else: - ax.scatter(df.index, df[_y], c=df[_c], **scatter_kws) - cax = inset_axes(ax, **inset_kws) + if _c is None or _c not in df.columns: + scatter_kws.pop('cmap') + ax.scatter(df.index, df[_y], **scatter_kws) + else: + ax.scatter(df.index, df[_y], c=df[_c], **scatter_kws) + cax = inset_axes(ax, **inset_kws) - # Filter the children to find ScalarMappable objects - mappable_objects = [child for child in ax.get_children() if isinstance(child, ScalarMappable)] + # Filter the children to find ScalarMappable objects + mappable_objects = [child for child in ax.get_children() if isinstance(child, ScalarMappable)] - # Use the first mappable object for the colorbar - if mappable_objects: - plt.colorbar(mappable=mappable_objects[0], cax=cax, **cbar_kws) - else: - print("No mappable objects found.") + # Use the first mappable object for the colorbar + if mappable_objects: + plt.colorbar(mappable=mappable_objects[0], cax=cax, **cbar_kws) + else: + print("No mappable objects found.") def _bar(ax, df, _y, _c, bar_kws, cbar_kws, inset_kws): - scalar_map, colors = Color.color_maker(df[_c].values, cmap=bar_kws.pop('cmap')) - ax.bar(df.index, df[_y], color=scalar_map.to_rgba(colors), **bar_kws) - cax = inset_axes(ax, **inset_kws) - plt.colorbar(mappable=scalar_map, cax=cax, **cbar_kws) + scalar_map, colors = Color.color_maker(df[_c].values, cmap=bar_kws.pop('cmap')) + ax.bar(df.index, df[_y], color=scalar_map.to_rgba(colors), **bar_kws) + cax = inset_axes(ax, **inset_kws) + plt.colorbar(mappable=scalar_map, cax=cax, **cbar_kws) def _plot(ax, df, _y, _color, plot_kws): - ax.plot(df.index, df[_y], color=_color, **plot_kws) + ax.plot(df.index, df[_y], color=_color, **plot_kws) + + +def _wind_arrow(ax, df, y, c, scatter_kws, cbar_kws, inset_kws): + """ + Plot wind arrows on a scatter plot. 
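+
+    Each point is placed at its wind-speed value and annotated with a small
+    arrow whose orientation is taken from the wind-direction column and whose
+    length scales with wind speed; the hard-coded scale factors below are
+    heuristic and may need tuning for a given time resolution.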
+ + :param ax: matplotlib axes + :param df: pandas DataFrame + :param y: column name for wind speed + :param c: column name for wind direction + :param scatter_kws: keyword arguments for scatter plot + :param cbar_kws: keyword arguments for colorbar + :param inset_kws: keyword arguments for inset axes + """ + # First, create a scatter plot + sc = ax.scatter(df.index, df[y], c=df[c], **scatter_kws) + + # Add colorbar + divider = make_axes_locatable(ax) + cax = divider.append_axes("right", size="2%", pad=0.05) + plt.colorbar(sc, cax=cax, **cbar_kws) + # Add wind arrows + for idx, row in df.iterrows(): + wind_speed = row[y] + wind_dir = np.radians(row[c]) + dx = np.sin(wind_dir) * wind_speed / 20 # Scale factor can be adjusted + dy = np.cos(wind_dir) * wind_speed / 20 + ax.annotate('', xy=(idx + 10 * dx * Timedelta(hours=5), wind_speed + 4 * dy), + xytext=(idx - 10 * dx * Timedelta(hours=5), wind_speed - 4 * dy), + arrowprops=dict(arrowstyle='->', color='k', linewidth=0.5)) -def combine_legends(axes_list: list[Axes]) -> tuple[list, list]: - return ( - [legend for axes in axes_list for legend in axes.get_legend_handles_labels()[0]], - [label for axes in axes_list for label in axes.get_legend_handles_labels()[1]] - ) + # Set the x-axis limit to show all data points + # ax.set_xlim(df.index.min() - datetime.timedelta(days=1), df.index.max()) -@set_figure(fs=8, autolayout=False) +def process_timeseries_data(df, rolling=None, interpolate_limit=None): + # apply rolling window if specified + df = df.rolling(window=rolling, min_periods=1).mean(numeric_only=True) if rolling is not None else df + + # apply interpolation if specified + df = df.interpolate(method='time', limit=interpolate_limit) if interpolate_limit is not None else df + return df + + +@set_figure(autolayout=False) def timeseries(df: DataFrame, - y: list[str] | str, - y2: list[str] | str = None, - c: list[str] | str = None, - # color: list[str] | str = None, - rolling: str | int | None = None, - times: list[datetime | Timestamp | str] = None, - freq: str = '1MS', - style: list[Literal['scatter', 'bar', 'line']] | str | None = None, - ax: Axes | None = None, - set_xaxis_visible: bool | None = None, - legend_loc: Literal['best', 'upper right', 'upper left', 'lower left', 'lower right'] = 'best', - legend_ncol: int = 1, - **kwargs - ) -> tuple[Figure, Axes]: - """ - Plot the timeseries data with the option of scatterplot, barplot, and lineplot. - - Parameters - ----------- - df : DataFrame - The data to plot. - y : list[str] | str - The primary y-axis data columns. - y2 : list[str] | str, optional - The secondary y-axis data columns. Defaults to None. - c : str, optional - The column for color mapping or the color. Defaults to None. - rolling : str | int | None, optional - Rolling window size for smoothing. Defaults to None. - times : tuple[datetime, datetime] | tuple[Timestamp, Timestamp], optional - Time range for the data. Defaults to None. - freq : str, optional - Frequency for x-axis ticks. Defaults to '2MS'. - style : Literal['scatter', 'bar', 'line'] | None, optional - Style of the plot. Defaults to 'scatter'. - ax : Axes | None, optional - Matplotlib Axes object to plot on. Defaults to None. - set_xaxis_visible : bool | None, optional - Whether to set x-axis visibility. Defaults to None. - legend_loc : Literal['best', 'upper right', 'upper left', 'lower left', 'lower right'], optional - Location of the legend. Defaults to 'best'. - legend_ncol : int, optional - Number of columns in the legend. Defaults to 1. 
- **kwargs : Additional keyword arguments for customization. - fig_kws : dict, optional - Additional keyword arguments for the figure. Defaults to {}. - scatter_kws : dict, optional - Additional keyword arguments for the scatter plot. Defaults to {}. - bar_kws : dict, optional - Additional keyword arguments for the bar plot. Defaults to {}. - ax_plot_kws : dict, optional - Additional keyword arguments for the primary y-axis plot. Defaults to {}. - ax2_plot_kws : dict, optional - Additional keyword arguments for the secondary y-axis plot. Defaults to {}. - cbar_kws : dict, optional - Additional keyword arguments for the colorbar. Defaults to {}. - inset_kws : dict, optional - Additional keyword arguments for the inset axes. Defaults to {}. - - Returns - ------- - ax : AxesSubplot - Matplotlib AxesSubplot. - - Example - ------- - >>> timeseries(df, y='WS', c='WD', scatter_kws=dict(cmap='hsv'), cbar_kws=dict(ticks=[0, 90, 180, 270, 360]), ylim=[0, None]) - """ - # Set the time - - if times is not None: - st_tm, fn_tm = map(Timestamp, times) - else: - try: - st_tm, fn_tm = df.index[0], df.index[-1] - except IndexError: - raise IndexError("The DataFrame is empty. Please provide a valid DataFrame.") - - # Apply rolling window if specified - df = df.loc[st_tm:fn_tm] if rolling is None else ( - df.loc[st_tm:fn_tm].rolling(window=rolling, min_periods=1).mean(numeric_only=True)) - - # Initialize figure and axis if not provided - fig, ax = plt.subplots(**{**{'figsize': (6, 2)}, **kwargs.get('fig_kws', {})}) if ax is None else ( - ax.get_figure(), ax) - - # Ensure y, y2, c, and style are lists - y = [y] if isinstance(y, str) else y - y2 = [y2] if isinstance(y2, str) else y2 if y2 is not None else [] - c = [c] if isinstance(c, str) else c if c is not None else [None] * (len(y) + len(y2)) - style = [style] if isinstance(style, str) else style if style is not None else ['plot'] * (len(y) + len(y2)) - - if len(c) != len(y) + len(y2): - raise ValueError("The length of c must match the combined length of y and y2") - - if len(style) != len(y) + len(y2): - raise ValueError("The length of style must match the combined length of y and y2") - - # Create a secondary y-axis if y2 is not empty - ax2 = ax.twinx() if y2 else None - - # # Set color cycle - ax.set_prop_cycle(Color.color_cycle) - if y2: - ax2.set_prop_cycle(Color.color_cycle[len(y):]) - - if y2 and ('scatter' or 'bar') in style: - fig.subplots_adjust(right=0.8) - - for i, _c in enumerate(c): - if _c is not None and _c in df.columns: - style[i] = 'scatter' - - for i, (_y, _c, _style) in enumerate(zip(y, c, style)): - scatter_kws = {**default_scatter_kws, **{'label': Unit(_y)}, **kwargs.get('scatter_kws', {})} - bar_kws = {**default_bar_kws, **{'label': Unit(_y)}, **kwargs.get('bar_kws', {})} - plot_kws = {**default_plot_kws, **{'label': Unit(_y)}, **kwargs.get('plot_kws', {})} - - if _style in ['scatter', 'bar']: - cbar_kws = {**default_cbar_kws, **{'label': Unit(_c), 'ticks': None}, **kwargs.get('cbar_kws', {})} - inset_kws = {**default_insert_kws, **{'bbox_transform': ax.transAxes}, **kwargs.get('inset_kws', {})} - - if _style == 'scatter': - _scatter(ax, df, _y, _c, scatter_kws, cbar_kws, inset_kws) - - elif _style == 'bar': - _bar(ax, df, _y, _c, bar_kws, cbar_kws, inset_kws) - - else: - _plot(ax, df, _y, _c, plot_kws) - - if y2: - for i, (_y, _c, _style) in enumerate(zip(y2, c[len(y):], style[len(y):])): - scatter_kws = {**default_scatter_kws, **{'label': Unit(_y)}, **kwargs.get('scatter_kws2', {})} - bar_kws = {**default_bar_kws, **{'label': 
Unit(_y)}, **kwargs.get('bar_kws2', {})} - plot_kws = {**default_plot_kws, **{'label': Unit(_y)}, **kwargs.get('plot_kws2', {})} - - if _style in ['scatter', 'bar']: - cbar_kws = {**default_cbar_kws, **{'label': Unit(_c), 'ticks': None}, **kwargs.get('cbar_kws2', {})} - inset_kws = {**default_insert_kws, **{'bbox_transform': ax.transAxes}, **kwargs.get('inset_kws2', {})} - - if _style == 'scatter': - _scatter(ax2, df, _y, _c, scatter_kws, cbar_kws, inset_kws) - - elif _style == 'bar': - _bar(ax2, df, _y, _c, bar_kws, cbar_kws, inset_kws) - - else: # line plot - _plot(ax2, df, _y, _c, plot_kws) - - # Combine legends from ax and ax2 - ax.legend(*combine_legends([ax, ax2]), loc=legend_loc, ncol=legend_ncol) - - else: - ax.legend(loc=legend_loc, ncol=legend_ncol) - - if set_xaxis_visible is not None: - ax.axes.xaxis.set_visible(set_xaxis_visible) - - ax.set(xlabel=kwargs.get('xlabel', ''), - ylabel=kwargs.get('ylabel', Unit(y) if isinstance(y, str) else Unit(y[0])), - xticks=kwargs.get('xticks', date_range(start=st_tm, end=fn_tm, freq=freq).strftime("%F")), - yticks=kwargs.get('yticks', ax.get_yticks()), - xticklabels=kwargs.get('xticklabels', date_range(start=st_tm, end=fn_tm, freq=freq).strftime("%F")), - yticklabels=kwargs.get('yticklabels', ax.get_yticklabels()), - xlim=kwargs.get('xlim', (st_tm, fn_tm)), - ylim=kwargs.get('ylim', (None, None)), - title=kwargs.get('title', '') - ) - - if y2: - ax2.set(ylabel=kwargs.get('ylabel2', Unit(y2) if isinstance(y2, str) else Unit(y2[0])), - yticks=kwargs.get('yticks2', ax2.get_yticks()), - yticklabels=kwargs.get('yticklabels2', ax2.get_yticklabels()), - ylim=kwargs.get('ylim2', (None, None))) - - plt.show() - - return fig, ax - - -@set_figure(fs=8, autolayout=False) -def timeseries_template(df: DataFrame) -> tuple[Figure, Axes]: - fig, ax = plt.subplots(5, 1, figsize=(len(df.index) * 0.01, 4)) - (ax1, ax2, ax3, ax4, ax5) = ax - - timeseries(df, - y=['Extinction', 'Scattering', 'Absorption'], - rolling=30, - ax=ax1, - ylabel='Coefficient', - ylim=[0., None], - set_xaxis_visible=False, - legend_ncol=3, - ) - - # Temp, RH - timeseries(df, - y='AT', - y2='RH', - rolling=30, - ax=ax2, - ax_plot_kws=dict(color='r'), - ax2_plot_kws=dict(color='b'), - ylim=[10, 30], - ylim2=[20, 100], - set_xaxis_visible=False, - legend_ncol=2, - ) - - timeseries(df, y='WS', c='WD', style='scatter', ax=ax3, scatter_kws=dict(cmap='hsv'), - cbar_kws=dict(ticks=[0, 90, 180, 270, 360]), - ylim=[0, None], set_xaxis_visible=False) - - timeseries(df, y='VC', c='PBLH', style='bar', ax=ax4, bar_kws=dict(cmap='Blues'), set_xaxis_visible=False, - ylim=[0, 5000]) - - timeseries(df, y='PM25', c='PM1/PM25', style='scatter', ax=ax5, ylim=[0, None]) - - plt.show() - - return fig, ax + y: list[str] | str, + y2: list[str] | str = None, + yi: list[str] | str = None, + color: list[str] | str | None = None, + label: list[str] | str | None = None, + rolling: int | str | None = 3, + interpolate_limit: int | None = 6, + major_freq: str = '1MS', + minor_freq: str = '10d', + style: list[Literal['scatter', 'bar', 'line', 'arrow']] | str | None = None, + ax: Axes | None = None, + set_xaxis_visible: bool | None = None, + legend_loc: Literal['best', 'upper right', 'upper left', 'lower left', 'lower right'] = 'best', + legend_ncol: int = 1, + **kwargs + ) -> tuple[Figure, Axes]: + """ + Plot the timeseries data with the option of scatterplot, barplot, and lineplot. + + Parameters + ----------- + df : DataFrame + The data to plot. + y : list[str] | str + The primary y-axis data columns. 
+ y2 : list[str] | str, optional + The secondary y-axis data columns. Defaults to None. + yi : list[str] | str, optional + The components for percentage calculation. Defaults to None. + color : str, optional + The column for color mapping or the color. Defaults to None. + label : str, optional + The label for the legend. Defaults to None. + rolling : str | int | None, optional + Rolling window size for smoothing. Defaults to None. + interpolate_limit : int, optional + Interpolation limit for missing values. Defaults to None. + major_freq : str, optional + Frequency for x-axis ticks. Defaults to '1MS'. + minor_freq : str, optional + Frequency for x-axis minor ticks. Defaults to '10d'. + style : Literal['scatter', 'bar', 'line'] | None, optional + Style of the plot. Defaults to 'scatter'. + ax : Axes | None, optional + Matplotlib Axes object to plot on. Defaults to None. + set_xaxis_visible : bool | None, optional + Whether to set x-axis visibility. Defaults to None. + legend_loc : Literal['best', 'upper right', 'upper left', 'lower left', 'lower right'], optional + Location of the legend. Defaults to 'best'. + legend_ncol : int, optional + Number of columns in the legend. Defaults to 1. + **kwargs : Additional keyword arguments for customization. + fig_kws : dict, optional + Additional keyword arguments for the figure. Defaults to {}. + scatter_kws : dict, optional + Additional keyword arguments for the scatter plot. Defaults to {}. + bar_kws : dict, optional + Additional keyword arguments for the bar plot. Defaults to {}. + ax_plot_kws : dict, optional + Additional keyword arguments for the primary y-axis plot. Defaults to {}. + ax2_plot_kws : dict, optional + Additional keyword arguments for the secondary y-axis plot. Defaults to {}. + cbar_kws : dict, optional + Additional keyword arguments for the colorbar. Defaults to {}. + inset_kws : dict, optional + Additional keyword arguments for the inset axes. Defaults to {}. + + Returns + ------- + ax : AxesSubplot + Matplotlib AxesSubplot. + + Example + ------- + >>> timeseries(df, y='WS', color='WD', scatter_kws=dict(cmap='hsv'), cbar_kws=dict(ticks=[0, 90, 180, 270, 360]), ylim=[0, None]) + """ + # Set the time + try: + st_tm, fn_tm = df.index[0], df.index[-1] + except IndexError: + raise IndexError("The DataFrame is empty. 
Please provide a valid DataFrame.") + + # calculate the percentage of each component + if yi is not None: + df_pct = df[yi].div(df[yi].sum(axis=1), axis=0) * 100 + mean = [f"{_label} : {df[comp].mean():.2f}" for _label, comp in zip(label, yi)] + pct = [f"{_label} : {df_pct[comp].mean():.2f}%" for _label, comp in zip(label, yi)] + df_pct = process_timeseries_data(df_pct, rolling, interpolate_limit) + + # process data + df = process_timeseries_data(df, rolling, interpolate_limit) + + # Initialize figure and axis if not provided + fig, ax = plt.subplots(**{**{'figsize': (6, 2)}, **kwargs.get('fig_kws', {})}) if ax is None else ( + ax.get_figure(), ax) + + # Ensure y, y2, c, and style are lists + y = [y] if isinstance(y, str) else y + y2 = [y2] if isinstance(y2, str) else y2 if y2 is not None else [] + color = [color] if isinstance(color, str) else color if color is not None else [None] * (len(y) + len(y2)) + label = [label] if isinstance(label, str) else label if label is not None else [None] * (len(y) + len(y2)) + style = [style] if isinstance(style, str) else style if style is not None else ['plot'] * (len(y) + len(y2)) + + for name, lst in [("c", color), ("style", style), ("label", label)]: + if len(lst) != len(y) + len(y2): + raise ValueError(f"The length of {name} must match the combined length of y and y2") + + # Create a secondary y-axis if y2 is not empty + ax2 = ax.twinx() if y2 else None + + # # Set color cycle + ax.set_prop_cycle(Color.color_cycle) + if y2: + ax2.set_prop_cycle(Color.color_cycle[len(y):]) + + if y2 and ('scatter' or 'bar') in style: + fig.subplots_adjust(right=0.8) + + # for i, _c in enumerate(color): + # if _c is not None and _c in df.columns: + # style[i] = 'scatter' + + for i, (_y, _c, _label, _style) in enumerate(zip(y, color, label, style)): + scatter_kws = {**default_scatter_kws, **{'label': Unit(_y)}, **kwargs.get('scatter_kws', {})} + bar_kws = {**default_bar_kws, **{'label': Unit(_y)}, **kwargs.get('bar_kws', {})} + plot_kws = {**default_plot_kws, **{'label': Unit(_y)}, **kwargs.get('plot_kws', {})} + + if _style in ['scatter', 'bar', 'arrow']: + cbar_kws = {**default_cbar_kws, **{'label': Unit(_c), 'ticks': None}, **kwargs.get('cbar_kws', {})} + inset_kws = {**default_insert_kws, **{'bbox_transform': ax.transAxes}, **kwargs.get('inset_kws', {})} + + if _style == 'scatter': + _scatter(ax, df, _y, _c, scatter_kws, cbar_kws, inset_kws) + + elif _style == 'bar': + _bar(ax, df, _y, _c, bar_kws, cbar_kws, inset_kws) + + elif _style == 'arrow': + _wind_arrow(ax, df, _y, _c, scatter_kws, cbar_kws, inset_kws) + + else: + _plot(ax, df, _y, _c, plot_kws) + + if y2: + for i, (_y, _c, _style) in enumerate(zip(y2, color[len(y):], style[len(y):])): + scatter_kws = {**default_scatter_kws, **{'label': Unit(_y)}, **kwargs.get('scatter_kws2', {})} + bar_kws = {**default_bar_kws, **{'label': Unit(_y)}, **kwargs.get('bar_kws2', {})} + plot_kws = {**default_plot_kws, **{'label': Unit(_y)}, **kwargs.get('plot_kws2', {})} + + if _style in ['scatter', 'bar']: + cbar_kws = {**default_cbar_kws, **{'label': Unit(_c), 'ticks': None}, **kwargs.get('cbar_kws2', {})} + inset_kws = {**default_insert_kws, **{'bbox_transform': ax.transAxes}, **kwargs.get('inset_kws2', {})} + + if _style == 'scatter': + _scatter(ax2, df, _y, _c, scatter_kws, cbar_kws, inset_kws) + + elif _style == 'bar': + _bar(ax2, df, _y, _c, bar_kws, cbar_kws, inset_kws) + + elif _style == 'arrow': + pass + + else: # line plot + _plot(ax2, df, _y, _c, plot_kws) + + # Combine legends from ax and ax2 + 
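+        # (merging handles and labels from both axes lets primary- and secondary-axis
+        #  series share a single legend box instead of drawing two overlapping legends)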
ax.legend(*combine_legends([ax, ax2]), loc=legend_loc, ncol=legend_ncol) + + else: + ax.legend(loc=legend_loc, ncol=legend_ncol) + + if set_xaxis_visible is not None: + ax.axes.xaxis.set_visible(set_xaxis_visible) + + ax.set(xlabel=kwargs.get('xlabel', ''), + ylabel=kwargs.get('ylabel', Unit(y) if isinstance(y, str) else Unit(y[0])), + xlim=kwargs.get('xlim', (st_tm, fn_tm)), + ylim=kwargs.get('ylim', (None, None)), + title=kwargs.get('title', '') + ) + + xticks = kwargs.get('xticks', date_range(start=st_tm, end=fn_tm, freq=major_freq)) + minor_xticks = kwargs.get('minor_xticks', date_range(start=st_tm, end=fn_tm, freq=minor_freq)) + + ax.set_xticks(ticks=xticks, labels=xticks.strftime("%F")) + ax.set_xticks(minor_xticks, minor=True) + + if y2: + ax2.set(ylim=kwargs.get('ylim2', (None, None)), + ylabel=kwargs.get('ylabel2', Unit(y2) if isinstance(y2, str) else Unit(y2[0])) + ) + + plt.show() + + return fig, ax + + +@set_figure(autolayout=False) +def timeseries_stacked(df, + y: list[str] | str, + yi: list[str] | str, + label: list[str] | str, + rolling: int | str | None = 6, + interpolate_limit: int | None = 6, + major_freq: str = '1MS', + minor_freq: str = '10d', + ax: Axes | None = None, + legend_ncol: int = 1, + **kwargs + ) -> tuple[Figure, Axes]: + try: + st_tm, fn_tm = df.index[0], df.index[-1] + except IndexError: + raise IndexError("The DataFrame is empty. Please provide a valid DataFrame.") + + # calculate the percentage of each component + df_pct = df[yi].div(df[yi].sum(axis=1), axis=0) * 100 + mean = [f"{_label} : {df[comp].mean():.2f}" for _label, comp in zip(label, yi)] + pct = [f"{_label} : {df_pct[comp].mean():.2f}%" for _label, comp in zip(label, yi)] + + # process data + df = process_timeseries_data(df, rolling, interpolate_limit) + df_pct = process_timeseries_data(df_pct, rolling, interpolate_limit) + + fig, (ax1, ax2) = plt.subplots(2, 1, **{**{'figsize': (12, 3)}, **kwargs.get('fig_kws', {})}) + + width = 0.0417 + color = Color.colors1 + + for name, lst in [("color", color), ("label", label)]: + if len(lst) != len(yi): + raise ValueError(f"The length of {name} must match the combined length of y and y2") + + bottom = None # 初始化堆疊位置 + for i, (_column, _color, _label) in enumerate(zip(yi, color, mean)): + if i == 0: + bottom = df[_column] * 0 # 第一個堆疊底部為零 + ax1.bar(df.index, df[_column], color=_color, width=width, bottom=bottom, label=_label) + bottom += df[_column] # 更新堆疊底部位置 + + ax1.legend(loc='upper left', ncol=legend_ncol, prop={'weight': 'bold'}, bbox_to_anchor=(1, 0, 1.2, 1)) + + ax1.axes.xaxis.set_visible(False) + + ax1.set(xlabel=kwargs.get('xlabel', ''), + ylabel=kwargs.get('ylabel', Unit(y) if isinstance(y, str) else Unit(y[0])), + xlim=kwargs.get('xlim', (st_tm, fn_tm)), + ylim=kwargs.get('ylim', (None, None)), + title=kwargs.get('title', ''), + ) + + xticks = kwargs.get('xticks', date_range(start=st_tm, end=fn_tm, freq=major_freq)) + yticks = kwargs.get('yticks', np.linspace(*ax1.get_ylim(), num=6)) + minor_xticks = kwargs.get('minor_xticks', date_range(start=st_tm, end=fn_tm, freq=minor_freq)) + + ax1.set_xticks(ticks=xticks, labels=xticks.strftime("%F")) + ax1.set_yticks(ticks=yticks, labels=[f'{tick:.0f}' for tick in yticks]) + ax1.set_xticks(minor_xticks, minor=True) + + # ax2 + bottom = None # 初始化堆疊位置 + for i, (_column, _color, _label) in enumerate(zip(yi, color, pct)): + if i == 0: + bottom = df_pct[_column] * 0 # 第一個堆疊底部為零 + ax2.bar(df_pct.index, df_pct[_column], color=_color, width=width, bottom=bottom, label=_label) + bottom += df_pct[_column] # 
更新堆疊底部位置 + + ax2.legend(loc='upper left', ncol=legend_ncol, prop={'weight': 'bold'}, bbox_to_anchor=(1, 0, 1.2, 1)) + + ax2.set(xlabel=kwargs.get('xlabel', ''), + ylabel=kwargs.get('ylabel', 'Percentage (%)'), + xlim=kwargs.get('xlim', (st_tm, fn_tm)), + ylim=(0, 100), + ) + + xticks = kwargs.get('xticks', date_range(start=st_tm, end=fn_tm, freq=major_freq)) + yticks = kwargs.get('yticks', np.linspace(*ax2.get_ylim(), num=6)) + minor_xticks = kwargs.get('minor_xticks', date_range(start=st_tm, end=fn_tm, freq=minor_freq)) + + ax2.set_xticks(ticks=xticks, labels=xticks.strftime("%F")) + ax2.set_yticks(ticks=yticks, labels=[f'{tick:.0f}' for tick in yticks]) + ax2.set_xticks(minor_xticks, minor=True) + + return fig, ax1 diff --git a/AeroViz/plot/utils/__init__.py b/AeroViz/plot/utils/__init__.py index 4a0bdba..6e90991 100644 --- a/AeroViz/plot/utils/__init__.py +++ b/AeroViz/plot/utils/__init__.py @@ -1,3 +1,4 @@ from ._color import Color -from ._decorator import set_figure from ._unit import Unit +from .plt_utils import * +from .sklearn_utils import * diff --git a/AeroViz/plot/utils/_color.py b/AeroViz/plot/utils/_color.py index 9da7f4a..777d3f5 100644 --- a/AeroViz/plot/utils/_color.py +++ b/AeroViz/plot/utils/_color.py @@ -9,63 +9,63 @@ class Color: - color_cycle = cycler(color=['b', 'g', 'r', 'c', 'm', 'y', 'k']) - - linecolor = [{'line': '#1a56db', 'edge': '#0F50A6', 'face': '#5983D9'}, - {'line': '#046c4e', 'edge': '#1B591F', 'face': '#538C4A'}, - {'line': '#c81e1e', 'edge': '#f05252', 'face': '#f98080'}] - - # colors = ['#FF3333', '#33FF33', '#FFFF33', '#5555FF', '#B94FFF', '#AAAAAA', '#748690'] # the last one is "unknown" - - colors1 = ['#A65E58', '#A5BF6B', '#F2BF5E', '#3F83BF', '#B777C2', '#D1CFCB'] - colors2 = ['#A65E58', '#A5BF6B', '#F2BF5E', '#3F83BF', '#B777C2', '#D1CFCB', '#96c8e6'] - colors3 = ['#A65E58', '#A5BF6B', '#a6710d', '#F2BF5E', '#3F83BF', '#B777C2', '#D1CFCB', '#96c8e6'] # POC SOC - - colors_mutiWater = ['#A65E58', '#c18e8a', '#A5BF6B', '#c5d6a0', '#F2BF5E', '#3F83BF', '#c089ca', '#d3acda', - '#D1CFCB'] - colors_mutiWater2 = ['#A65E58', '#96c8e6', '#A5BF6B', '#96c8e6', '#F2BF5E', '#3F83BF', '#c089ca', '#96c8e6', - '#D1CFCB'] # water - - color_choose = {'Clean': ['#1d4a9f', '#84a7e9'], - 'Transition': ['#4a9f1d', '#a7e984'], - 'Event': ['#9f1d4a', '#e984a7']} - - paired = [plt.get_cmap('Paired')(i) for i in range(4)] - - @staticmethod - def getColor(num: int = 6, cmap: str = 'jet_r'): - category_colors = plt.colormaps[cmap](np.linspace(0.1, 0.9, num)) - return [plc.to_hex(category_colors[i]) for i in range(num)] - - @staticmethod - def palplot(*args, **kwargs): - sns.palplot(*args, **kwargs) - - @staticmethod - def adjust_opacity(colors: str | list[str], alpha: float): - if isinstance(colors, str): - colors = [colors] - - adjusted_colors = [] - for color in colors: - # 將顏色轉換為RGB表示 - r, g, b = int(color[1:3], 16), int(color[3:5], 16), int(color[5:7], 16) - # 調整透明度 - r_new = int(alpha * r + (1 - alpha) * 255) - g_new = int(alpha * g + (1 - alpha) * 255) - b_new = int(alpha * b + (1 - alpha) * 255) - # 轉換為新的色碼 - new_color = '#{:02X}{:02X}{:02X}'.format(r_new, g_new, b_new) - adjusted_colors.append(new_color) - return adjusted_colors - - @staticmethod - def color_maker(obj, cmap='Blues'): - colors = np.nan_to_num(obj, nan=0) - scalar_map = plt.cm.ScalarMappable(cmap=colormaps[cmap]) # create a scalar map for the colorbar - scalar_map.set_array(colors) - return scalar_map, colors + color_cycle = cycler(color=['b', 'g', 'r', 'c', 'm', 'y', 'k']) + + linecolor = [{'line': 
'#1a56db', 'edge': '#0F50A6', 'face': '#5983D9'}, + {'line': '#046c4e', 'edge': '#1B591F', 'face': '#538C4A'}, + {'line': '#c81e1e', 'edge': '#f05252', 'face': '#f98080'}] + + # colors = ['#FF3333', '#33FF33', '#FFFF33', '#5555FF', '#B94FFF', '#AAAAAA', '#748690'] # the last one is "unknown" + + colors1 = ['#A65E58', '#A5BF6B', '#F2BF5E', '#3F83BF', '#B777C2', '#D1CFCB'] + colors2 = ['#A65E58', '#A5BF6B', '#F2BF5E', '#3F83BF', '#B777C2', '#D1CFCB', '#96c8e6'] + colors3 = ['#A65E58', '#A5BF6B', '#a6710d', '#F2BF5E', '#3F83BF', '#B777C2', '#D1CFCB', '#96c8e6'] # POC SOC + + colors_mutiWater = ['#A65E58', '#c18e8a', '#A5BF6B', '#c5d6a0', '#F2BF5E', '#3F83BF', '#c089ca', '#d3acda', + '#D1CFCB'] + colors_mutiWater2 = ['#A65E58', '#96c8e6', '#A5BF6B', '#96c8e6', '#F2BF5E', '#3F83BF', '#c089ca', '#96c8e6', + '#D1CFCB'] # water + + color_choose = {'Clean': ['#1d4a9f', '#84a7e9'], + 'Transition': ['#4a9f1d', '#a7e984'], + 'Event': ['#9f1d4a', '#e984a7']} + + paired = [plt.get_cmap('Paired')(i) for i in range(4)] + + @staticmethod + def getColor(num: int = 6, cmap: str = 'jet_r'): + category_colors = plt.colormaps[cmap](np.linspace(0.1, 0.9, num)) + return [plc.to_hex(category_colors[i]) for i in range(num)] + + @staticmethod + def palplot(*args, **kwargs): + sns.palplot(*args, **kwargs) + + @staticmethod + def adjust_opacity(colors: str | list[str], alpha: float): + if isinstance(colors, str): + colors = [colors] + + adjusted_colors = [] + for color in colors: + # 將顏色轉換為RGB表示 + r, g, b = int(color[1:3], 16), int(color[3:5], 16), int(color[5:7], 16) + # 調整透明度 + r_new = int(alpha * r + (1 - alpha) * 255) + g_new = int(alpha * g + (1 - alpha) * 255) + b_new = int(alpha * b + (1 - alpha) * 255) + # 轉換為新的色碼 + new_color = '#{:02X}{:02X}{:02X}'.format(r_new, g_new, b_new) + adjusted_colors.append(new_color) + return adjusted_colors + + @staticmethod + def color_maker(obj, cmap='Blues'): + colors = np.nan_to_num(obj, nan=0) + scalar_map = plt.cm.ScalarMappable(cmap=colormaps[cmap]) # create a scalar map for the colorbar + scalar_map.set_array(colors) + return scalar_map, colors if __name__ == '__main__': - Color.palplot(Color.colors2) + Color.palplot(Color.colors2) diff --git a/AeroViz/plot/utils/_decorator.py b/AeroViz/plot/utils/_decorator.py deleted file mode 100644 index 738be24..0000000 --- a/AeroViz/plot/utils/_decorator.py +++ /dev/null @@ -1,74 +0,0 @@ -from functools import wraps - -import matplotlib.pyplot as plt - -__all__ = ['set_figure'] - - -# For more details please see https://matplotlib.org/stable/users/explain/customizing.html - - -def set_figure(func=None, - *, - figsize: tuple | None = None, - fs: int | None = None, - fw: str = None, - autolayout: bool = True - ): - def decorator(_func): - @wraps(_func) - def wrapper(*args, **kwargs): - print(f'\t\t Plot: \033[96m{_func.__name__}\033[0m') - - plt.rcParams['mathtext.fontset'] = 'custom' - plt.rcParams['mathtext.rm'] = 'Times New Roman' - plt.rcParams['mathtext.it'] = 'Times New Roman: italic' - plt.rcParams['mathtext.bf'] = 'Times New Roman: bold' - plt.rcParams['mathtext.default'] = 'regular' - - # The font properties used by `text.Text`. 
- # The text, annotate, label, title, ticks, are used to create text - plt.rcParams['font.family'] = 'Times New Roman' - plt.rcParams['font.weight'] = fw or 'normal' - plt.rcParams['font.size'] = fs or 8 - - plt.rcParams['axes.titlesize'] = 'large' - plt.rcParams['axes.titleweight'] = 'bold' - plt.rcParams['axes.labelweight'] = 'bold' - - # color - plt.rcParams['axes.prop_cycle'] = plt.cycler(color=['b', 'g', 'r', 'c', 'm', 'y', 'k']) - - plt.rcParams['xtick.labelsize'] = 'medium' - plt.rcParams['ytick.labelsize'] = 'medium' - - # matplotlib.font_manager.FontProperties ---> matplotlib.rcParams - plt.rcParams['legend.loc'] = 'best' - plt.rcParams['legend.frameon'] = False - plt.rcParams['legend.fontsize'] = 'small' - plt.rcParams['legend.title_fontsize'] = 'medium' - plt.rcParams['legend.handlelength'] = 1.5 - plt.rcParams['legend.labelspacing'] = 0.7 - - plt.rcParams['figure.figsize'] = figsize or (5, 4) - plt.rcParams['figure.dpi'] = 200 - plt.rcParams['figure.autolayout'] = autolayout - - if ~autolayout: - plt.rcParams['figure.subplot.left'] = 0.1 - plt.rcParams['figure.subplot.right'] = 0.875 - plt.rcParams['figure.subplot.top'] = 0.875 - plt.rcParams['figure.subplot.bottom'] = 0.125 - - # plt.rcParams['figure.constrained_layout.use'] = True - - plt.rcParams['savefig.transparent'] = True - - return _func(*args, **kwargs) - - return wrapper - - if func is None: - return decorator - - return decorator(func) diff --git a/AeroViz/plot/utils/_unit.py b/AeroViz/plot/utils/_unit.py index 04ecc9c..1eaa96d 100644 --- a/AeroViz/plot/utils/_unit.py +++ b/AeroViz/plot/utils/_unit.py @@ -5,51 +5,51 @@ class Unit: - file_path = Path(__file__).parent / 'units.json' - data = None - - def __new__(cls, unit: str): - cls.data = cls.load_jsonfile() - try: - value = cls.data[unit] - return r'${}$'.format(value.replace(' ', r'\ ')) - except KeyError: - print(f"Attribute '{unit}' not found. Using default value.") - return r'${}$'.format(unit.replace(' ', r'\ ')) if unit is not None else 'None' - - @classmethod - def load_jsonfile(cls): - """ 讀取 JSON 檔中數據并將其變成屬性 """ - try: - with open(cls.file_path, 'r', encoding='utf-8') as f: - return json.load(f) - - except FileNotFoundError: - print(f"JSON file '{cls.file_path}' not found.") - except json.JSONDecodeError: - print(f"Invalid JSON format in '{cls.file_path}'.") - - @classmethod - def update_jsonfile(cls, key, value): - """ 更新JSON檔 """ - with open(cls.file_path, 'r', encoding='utf-8') as f: - old_data = json.load(f) - - old_data[key] = value - - with open(cls.file_path, 'w', encoding='utf-8') as f: - json.dump(old_data, f, indent=4) - - @classmethod - def del_jsonfile(cls, key): - """ 更新JSON檔 """ - with open(cls.file_path, 'r', encoding='utf-8') as f: - old_data = json.load(f) - - if key in old_data: - del old_data[key] - - with open(cls.file_path, 'w', encoding='utf-8') as f: - json.dump(old_data, f, indent=4) - else: - print(f"Key '{key}' not found.") + file_path = Path(__file__).parent / 'units.json' + data = None + + def __new__(cls, unit: str): + cls.data = cls.load_jsonfile() + try: + value = cls.data[unit] + return r'${}$'.format(value.replace(' ', r'\ ')) + except KeyError: + print(f"Attribute '{unit}' not found. 
Using default value.") + return r'${}$'.format(unit.replace(' ', r'\ ')) if unit is not None else 'None' + + @classmethod + def load_jsonfile(cls): + """ 讀取 JSON 檔中數據并將其變成屬性 """ + try: + with open(cls.file_path, 'r', encoding='utf-8') as f: + return json.load(f) + + except FileNotFoundError: + print(f"JSON file '{cls.file_path}' not found.") + except json.JSONDecodeError: + print(f"Invalid JSON format in '{cls.file_path}'.") + + @classmethod + def update_jsonfile(cls, key, value): + """ 更新JSON檔 """ + with open(cls.file_path, 'r', encoding='utf-8') as f: + old_data = json.load(f) + + old_data[key] = value + + with open(cls.file_path, 'w', encoding='utf-8') as f: + json.dump(old_data, f, indent=4) + + @classmethod + def del_jsonfile(cls, key): + """ 更新JSON檔 """ + with open(cls.file_path, 'r', encoding='utf-8') as f: + old_data = json.load(f) + + if key in old_data: + del old_data[key] + + with open(cls.file_path, 'w', encoding='utf-8') as f: + json.dump(old_data, f, indent=4) + else: + print(f"Key '{key}' not found.") diff --git a/AeroViz/plot/utils/plt_utils.py b/AeroViz/plot/utils/plt_utils.py new file mode 100644 index 0000000..ae79ebb --- /dev/null +++ b/AeroViz/plot/utils/plt_utils.py @@ -0,0 +1,92 @@ +from functools import wraps +from typing import Literal + +import matplotlib.pyplot as plt +from matplotlib.pyplot import Axes + +__all__ = ['set_figure', 'combine_legends', 'auto_label_pct'] + + +def set_figure(func=None, + *, + figsize: tuple | None = None, + fs: int | None = None, + fw: str = None, + autolayout: bool = True): + # For more details please see https://matplotlib.org/stable/users/explain/customizing.html + def decorator(_func): + @wraps(_func) + def wrapper(*args, **kwargs): + print(f'\n\tPlot:\033[96m {_func.__name__}\033[0m') + + plt.rcParams['mathtext.fontset'] = 'custom' + plt.rcParams['mathtext.rm'] = 'Times New Roman' + plt.rcParams['mathtext.it'] = 'Times New Roman: italic' + plt.rcParams['mathtext.bf'] = 'Times New Roman: bold' + plt.rcParams['mathtext.default'] = 'regular' + + # The font properties used by `text.Text`. 
+ # The text, annotate, label, title, ticks, are used to create text + plt.rcParams['font.family'] = 'Times New Roman' + plt.rcParams['font.weight'] = fw or 'normal' + plt.rcParams['font.size'] = fs or 8 + + plt.rcParams['axes.titlesize'] = 'large' + plt.rcParams['axes.titleweight'] = 'bold' + plt.rcParams['axes.labelweight'] = 'bold' + + # color + plt.rcParams['axes.prop_cycle'] = plt.cycler(color=['b', 'g', 'r', 'c', 'm', 'y', 'k']) + + plt.rcParams['xtick.labelsize'] = 'medium' + plt.rcParams['ytick.labelsize'] = 'medium' + + # matplotlib.font_manager.FontProperties ---> matplotlib.rcParams + plt.rcParams['legend.loc'] = 'best' + plt.rcParams['legend.frameon'] = False + plt.rcParams['legend.fontsize'] = 'small' + plt.rcParams['legend.title_fontsize'] = 'medium' + plt.rcParams['legend.handlelength'] = 1.5 + plt.rcParams['legend.labelspacing'] = 0.7 + + plt.rcParams['figure.figsize'] = figsize or (4, 4) + plt.rcParams['figure.dpi'] = 300 + plt.rcParams['figure.autolayout'] = autolayout + + if not autolayout: + plt.rcParams['figure.subplot.left'] = 0.1 + plt.rcParams['figure.subplot.right'] = 0.875 + plt.rcParams['figure.subplot.top'] = 0.875 + plt.rcParams['figure.subplot.bottom'] = 0.125 + + # plt.rcParams['figure.constrained_layout.use'] = True + + plt.rcParams['savefig.transparent'] = True + + return _func(*args, **kwargs) + + return wrapper + + if func is None: + return decorator + + return decorator(func) + + +def combine_legends(axes_list: list[Axes]) -> tuple[list, list]: + return ( + [legend for axes in axes_list for legend in axes.get_legend_handles_labels()[0]], + [label for axes in axes_list for label in axes.get_legend_handles_labels()[1]] + ) + + +def auto_label_pct(pct, + symbol: bool = True, + include_pct: bool = False, + ignore: Literal["inner", "outer"] = 'inner', + value: float = 2): + if not symbol: + return '' + cond = pct <= value if ignore == 'inner' else pct > value + label = '' if cond else '{:.1f}'.format(pct) + return '' if label == '' else label + '%' if include_pct else label diff --git a/AeroViz/plot/utils/sklearn_utils.py b/AeroViz/plot/utils/sklearn_utils.py new file mode 100644 index 0000000..9fd41f6 --- /dev/null +++ b/AeroViz/plot/utils/sklearn_utils.py @@ -0,0 +1,49 @@ +import numpy as np +from sklearn.linear_model import LinearRegression +from tabulate import tabulate + +__all__ = ['linear_regression_base'] + + +def linear_regression_base(x_array: np.ndarray, + y_array: np.ndarray, + columns: str | list[str] | None = None, + positive: bool = True, + fit_intercept: bool = True): + if len(x_array.shape) > 1 and x_array.shape[1] >= 2: + model = LinearRegression(positive=positive, fit_intercept=fit_intercept).fit(x_array, y_array) + + coefficients = model.coef_[0].round(3) + intercept = model.intercept_[0].round(3) if fit_intercept else 'None' + r_square = model.score(x_array, y_array).__round__(3) + y_predict = model.predict(x_array) + + equation = ' + '.join([f'{coeff:.3f} * {col}' for coeff, col in zip(coefficients, columns)]) + equation = equation.replace(' + 0.000 * Const', '') # Remove terms with coefficient 0 + + text = 'y = ' + str(equation) + '\n' + r'$\bf R^2 = $' + str(r_square) + tab = tabulate([[*coefficients, intercept, r_square]], headers=[*columns, 'intercept', 'R^2'], floatfmt=".3f", + tablefmt="fancy_grid") + print('\n' + tab) + + return text, y_predict, coefficients + + else: + x_array = x_array.reshape(-1, 1) + y_array = y_array.reshape(-1, 1) + + model = LinearRegression(positive=positive, fit_intercept=fit_intercept).fit(x_array, 
y_array) + + slope = model.coef_[0][0].round(3) + intercept = model.intercept_[0].round(3) if fit_intercept else 'None' + r_square = model.score(x_array, y_array).__round__(3) + y_predict = model.predict(x_array) + + text = np.poly1d([slope, intercept]) + text = 'y = ' + str(text).replace('\n', "") + '\n' + r'$\bf R^2 = $' + str(r_square) + + tab = tabulate([[slope, intercept, r_square]], headers=['slope', 'intercept', 'R^2'], floatfmt=".3f", + tablefmt="fancy_grid") + print('\n' + tab) + + return text, y_predict, slope diff --git a/AeroViz/plot/utils/units.json b/AeroViz/plot/utils/units.json index eb2b154..3d3da6f 100644 --- a/AeroViz/plot/utils/units.json +++ b/AeroViz/plot/utils/units.json @@ -6,6 +6,8 @@ "T_OC": "OC (\u00b5g/m^3)", "PM1": "PM_{1} (\u00b5g/m^3)", "PM25": "PM_{2.5} (\u00b5g/m^3)", + "PM2.5": "PM_{2.5} (\u00b5g/m^3)", + "PM10": "PM_{10} (\u00b5g/m^3)", "SIA": "SIA (\u00b5g/m^3)", "POC": "POC (\u00b5g/m^3)", "SOC": "SOC (\u00b5g/m^3)", @@ -20,12 +22,15 @@ "Babs": "Mie Amb Absorption (1/Mm)", "Babs_dry": "Mie Dry Absorption (1/Mm)", "Absorption": "Absorption (1/Mm)", + "abs": "Absorption (1/Mm)", "Bext": "Mie Amb Extinction (1/Mm)", "Bext_dry": "Mie Dry Extinction (1/Mm)", "Extinction": "Extinction (1/Mm)", + "ext": "Extinction (1/Mm)", "Bsca": "Mie Amb Scattering (1/Mm)", "Bsca_dry": "Mie Dry Scattering (1/Mm)", "Scattering": "Scattering (1/Mm)", + "sca": "Scattering (1/Mm)", "Diurnal": "Hour", "PBLH": "PBLH (m)", "VC": "VC (m²/s)", diff --git a/AeroViz/plot/violin.py b/AeroViz/plot/violin.py new file mode 100644 index 0000000..8772e16 --- /dev/null +++ b/AeroViz/plot/violin.py @@ -0,0 +1,80 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +from matplotlib.pyplot import Figure, Axes +from pandas import DataFrame + +from AeroViz.plot.utils import * + +__all__ = ['violin'] + + +@set_figure(fw='bold') +def violin(df: DataFrame | dict, + unit: str, + ax: Axes | None = None, + **kwargs + ) -> tuple[Figure, Axes]: + """ + Generate a violin plot for multiple data sets. + + Parameters + ---------- + df : pd.DataFrame or dict + A mapping from category names to pandas DataFrames containing the data. + unit : str + The unit for the data being plotted. + ax : matplotlib.axes.Axes, optional + The Axes object to draw the plot onto. If not provided, a new figure will be created. + **kwargs : dict + Additional keyword arguments to be passed to the violinplot function. + + Returns + ------- + fig : Figure + The matplotlib Figure object. + ax : Axes + The matplotlib Axes object with the scatter plot. 
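+
+    Examples
+    --------
+    A minimal usage sketch; the column names and values below are purely
+    illustrative and are not part of the package:
+
+    >>> import pandas as pd
+    >>> df = pd.DataFrame({'Dry-Season': [12.1, 15.3, 9.8],
+    ...                    'Wet-Season': [6.2, 7.4, 5.9]})
+    >>> fig, ax = violin(df, unit='PM25')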
+ + """ + fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax) + + data = df.to_numpy() + + data = data[~np.isnan(data).any(axis=1)] + + grps = data.shape[1] + + width = 0.6 + block = width / 2 + x_position = np.arange(grps) + + plt.boxplot(data, positions=x_position, widths=0.15, + showfliers=False, showmeans=True, meanline=False, patch_artist=True, + capprops=dict(linewidth=0), + whiskerprops=dict(linewidth=1.5, color='k', alpha=1), + boxprops=dict(linewidth=1.5, color='k', facecolor='#4778D3', alpha=1), + meanprops=dict(marker='o', markeredgecolor='black', markerfacecolor='white', markersize=6), + medianprops=dict(linewidth=1.5, ls='-', color='k', alpha=1)) + + sns.violinplot(data=data, density_norm='area', color='#4778D3', inner=None) + + for violin, alpha in zip(ax.collections[:], [0.5] * len(ax.collections[:])): + violin.set_alpha(alpha) + violin.set_edgecolor(None) + + plt.scatter(x_position, data.mean(), marker='o', facecolor='white', edgecolor='k', s=10) + + xlim = kwargs.get('xlim') or (x_position[0] - (width / 2 + block), x_position[-1] + (width / 2 + block)) + ylim = kwargs.get('ylim') or (0, None) + xlabel = kwargs.get('xlabel') or '' + ylabel = kwargs.get('ylabel') or Unit(unit) + xticks = kwargs.get('xticks') or [x.replace('-', '\n') for x in list(df.keys())] + + ax.set(xlim=xlim, ylim=ylim, xlabel=xlabel, ylabel=ylabel, title=kwargs.get('title')) + ax.set_xticks(x_position, xticks, fontweight='bold', fontsize=12) + + plt.show() + + return fig, ax diff --git a/AeroViz/process/__init__.py b/AeroViz/process/__init__.py deleted file mode 100644 index f357cb7..0000000 --- a/AeroViz/process/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -from pathlib import Path - -from pandas import read_csv, concat - -from AeroViz.process.script import (ImpactProc, ImproveProc, ChemicalProc, ParticleSizeDistProc, - ExtinctionDistProc, OthersProc) - -__all__ = ['DataProcess'] - - -class DataProcess: - def __new__(cls, file_path, reset: bool = False, save_file: Path | str = 'All_data.csv'): - file_path = Path(file_path) - - print(f'\t\t \033[96m --- Processing Data --- \033[0m') - - if file_path.exists() and not reset: - return read_csv(file_path, parse_dates=['Time'], index_col='Time', - na_values=('-', 'E', 'F'), low_memory=False) - - processor = [ImpactProc, ChemicalProc, ImproveProc, ParticleSizeDistProc, ExtinctionDistProc, OthersProc] - reset = [False, False, False, False, False, False] - save_filename = ['IMPACT.csv', 'chemical.csv', 'revised_IMPROVE.csv', 'PSD.csv', 'PESD.csv', 'Others.csv'] - - _df = concat([processor().process_data(reset, save_filename) for processor, reset, save_filename in - zip(processor, reset, save_filename)], axis=1) - - # 7. 
save result - _df.to_csv(file_path) - - return _df diff --git a/AeroViz/process/core/DataProc.py b/AeroViz/process/core/DataProc.py deleted file mode 100644 index 9e6fc66..0000000 --- a/AeroViz/process/core/DataProc.py +++ /dev/null @@ -1,19 +0,0 @@ -from abc import ABC, abstractmethod -from pathlib import Path - -from pandas import DataFrame - -__all__ = ['DataProc'] - - -class DataProc(ABC): - def __init__(self): - pass - - @abstractmethod - def process_data(self, - reset: bool = False, - save_filename: str | Path = None - ) -> DataFrame: - """ Implementation of processing data """ - pass diff --git a/AeroViz/process/core/SizeDist.py b/AeroViz/process/core/SizeDist.py deleted file mode 100644 index dd299c1..0000000 --- a/AeroViz/process/core/SizeDist.py +++ /dev/null @@ -1,90 +0,0 @@ -from typing import Literal - -import numpy as np -from pandas import DataFrame - -__all__ = ['SizeDist'] - - -class SizeDist: - """ - Attributes - ---------- - - _data: DataFrame - The processed PSD data stored as a pandas DataFrame. - - _dp: ndarray - The array of particle diameters from the PSD data. - - _dlogdp: ndarray - The array of logarithmic particle diameter bin widths. - - _index: DatetimeIndex - The index of the DataFrame representing time. - - _state: str - The state of particle size distribution data. - - Methods - ------- - number() - Calculate number distribution properties. - - surface(filename='PSSD_dSdlogdp.csv') - Calculate surface distribution properties. - - volume(filename='PVSD_dVdlogdp.csv') - Calculate volume distribution properties. - - """ - - def __init__(self, - data: DataFrame, - state: Literal['dN', 'ddp', 'dlogdp'] = 'dlogdp', - weighting: Literal['n', 's', 'v', 'ext_in', 'ext_ex'] = 'n' - ): - self._data = data - self._dp = np.array(self._data.columns, dtype=float) - self._dlogdp = np.full_like(self._dp, 0.014) - self._index = self._data.index.copy() - self._state = state - self._weighting = weighting - - @property - def data(self) -> DataFrame: - return self._data - - @property - def dp(self) -> np.ndarray: - return self._dp - - @dp.setter - def dp(self, new_dp: np.ndarray): - self._dp = new_dp - - @property - def dlogdp(self) -> np.ndarray: - return self._dlogdp - - @dlogdp.setter - def dlogdp(self, new_dlogdp: np.ndarray): - self._dlogdp = new_dlogdp - - @property - def index(self): - return self._index - - @property - def state(self): - return self._state - - @state.setter - def state(self, value): - if value not in ['dN', 'dlogdp', 'ddp']: - raise ValueError("state must be 'dlogdp' or 'ddp'") - self._state = value - - @property - def weighting(self): - return self._weighting diff --git a/AeroViz/process/core/__init__.py b/AeroViz/process/core/__init__.py deleted file mode 100644 index 8647a84..0000000 --- a/AeroViz/process/core/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .DataProc import DataProc -from .SizeDist import SizeDist - -__all__ = ['DataProc', 'SizeDist'] diff --git a/AeroViz/process/method/PyMieScatt_update.py b/AeroViz/process/method/PyMieScatt_update.py deleted file mode 100644 index de88672..0000000 --- a/AeroViz/process/method/PyMieScatt_update.py +++ /dev/null @@ -1,567 +0,0 @@ -# -*- coding: utf-8 -*- -# http://pymiescatt.readthedocs.io/en/latest/forward.html -import numpy as np -from scipy.special import jv, yv -from scipy.integrate import trapezoid -import warnings - - -def coerceDType(d): - if type(d) is not np.ndarray: - return np.array(d) - else: - return d - - -def MieQ(m, wavelength, diameter, nMedium=1.0, asDict=False, asCrossSection=False): 
- # http://pymiescatt.readthedocs.io/en/latest/forward.html#MieQ - nMedium = nMedium.real - m /= nMedium - wavelength /= nMedium - x = np.pi * diameter / wavelength - if x == 0: - return 0, 0, 0, 1.5, 0, 0, 0 - elif x <= 0.05: - return RayleighMieQ(m, wavelength, diameter, nMedium, asDict) - elif x > 0.05: - nmax = np.round(2 + x + 4 * (x ** (1 / 3))) - n = np.arange(1, nmax + 1) - n1 = 2 * n + 1 - n2 = n * (n + 2) / (n + 1) - n3 = n1 / (n * (n + 1)) - x2 = x ** 2 - - an, bn = Mie_ab(m, x) - - qext = (2 / x2) * np.sum(n1 * (an.real + bn.real)) - qsca = (2 / x2) * np.sum(n1 * (an.real ** 2 + an.imag ** 2 + bn.real ** 2 + bn.imag ** 2)) - qabs = qext - qsca - - g1 = [an.real[1:int(nmax)], - an.imag[1:int(nmax)], - bn.real[1:int(nmax)], - bn.imag[1:int(nmax)]] - g1 = [np.append(x, 0.0) for x in g1] - g = (4 / (qsca * x2)) * np.sum( - (n2 * (an.real * g1[0] + an.imag * g1[1] + bn.real * g1[2] + bn.imag * g1[3])) + ( - n3 * (an.real * bn.real + an.imag * bn.imag))) - - qpr = qext - qsca * g - qback = (1 / x2) * (np.abs(np.sum(n1 * ((-1) ** n) * (an - bn))) ** 2) - qratio = qback / qsca - if asCrossSection: - css = np.pi * (diameter / 2) ** 2 - cext = css * qext - csca = css * qsca - cabs = css * qabs - cpr = css * qpr - cback = css * qback - cratio = css * qratio - if asDict: - return dict(Cext=cext, Csca=csca, Cabs=cabs, g=g, Cpr=cpr, Cback=cback, Cratio=cratio) - else: - return cext, csca, cabs, g, cpr, cback, cratio - else: - if asDict: - return dict(Qext=qext, Qsca=qsca, Qabs=qabs, g=g, Qpr=qpr, Qback=qback, Qratio=qratio) - else: - return qext, qsca, qabs, g, qpr, qback, qratio - - -def Mie_ab(m, x): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#Mie_ab - mx = m * x - nmax = np.round(2 + x + 4 * (x ** (1 / 3))) - nmx = np.round(max(nmax, np.abs(mx)) + 16) - n = np.arange(1, nmax + 1) # - nu = n + 0.5 # - - sx = np.sqrt(0.5 * np.pi * x) - - px = sx * jv(nu, x) # - p1x = np.append(np.sin(x), px[0:int(nmax) - 1]) # - - chx = -sx * yv(nu, x) # - ch1x = np.append(np.cos(x), chx[0:int(nmax) - 1]) # - - gsx = px - (0 + 1j) * chx # - gs1x = p1x - (0 + 1j) * ch1x # - - # B&H Equation 4.89 - Dn = np.zeros(int(nmx), dtype=complex) - for i in range(int(nmx) - 1, 1, -1): - Dn[i - 1] = (i / mx) - (1 / (Dn[i] + i / mx)) - - D = Dn[1:int(nmax) + 1] # Dn(mx), drop terms beyond nMax - da = D / m + n / x - db = m * D + n / x - - an = (da * px - p1x) / (da * gsx - gs1x) - bn = (db * px - p1x) / (db * gsx - gs1x) - - return an, bn - - -def Mie_cd(m, x): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#Mie_cd - mx = m * x - nmax = np.round(2 + x + 4 * (x ** (1 / 3))) - nmx = np.round(max(nmax, np.abs(mx)) + 16) - n = np.arange(1, int(nmax) + 1) - nu = n + 0.5 - - cnx = np.zeros(int(nmx), dtype=complex) - - for j in np.arange(nmx, 1, -1): - cnx[int(j) - 2] = j - mx * mx / (cnx[int(j) - 1] + j) - - cnn = np.array([cnx[b] for b in range(0, len(n))]) - - jnx = np.sqrt(np.pi / (2 * x)) * jv(nu, x) - jnmx = np.sqrt((2 * mx) / np.pi) / jv(nu, mx) - - yx = np.sqrt(np.pi / (2 * x)) * yv(nu, x) - hx = jnx + (1.0j) * yx - - b1x = np.append(np.sin(x) / x, jnx[0:int(nmax) - 1]) - y1x = np.append(-np.cos(x) / x, yx[0:int(nmax) - 1]) - - hn1x = b1x + (1.0j) * y1x - ax = x * b1x - n * jnx - ahx = x * hn1x - n * hx - - numerator = jnx * ahx - hx * ax - c_denominator = ahx - hx * cnn - d_denominator = m * m * ahx - hx * cnn - - cn = jnmx * numerator / c_denominator - dn = jnmx * m * numerator / d_denominator - - return cn, dn - - -def RayleighMieQ(m, wavelength, diameter, nMedium=1.0, asDict=False, 
asCrossSection=False): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#RayleighMieQ - nMedium = nMedium.real - m /= nMedium - wavelength /= nMedium - x = np.pi * diameter / wavelength - if x == 0: - return 0, 0, 0, 1.5, 0, 0, 0 - elif x > 0: - LL = (m ** 2 - 1) / (m ** 2 + 2) # Lorentz-Lorenz term - LLabsSq = np.abs(LL) ** 2 - qsca = 8 * LLabsSq * (x ** 4) / 3 # B&H eq 5.8 - qabs = 4 * x * LL.imag # B&H eq. 5.11 - qext = qsca + qabs - qback = 1.5 * qsca # B&H eq. 5.9 - qratio = 1.5 - g = 0 - qpr = qext - if asCrossSection: - css = np.pi * (diameter / 2) ** 2 - cext = css * qext - csca = css * qsca - cabs = css * qabs - cpr = css * qpr - cback = css * qback - cratio = css * qratio - if asDict: - return dict(Cext=cext, Csca=csca, Cabs=cabs, g=g, Cpr=cpr, Cback=cback, Cratio=cratio) - else: - return cext, csca, cabs, g, cpr, cback, cratio - else: - if asDict: - return dict(Qext=qext, Qsca=qsca, Qabs=qabs, g=g, Qpr=qpr, Qback=qback, Qratio=qratio) - else: - return qext, qsca, qabs, g, qpr, qback, qratio - - -def AutoMieQ(m, wavelength, diameter, nMedium=1.0, crossover=0.01, asDict=False, asCrossSection=False): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#AutoMieQ - nMedium = nMedium.real - m_eff = m / nMedium - wavelength_eff = wavelength / nMedium - x_eff = np.pi * diameter / wavelength_eff - if x_eff == 0: - return 0, 0, 0, 1.5, 0, 0, 0 - elif x_eff < crossover: - return RayleighMieQ(m, wavelength, diameter, nMedium, asDict=asDict, asCrossSection=asCrossSection) - else: - return MieQ(m, wavelength, diameter, nMedium, asDict=asDict, asCrossSection=asCrossSection) - - -def LowFrequencyMieQ(m, wavelength, diameter, nMedium=1.0, asDict=False, asCrossSection=False): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#LowFrequencyMieQ - nMedium = nMedium.real - m /= nMedium - wavelength /= nMedium - x = np.pi * diameter / wavelength - if x == 0: - return 0, 0, 0, 1.5, 0, 0, 0 - elif x > 0: - n = np.arange(1, 3) - n1 = 2 * n + 1 - n2 = n * (n + 2) / (n + 1) - n3 = n1 / (n * (n + 1)) - x2 = x ** 2 - - an, bn = LowFrequencyMie_ab(m, x) - - qext = (2 / x2) * np.sum(n1 * (an.real + bn.real)) - qsca = (2 / x2) * np.sum(n1 * (an.real ** 2 + an.imag ** 2 + bn.real ** 2 + bn.imag ** 2)) - qabs = qext - qsca - - g1 = [an.real[1:2], an.imag[1:2], bn.real[1:2], bn.imag[1:2]] - g1 = [np.append(x, 0.0) for x in g1] - g = (4 / (qsca * x2)) * np.sum( - (n2 * (an.real * g1[0] + an.imag * g1[1] + bn.real * g1[2] + bn.imag * g1[3])) + ( - n3 * (an.real * bn.real + an.imag * bn.imag))) - - qpr = qext - qsca * g - qback = (1 / x2) * (np.abs(np.sum(n1 * ((-1) ** n) * (an - bn))) ** 2) - qratio = qback / qsca - - if asCrossSection: - css = np.pi * (diameter / 2) ** 2 - cext = css * qext - csca = css * qsca - cabs = css * qabs - cpr = css * qpr - cback = css * qback - cratio = css * qratio - if asDict: - return dict(Cext=cext, Csca=csca, Cabs=cabs, g=g, Cpr=cpr, Cback=cback, Cratio=cratio) - else: - return cext, csca, cabs, g, cpr, cback, cratio - else: - if asDict: - return dict(Qext=qext, Qsca=qsca, Qabs=qabs, g=g, Qpr=qpr, Qback=qback, Qratio=qratio) - else: - return qext, qsca, qabs, g, qpr, qback, qratio - - -def LowFrequencyMie_ab(m, x): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#LowFrequencyMie_ab - # B&H page 131 - m2 = m ** 2 - LL = (m ** 2 - 1) / (m ** 2 + 2) - x3 = x ** 3 - x5 = x ** 5 - x6 = x ** 6 - - a1 = (-2j * x3 / 3) * LL - (2j * x5 / 5) * LL * (m2 - 2) / (m2 + 2) + (4 * x6 / 9) * (LL ** 2) - a2 = (-1j * x5 / 15) * (m2 - 1) / (2 * m2 + 3) - b1 = (-1j * 
x5 / 45) * (m2 - 1) - b2 = 0 + 0j - an = np.append(a1, a2) - bn = np.append(b1, b2) - return an, bn - - -def AutoMie_ab(m, x): - if x < 0.5: - return LowFrequencyMie_ab(m, x) - else: - return Mie_ab(m, x) - - -def Mie_SD(m, wavelength, dp, ndp, nMedium=1.0, SMPS=True, interpolate=False, asDict=False): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#Mie_SD - nMedium = nMedium.real - m /= nMedium - wavelength /= nMedium - dp = coerceDType(dp) - ndp = coerceDType(ndp) - _length = np.size(dp) - Q_ext = np.zeros(_length) - Q_sca = np.zeros(_length) - Q_abs = np.zeros(_length) - Q_pr = np.zeros(_length) - Q_back = np.zeros(_length) - Q_ratio = np.zeros(_length) - g = np.zeros(_length) - - # scaling of 1e-6 to cast in units of inverse megameters - see docs - aSDn = np.pi * ((dp / 2) ** 2) * ndp * (1e-6) - # _logdp = np.log10(dp) - - for i in range(_length): - Q_ext[i], Q_sca[i], Q_abs[i], g[i], Q_pr[i], Q_back[i], Q_ratio[i] = AutoMieQ(m, wavelength, dp[i], nMedium) - - if SMPS: - Bext = np.sum(Q_ext * aSDn) - Bsca = np.sum(Q_sca * aSDn) - Babs = Bext - Bsca - Bback = np.sum(Q_back * aSDn) - Bratio = np.sum(Q_ratio * aSDn) - bigG = np.sum(g * Q_sca * aSDn) / np.sum(Q_sca * aSDn) - Bpr = Bext - bigG * Bsca - else: - Bext = np.trapz(Q_ext * aSDn, dp) - Bsca = np.trapz(Q_sca * aSDn, dp) - Babs = Bext - Bsca - Bback = np.trapz(Q_back * aSDn, dp) - Bratio = np.trapz(Q_ratio * aSDn, dp) - bigG = np.trapz(g * Q_sca * aSDn, dp) / np.trapz(Q_sca * aSDn, dp) - Bpr = Bext - bigG * Bsca - - if asDict: - return dict(Bext=Bext, Bsca=Bsca, Babs=Babs, G=bigG, Bpr=Bpr, Bback=Bback, Bratio=Bratio) - else: - return Bext, Bsca, Babs, bigG, Bpr, Bback, Bratio - - -def ScatteringFunction(m, wavelength, diameter, nMedium=1.0, minAngle=0, maxAngle=180, angularResolution=0.5, - space='theta', angleMeasure='radians', normalization=None): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#ScatteringFunction - nMedium = nMedium.real - m /= nMedium - wavelength /= nMedium - x = np.pi * diameter / wavelength - - _steps = int(1 + (maxAngle - minAngle) / angularResolution) # default 361 - - if angleMeasure in ['radians', 'RADIANS', 'rad', 'RAD']: - adjust = np.pi / 180 - elif angleMeasure in ['gradians', 'GRADIANS', 'grad', 'GRAD']: - adjust = 1 / 200 - else: - adjust = 1 - - if space in ['q', 'qspace', 'QSPACE', 'qSpace']: - # _steps *= 10 - _steps += 1 - if minAngle == 0: - minAngle = 1e-5 - # measure = np.logspace(np.log10(minAngle),np.log10(maxAngle),_steps)*np.pi/180 - measure = np.linspace(minAngle, maxAngle, _steps) * np.pi / 180 - _q = True - else: - measure = np.linspace(minAngle, maxAngle, _steps) * adjust - _q = False - if x == 0: - return measure, 0, 0, 0 - _measure = np.linspace(minAngle, maxAngle, _steps) * np.pi / 180 - SL = np.zeros(_steps) - SR = np.zeros(_steps) - SU = np.zeros(_steps) - for j in range(_steps): - u = np.cos(_measure[j]) - S1, S2 = MieS1S2(m, x, u) - SL[j] = (np.sum(np.conjugate(S1) * S1)).real - SR[j] = (np.sum(np.conjugate(S2) * S2)).real - SU[j] = (SR[j] + SL[j]) / 2 - if normalization in ['m', 'M', 'max', 'MAX']: - SL /= np.max(SL) - SR /= np.max(SR) - SU /= np.max(SU) - elif normalization in ['t', 'T', 'total', 'TOTAL']: - SL /= np.trapz(SL, measure) - SR /= np.trapz(SR, measure) - SU /= np.trapz(SU, measure) - if _q: - measure = (4 * np.pi / wavelength) * np.sin(measure / 2) * (diameter / 2) - return measure, SL, SR, SU - - -def SF_SD(m, wavelength, dp, ndp, nMedium=1.0, minAngle=0, maxAngle=180, angularResolution=0.5, space='theta', - angleMeasure='radians', 
normalization=None): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#SF_SD - nMedium = nMedium.real - m /= nMedium - wavelength /= nMedium - - _steps = int(1 + (maxAngle - minAngle) / angularResolution) - ndp = coerceDType(ndp) - dp = coerceDType(dp) - SL = np.zeros(_steps) - SR = np.zeros(_steps) - SU = np.zeros(_steps) - kwargs = {'minAngle': minAngle, - 'maxAngle': maxAngle, - 'angularResolution': angularResolution, - 'space': space, - 'normalization': None} - for n, d in zip(ndp, dp): - measure, l, r, u = ScatteringFunction(m, wavelength, d, **kwargs) - SL += l * n - SR += r * n - SU += u * n - if normalization in ['n', 'N', 'number', 'particles']: - _n = np.trapz(ndp, dp) - SL /= _n - SR /= _n - SU /= _n - elif normalization in ['m', 'M', 'max', 'MAX']: - SL /= np.max(SL) - SR /= np.max(SR) - SU /= np.max(SU) - elif normalization in ['t', 'T', 'total', 'TOTAL']: - SL /= np.trapz(SL, measure) - SR /= np.trapz(SR, measure) - SU /= np.trapz(SU, measure) - return measure, SL, SR, SU - - -def MieS1S2(m, x, mu): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#MieS1S2 - nmax = np.round(2 + x + 4 * np.power(x, 1 / 3)) - an, bn = AutoMie_ab(m, x) - pin, taun = MiePiTau(mu, nmax) - n = np.arange(1, int(nmax) + 1) - n2 = (2 * n + 1) / (n * (n + 1)) - S1 = np.sum(n2[0:len(an)] * (an * pin[0:len(an)] + bn * taun[0:len(bn)])) - S2 = np.sum(n2[0:len(an)] * (an * taun[0:len(an)] + bn * pin[0:len(bn)])) - return S1, S2 - - -def MiePiTau(mu, nmax): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#MiePiTau - p = np.zeros(int(nmax)) - t = np.zeros(int(nmax)) - p[0] = 1 - p[1] = 3 * mu - t[0] = mu - t[1] = 3.0 * np.cos(2 * np.arccos(mu)) - for n in range(2, int(nmax)): - p[n] = ((2 * n + 1) * (mu * p[n - 1]) - (n + 1) * p[n - 2]) / n - t[n] = (n + 1) * mu * p[n] - (n + 2) * p[n - 1] - return p, t - - -def MatrixElements(m, wavelength, diameter, mu, nMedium=1.0): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#MatrixElements - nMedium = nMedium.real - m /= nMedium - wavelength /= nMedium - x = np.pi * diameter / wavelength - # B&H eqs. 
4.77, where mu=cos(theta) - S1, S2 = MieS1S2(m, x, mu) - S11 = 0.5 * (np.abs(S2) ** 2 + np.abs(S1) ** 2) - S12 = 0.5 * (np.abs(S2) ** 2 - np.abs(S1) ** 2) - S33 = 0.5 * (np.conjugate(S2) * S1 + S2 * np.conjugate(S1)) - S34 = 0.5j * (S1 * np.conjugate(S2) - S2 * np.conjugate(S1)) - return S11, S12, S33, S34 - - -def MieQ_withDiameterRange(m, wavelength, nMedium=1.0, diameterRange=(10, 1000), nd=1000, logD=False): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#MieQ_withDiameterRange - nMedium = nMedium.real - m /= nMedium - wavelength /= nMedium - if logD: - diameters = np.logspace(np.log10(diameterRange[0]), np.log10(diameterRange[1]), nd) - else: - diameters = np.linspace(diameterRange[0], diameterRange[1], nd) - _qD = [AutoMieQ(m, wavelength, diameter) for diameter in diameters] - qext = np.array([q[0] for q in _qD]) - qsca = np.array([q[1] for q in _qD]) - qabs = np.array([q[2] for q in _qD]) - g = np.array([q[3] for q in _qD]) - qpr = np.array([q[4] for q in _qD]) - qback = np.array([q[5] for q in _qD]) - qratio = np.array([q[6] for q in _qD]) - return diameters, qext, qsca, qabs, g, qpr, qback, qratio - - -def MieQ_withWavelengthRange(m, diameter, nMedium=1.0, wavelengthRange=(100, 1600), nw=1000, logW=False): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#MieQ_withWavelengthRange - nMedium = nMedium.real - _m = m / nMedium - _wavelengthRange = tuple([x / nMedium for x in wavelengthRange]) - if type(_m) == complex and len(_wavelengthRange) == 2: - if logW: - wavelengths = np.logspace(np.log10(_wavelengthRange[0]), np.log10(_wavelengthRange[1]), nw) - else: - wavelengths = np.linspace(_wavelengthRange[0], _wavelengthRange[1], nw) - _qD = [AutoMieQ(_m, wavelength, diameter) for wavelength in wavelengths] - elif type(_m) in [np.ndarray, list, tuple] and len(_wavelengthRange) == len(_m): - wavelengths = _wavelengthRange - _qD = [MieQ(emm, wavelength, diameter) for emm, wavelength in zip(_m, wavelengths)] - else: - warnings.warn("Error: the size of the input data is mismatched. 
Please examine your inputs and try again.") - return - - qext = np.array([q[0] for q in _qD]) - qsca = np.array([q[1] for q in _qD]) - qabs = np.array([q[2] for q in _qD]) - g = np.array([q[3] for q in _qD]) - qpr = np.array([q[4] for q in _qD]) - qback = np.array([q[5] for q in _qD]) - qratio = np.array([q[6] for q in _qD]) - return wavelengths, qext, qsca, qabs, g, qpr, qback, qratio - - -def MieQ_withSizeParameterRange(m, nMedium=1.0, xRange=(1, 10), nx=1000, logX=False): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#MieQ_withSizeParameterRange - nMedium = nMedium.real - m /= nMedium - _xRange = tuple([x * nMedium for x in xRange]) # I think - if logX: - xValues = list(np.logspace(np.log10(_xRange[0]), np.log10(_xRange[1]), nx)) - else: - xValues = list(np.linspace(_xRange[0], _xRange[1], nx)) - dValues = [1000 * x / np.pi for x in xValues] - _qD = [AutoMieQ(m, 1000, d) for d in dValues] - qext = np.array([q[0] for q in _qD]) - qsca = np.array([q[1] for q in _qD]) - qabs = np.array([q[2] for q in _qD]) - g = np.array([q[3] for q in _qD]) - qpr = np.array([q[4] for q in _qD]) - qback = np.array([q[5] for q in _qD]) - qratio = np.array([q[6] for q in _qD]) - return xValues, qext, qsca, qabs, g, qpr, qback, qratio - - -def Mie_Lognormal(m, wavelength, geoStdDev, geoMean, numberOfParticles, nMedium=1.0, numberOfBins=10000, lower=1, - upper=1000, gamma=[1], returnDistribution=False, decomposeMultimodal=False, asDict=False): - # http://pymiescatt.readthedocs.io/en/latest/forward.html#Mie_Lognormal - nMedium = nMedium.real - m /= nMedium - wavelength /= nMedium - ithPart = lambda gammai, dp, dpgi, sigmagi: (gammai / (np.sqrt(2 * np.pi) * np.log(sigmagi) * dp)) * np.exp( - -(np.log(dp) - np.log(dpgi)) ** 2 / (2 * np.log(sigmagi) ** 2)) - dp = np.logspace(np.log10(lower), np.log10(upper), numberOfBins) - if all([type(x) in [list, tuple, np.ndarray] for x in [geoStdDev, geoMean]]): - # multimodal - if len(gamma) == 1 and (len(geoStdDev) == len(geoMean) > 1): - # gamma is distributed equally among modes - gamma = [1 for x in geoStdDev] - gamma = [float(x / np.sum(gamma)) for x in gamma] - ndpi = [numberOfParticles * ithPart(g, dp, dpg, sg) for g, dpg, sg in zip(gamma, geoMean, geoStdDev)] - ndp = np.sum(ndpi, axis=0) - elif len(gamma) == len(geoStdDev) == len(geoMean): - # gamma is fully specified for each mode - gamma = [float(x / np.sum(gamma)) for x in gamma] - ndpi = [numberOfParticles * ithPart(g, dp, dpg, sg) for g, dpg, sg in zip(gamma, geoMean, geoStdDev)] - ndp = np.sum(ndpi, axis=0) - else: - # user fucked up - warnings.warn("Not enough parameters to fully specify each mode.") - return None - else: - # unimodal - decomposeMultimodal = False - ndp = numberOfParticles * ithPart(1, dp, geoMean, geoStdDev) - if ndp[-1] > np.max(ndp) / 100 or ndp[0] > np.max(ndp) / 100: - warnings.warn( - "Warning: distribution may not be compact on the specified interval. 
Consider using a higher upper bound.") - Bext, Bsca, Babs, bigG, Bpr, Bback, Bratio = Mie_SD(m, wavelength, dp, ndp, SMPS=False) - if returnDistribution: - if decomposeMultimodal: - if asDict == True: - return dict(Bext=Bext, Bsca=Bsca, Babs=Babs, bigG=bigG, Bpr=Bpr, Bback=Bback, - Bratio=Bratio), dp, ndp, ndpi - else: - return Bext, Bsca, Babs, bigG, Bpr, Bback, Bratio, dp, ndp, ndpi - else: - if asDict == True: - return dict(Bext=Bext, Bsca=Bsca, Babs=Babs, bigG=bigG, Bpr=Bpr, Bback=Bback, Bratio=Bratio), dp, ndp - else: - return Bext, Bsca, Babs, bigG, Bpr, Bback, Bratio, dp, ndp - else: - if asDict == True: - return dict(Bext=Bext, Bsca=Bsca, Babs=Babs, bigG=bigG, Bpr=Bpr, Bback=Bback, Bratio=Bratio) - else: - return Bext, Bsca, Babs, bigG, Bpr, Bback, Bratio diff --git a/AeroViz/process/method/__init__.py b/AeroViz/process/method/__init__.py deleted file mode 100644 index 9431cbd..0000000 --- a/AeroViz/process/method/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .mie_theory import Mie_Q, Mie_MEE, Mie_PESD, internal, external, core_shell, sensitivity -from .prop import properties diff --git a/AeroViz/process/method/mie_theory.py b/AeroViz/process/method/mie_theory.py deleted file mode 100644 index dd6722c..0000000 --- a/AeroViz/process/method/mie_theory.py +++ /dev/null @@ -1,258 +0,0 @@ -from typing import Sequence, Literal - -import numpy as np -import pandas as pd -from .PyMieScatt_update import AutoMieQ -from numpy import exp, log, log10, sqrt, pi - - -def Mie_Q(m: complex, - wavelength: float, - dp: float | Sequence[float] - ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: - """ - Calculate Mie scattering efficiency (Q) for given spherical particle diameter(s). - - Parameters - ---------- - m : complex - The complex refractive index of the particles. - wavelength : float - The wavelength of the incident light (in nm). - dp : float | Sequence[float] - Particle diameters (in nm), can be a single value or Sequence object. - - Returns - ------- - Q_ext : ndarray - The Mie extinction efficiency for each particle diameter. - Q_sca : ndarray - The Mie scattering efficiency for each particle diameter. - Q_abs : ndarray - The Mie absorption efficiency for each particle diameter. - - Examples - -------- - >>> Q_ext, Q_sca, Q_abs = Mie_Q(m=complex(1.5, 0.02), wavelength=550, dp=[100, 200, 300, 400]) - """ - # Ensure dp is a numpy array - dp = np.atleast_1d(dp) - - # Transpose for proper unpacking - Q_ext, Q_sca, Q_abs, g, Q_pr, Q_back, Q_ratio = np.array([AutoMieQ(m, wavelength, _dp) for _dp in dp]).T - - return Q_ext, Q_sca, Q_abs - - -def Mie_MEE(m: complex, - wavelength: float, - dp: float | Sequence[float], - density: float - ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: - """ - Calculate mass extinction efficiency and other parameters. - - Parameters - ---------- - m : complex - The complex refractive index of the particles. - wavelength : float - The wavelength of the incident light. - dp : float | Sequence[float] - List of particle sizes or a single value. - density : float - The density of particles. - - Returns - ------- - MEE : ndarray - The mass extinction efficiency for each particle diameter. - MSE : ndarray - The mass scattering efficiency for each particle diameter. - MAE : ndarray - The mass absorption efficiency for each particle diameter. 
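# A minimal reference sketch of the mass normalization behind Mie_MEE: once
# Q_ext is known, the mass extinction efficiency is plain arithmetic. All
# numbers below are assumed, for illustration only (dp in nm, density in
# g/cm3, result in m2/g).
import numpy as np

Q_ext = np.array([0.12, 1.05, 2.31])   # assumed Mie extinction efficiencies
dp = np.array([100.0, 300.0, 600.0])   # particle diameters, nm
density = 1.5                          # assumed particle density, g/cm3

# the factor 1000 converts cm3 g-1 nm-1 to m2 g-1, as in Mie_MEE
MEE = 3 * Q_ext / (2 * density * dp) * 1000
print(MEE)                             # roughly [1.2, 3.5, 3.9] m2/g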
- - Examples - -------- - >>> MEE, MSE, MAE = Mie_MEE(m=complex(1.5, 0.02), wavelength=550, dp=[100, 200, 300, 400], density=1.2) - """ - Q_ext, Q_sca, Q_abs = Mie_Q(m, wavelength, dp) - - MEE = (3 * Q_ext) / (2 * density * dp) * 1000 - MSE = (3 * Q_sca) / (2 * density * dp) * 1000 - MAE = (3 * Q_abs) / (2 * density * dp) * 1000 - - return MEE, MSE, MAE - - -def Mie_PESD(m: complex, - wavelength: float = 550, - dp: float | Sequence[float] = None, - ndp: float | Sequence[float] = None, - lognormal: bool = False, - dp_range: tuple = (1, 2500), - geoMean: float = 200, - geoStdDev: float = 2, - numberOfParticles: float = 1e6, - numberOfBins: int = 167, - ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: - """ - Simultaneously calculate "extinction distribution" and "integrated results" using the Mie_Q method. - - Parameters - ---------- - m : complex - The complex refractive index of the particles. - wavelength : float - The wavelength of the incident light. - dp : float | Sequence[float] - Particle sizes. - ndp : float | Sequence[float] - Number concentration from SMPS or APS in the units of dN/dlogdp. - lognormal : bool, optional - Whether to use lognormal distribution for ndp. Default is False. - dp_range : tuple, optional - Range of particle sizes. Default is (1, 2500) nm. - geoMean : float, optional - Geometric mean of the particle size distribution. Default is 200 nm. - geoStdDev : float, optional - Geometric standard deviation of the particle size distribution. Default is 2. - numberOfParticles : float, optional - Number of particles. Default is 1e6. - numberOfBins : int, optional - Number of bins for the lognormal distribution. Default is 167. - - Returns - ------- - ext_dist : ndarray - The extinction distribution for the given data. - sca_dist : ndarray - The scattering distribution for the given data. - abs_dist : ndarray - The absorption distribution for the given data. - - Notes - ----- - return in "dext/dlogdp", please make sure input the dNdlogdp data. - - Examples - -------- - >>> Ext, Sca, Abs = Mie_PESD(m=complex(1.5, 0.02), wavelength=550, dp=[100, 200, 500, 1000], ndp=[100, 50, 30, 20]) - """ - if lognormal: - dp = np.logspace(log10(dp_range[0]), log10(dp_range[1]), numberOfBins) - - ndp = numberOfParticles * (1 / (log(geoStdDev) * sqrt(2 * pi)) * - exp(-(log(dp) - log(geoMean)) ** 2 / (2 * log(geoStdDev) ** 2))) - - # dN / dlogdp - ndp = np.atleast_1d(ndp) - - Q_ext, Q_sca, Q_abs = Mie_Q(m, wavelength, dp) - - # The 1e-6 here is so that the final value is the same as the unit 1/10^6m. - Ext = Q_ext * (pi / 4 * dp ** 2) * ndp * 1e-6 - Sca = Q_sca * (pi / 4 * dp ** 2) * ndp * 1e-6 - Abs = Q_abs * (pi / 4 * dp ** 2) * ndp * 1e-6 - - return Ext, Sca, Abs - - -def internal(dist: pd.Series, - dp: float | Sequence[float], - wavelength: float = 550, - result_type: Literal['extinction', 'scattering', 'absorption'] = 'extinction' - ) -> np.ndarray: - """ - Calculate the extinction distribution by internal mixing model. - - Parameters - ---------- - dist : pd.Series - Particle size distribution data. - dp : float | Sequence[float] - Diameter(s) of the particles, either a single value or a sequence. - wavelength : float, optional - Wavelength of the incident light, default is 550 nm. - result_type : {'extinction', 'scattering', 'absorption'}, optional - Type of result to calculate, defaults to 'extinction'. - - Returns - ------- - np.ndarray - Extinction distribution calculated based on the internal mixing model. 
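# A minimal sketch, with an assumed dN/dlogdp spectrum and placeholder
# efficiencies, of how a dext/dlogdp distribution (what Mie_PESD returns)
# integrates to an extinction coefficient; dp in nm, N in cm-3, result in Mm-1.
import numpy as np

dp = np.logspace(np.log10(11.8), np.log10(593.5), 50)            # nm
dlogdp = np.diff(np.log10(dp)).mean()                            # constant log10 bin width
dNdlogdp = 1e4 * np.exp(-(np.log10(dp) - np.log10(150)) ** 2 / (2 * 0.25 ** 2))

Q_ext = np.full_like(dp, 2.0)                                    # placeholder Mie efficiencies
dext_dlogdp = Q_ext * (np.pi / 4 * dp ** 2) * dNdlogdp * 1e-6    # 1e-6: nm2 cm-3 -> Mm-1
bext = np.sum(dext_dlogdp * dlogdp)                              # as in retrieve_RI: sum(ext_dist) * dlogdp
print(round(float(bext), 1))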
- """ - ext_dist, sca_dist, abs_dist = Mie_PESD(m=complex(dist['n_amb'], dist['k_amb']), - wavelength=wavelength, - dp=dp, - ndp=np.array(dist[:np.size(dp)])) - - if result_type == 'extinction': - return ext_dist - elif result_type == 'scattering': - return sca_dist - else: - return abs_dist - - # return dict(ext=ext_dist, sca=sca_dist, abs=abs_dist) - - -def external(dist: pd.Series, - dp: float | Sequence[float], - wavelength: float = 550, - result_type: Literal['extinction', 'scattering', 'absorption'] = 'extinction' - ) -> np.ndarray: - """ - Calculate the extinction distribution by external mixing model. - - Parameters - ---------- - dist : pd.Series - Particle size distribution data. - dp : float | Sequence[float] - Diameter(s) of the particles, either a single value or a sequence. - wavelength : float, optional - Wavelength of the incident light, default is 550 nm. - result_type : {'extinction', 'scattering', 'absorption'}, optional - Type of result to calculate, defaults to 'extinction'. - - Returns - ------- - np.ndarray - Extinction distribution calculated based on the external mixing model. - """ - refractive_dic = {'AS_volume_ratio': complex(1.53, 0.00), - 'AN_volume_ratio': complex(1.55, 0.00), - 'OM_volume_ratio': complex(1.54, 0.00), - 'Soil_volume_ratio': complex(1.56, 0.01), - 'SS_volume_ratio': complex(1.54, 0.00), - 'EC_volume_ratio': complex(1.80, 0.54), - 'ALWC_volume_ratio': complex(1.33, 0.00)} - - ndp = np.array(dist[:np.size(dp)]) - mie_results = ( - Mie_PESD(refractive_dic[_specie], wavelength, dp, dist[_specie] / (1 + dist['ALWC_volume_ratio']) * ndp) for - _specie in refractive_dic) - - ext_dist, sca_dist, abs_dist = (np.sum([res[0] for res in mie_results], axis=0), - np.sum([res[1] for res in mie_results], axis=0), - np.sum([res[2] for res in mie_results], axis=0)) - - if result_type == 'extinction': - return ext_dist - elif result_type == 'scattering': - return sca_dist - else: - return abs_dist - - -def core_shell(): - pass - - -def sensitivity(): - pass - - -if __name__ == '__main__': - result = Mie_Q(m=complex(1.5, 0.02), wavelength=550, dp=[100., 200.]) diff --git a/AeroViz/process/method/prop.py b/AeroViz/process/method/prop.py deleted file mode 100644 index 1b30c4b..0000000 --- a/AeroViz/process/method/prop.py +++ /dev/null @@ -1,62 +0,0 @@ -import numpy as np -from numpy import exp, log -from scipy.signal import find_peaks - - -def geometric(dp: np.ndarray, - dist: np.ndarray - ) -> tuple[float, float]: - """ Calculate the geometric mean and standard deviation. """ - - _gmd = (((dist * log(dp)).sum()) / dist.sum()) - - logdp_mesh, gmd_mesh = np.meshgrid(log(dp), _gmd) - _gsd = ((((logdp_mesh - gmd_mesh) ** 2) * dist).sum() / dist.sum()) ** .5 - - return exp(_gmd), exp(_gsd) - - -def contribution(dp: np.ndarray, - dist: np.ndarray - ) -> tuple[float, float, float]: - """ Calculate the relative contribution of each mode. """ - - ultra = dist[(dp >= 11.8) & (dp < 100)].sum() / dist.sum() - accum = dist[(dp >= 100) & (dp < 1000)].sum() / dist.sum() - coars = dist[(dp >= 1000) & (dp < 2500)].sum() / dist.sum() - - return ultra, accum, coars - - -def mode(dp: np.ndarray, - dist: np.ndarray - ) -> np.ndarray: - """ Find three peak mode in distribution. 
""" - - min_value = np.array([dist.min()]) - mode, _ = find_peaks(np.concatenate([min_value, dist, min_value]), distance=len(dist) - 1) - - return dp[mode - 1] - - -def properties(dist, - dp: np.ndarray, - dlogdp: np.ndarray, - weighting: str - ) -> dict: - """ for apply """ - dist = np.array(dist) - - gmd, gsd = geometric(dp, dist) - ultra, accum, coarse = contribution(dp, dist) - peak = mode(dp, dist) - - return {key: round(value, 3) for key, value in - {f'total_{weighting}': (dist * dlogdp).sum(), - f'GMD_{weighting}': gmd, - f'GSD_{weighting}': gsd, - f'mode_{weighting}': peak[0], - f'ultra_{weighting}': ultra, - f'accum_{weighting}': accum, - f'coarse_{weighting}': coarse} - .items()} diff --git a/AeroViz/process/script/AbstractDistCalc.py b/AeroViz/process/script/AbstractDistCalc.py deleted file mode 100644 index 49a3d8f..0000000 --- a/AeroViz/process/script/AbstractDistCalc.py +++ /dev/null @@ -1,143 +0,0 @@ -from abc import ABC, abstractmethod -from functools import partial -from typing import Literal - -import numpy as np -from pandas import DataFrame, concat - -from AeroViz.process.core.SizeDist import SizeDist -from AeroViz.process.method import properties, internal, external, core_shell, sensitivity - - -class AbstractDistCalc(ABC): - @abstractmethod - def useApply(self) -> DataFrame: - pass - - -class NumberDistCalc(AbstractDistCalc): - def __init__(self, psd: SizeDist): - self.psd = psd - - def useApply(self) -> DataFrame: - """ Calculate number distribution """ - return self.psd.data - - -class SurfaceDistCalc(AbstractDistCalc): - def __init__(self, psd: SizeDist): - self.psd = psd - - def useApply(self) -> DataFrame: - """ Calculate surface distribution """ - return self.psd.data.dropna().apply(lambda col: np.pi * self.psd.dp ** 2 * np.array(col), - axis=1, result_type='broadcast').reindex(self.psd.index) - - -class VolumeDistCalc(AbstractDistCalc): - def __init__(self, psd: SizeDist): - self.psd = psd - - def useApply(self) -> DataFrame: - """ Calculate volume distribution """ - return self.psd.data.dropna().apply(lambda col: np.pi / 6 * self.psd.dp ** 3 * np.array(col), - axis=1, result_type='broadcast').reindex(self.psd.index) - - -class PropertiesDistCalc(AbstractDistCalc): - def __init__(self, psd: SizeDist): - self.psd = psd - - def useApply(self) -> DataFrame: - """ Calculate properties of distribution """ - return self.psd.data.dropna().apply(partial(properties, dp=self.psd.dp, dlogdp=self.psd.dlogdp, - weighting=self.psd.weighting), - axis=1, result_type='expand').reindex(self.psd.index) - - -class ExtinctionDistCalc(AbstractDistCalc): - mapping = {'internal': internal, - 'external': external, - 'core_shell': core_shell, - 'sensitivity': sensitivity} - - def __init__(self, - psd: SizeDist, - RI: DataFrame, - method: Literal['internal', 'external', 'utils-shell', 'sensitivity'], - result_type: Literal['extinction', 'scattering', 'absorption'] = 'extinction' - ): - self.psd = psd - self.RI = RI - if method not in ExtinctionDistCalc.mapping: - raise ValueError(f"Invalid method: {method}. 
Valid methods are: {list(ExtinctionDistCalc.mapping.keys())}") - self.method = ExtinctionDistCalc.mapping[method] - self.result_type = result_type - - def useApply(self) -> DataFrame: - """ Calculate volume distribution """ - combined_data = concat([self.psd.data, self.RI], axis=1).dropna() - return combined_data.apply(partial(self.method, dp=self.psd.dp, result_type=self.result_type), - axis=1, result_type='expand').reindex(self.psd.index).set_axis(self.psd.dp, axis=1) - - -# TODO: -class LungDepositsDistCalc(AbstractDistCalc): - - def __init__(self, psd: SizeDist, lung_curve): - self.psd = psd - self.lung_curve = lung_curve - - def useApply(self) -> DataFrame: - pass - - -class DistributionCalculator: # 策略模式 (Strategy Pattern) - """ Interface for distribution calculator """ - - mapping = {'number': NumberDistCalc, - 'surface': SurfaceDistCalc, - 'volume': VolumeDistCalc, - 'property': PropertiesDistCalc, - 'extinction': ExtinctionDistCalc, - 'lung_deposit': LungDepositsDistCalc} - - def __init__(self, - calculator: Literal['number', 'surface', 'volume', 'property', 'extinction'], - psd: SizeDist, - RI: DataFrame = None, - method: str = None, - result_type: str = None - ): - """ - Initialize the DistributionCalculator. - - Parameters: - calculator (CalculatorType): The type of calculator. - psd (SizeDist): The particle size distribution data. - RI (Optional[DataFrame]): The refractive index data. Default is None. - method (Optional[str]): The method to use. Default is None. - result_type (Optional[str]): The result type. Default is None. - """ - if calculator not in DistributionCalculator.mapping.keys(): - raise ValueError( - f"Invalid calculator: {calculator}. Valid calculators are: {list(DistributionCalculator.mapping.keys())}") - self.calculator = DistributionCalculator.mapping[calculator] - self.psd = psd - self.RI = RI - self.method = method - self.result_type = result_type - - def useApply(self) -> DataFrame: - """ - Apply the calculator to the data. - - Returns: - DataFrame: The calculated data. - """ - if self.RI is not None: - return self.calculator(self.psd, self.RI, self.method, self.result_type).useApply() - elif issubclass(self.calculator, (NumberDistCalc, SurfaceDistCalc, VolumeDistCalc, PropertiesDistCalc)): - return self.calculator(self.psd).useApply() - else: - raise ValueError("RI parameter is required for this calculator type") diff --git a/AeroViz/process/script/Chemical.py b/AeroViz/process/script/Chemical.py deleted file mode 100644 index 7ffa99e..0000000 --- a/AeroViz/process/script/Chemical.py +++ /dev/null @@ -1,176 +0,0 @@ -from pathlib import Path - -import numpy as np -from pandas import read_csv, concat, notna, DataFrame - -from AeroViz.process.core import DataProc -from AeroViz.tools.datareader import DataReader - - -class ChemicalProc(DataProc): - """ - A class for process chemical data. - - Parameters: - ----------- - reset : bool, optional - If True, resets the process. Default is False. - filename : str, optional - The name of the file to process. Default is None. - - Methods: - -------- - mass(_df): - Calculate mass-related parameters. - - volume(_df): - Calculate volume-related parameters. - - volume_average_mixing(_df): - Calculate volume average mixing parameters. - - process_data(): - Process data and save the result. - - Attributes: - ----------- - DEFAULT_PATH : Path - The default path for data files. 
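# A minimal sketch of the ammonium-sufficiency test used in ChemicalProc.mass:
# the NH4+ molar amount is compared with what full neutralization of sulfate
# and nitrate would require. Concentrations below are hypothetical (ug/m3).
NH4, SO4, NO3 = 2.0, 3.5, 1.8

status = (NH4 / 18) / (2 * (SO4 / 96) + (NO3 / 62))
if status >= 1:
    AS, AN = 1.375 * SO4, 1.29 * NO3   # reconstruct ammonium sulfate / nitrate
    print('Enough', round(AS, 2), round(AN, 2))
else:
    print('Deficiency', round(status, 2))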
- - Examples: - --------- - - """ - - def __init__(self, file_paths: list[Path | str] = None): - super().__init__() - self.file_paths = [Path(fp) for fp in file_paths] - - @staticmethod - def mass(_df): # Series like - Ammonium, Sulfate, Nitrate, OC, Soil, SS, EC, PM25 = _df - status = (Ammonium / 18) / (2 * (Sulfate / 96) + (Nitrate / 62)) - - if status >= 1: - _df['NH4_status'] = 'Enough' - _df['AS'] = 1.375 * Sulfate - _df['AN'] = 1.29 * Nitrate - - if status < 1: - _df['NH4_status'] = 'Deficiency' - mol_A = Ammonium / 18 - mol_S = Sulfate / 96 - mol_N = Nitrate / 62 - residual = mol_A - 2 * mol_S - - if residual > 0: - _df['AS'] = 1.375 * Sulfate - _df['AN'] = residual * 80 if residual <= mol_N else mol_N * 80 - - else: - _df['AS'] = mol_A / 2 * 132 if mol_A <= 2 * mol_S else mol_S * 132 - _df['AN'] = 0 - - _df['OM'] = 1.8 * OC - _df['Soil'] = 28.57 * Soil - _df['SS'] = 2.54 * SS - _df['EC'] = EC - _df['SIA'] = _df['AS'] + _df['AN'] - _df['total_mass'] = _df[['AS', 'AN', 'OM', 'Soil', 'SS', 'EC']].sum() - species_lst = ['AS', 'AN', 'OM', 'Soil', 'SS', 'EC', 'SIA', 'unknown_mass'] - - _df['unknown_mass'] = PM25 - _df['total_mass'] if PM25 >= _df['total_mass'] else 0 - for _species, _val in _df[species_lst].items(): - _df[f'{_species}_ratio'] = _val / PM25 if PM25 >= _df['total_mass'] else _val / _df['total_mass'] - - return _df['NH4_status':] - - @staticmethod - def volume(_df): - _df['AS_volume'] = (_df['AS'] / 1.76) - _df['AN_volume'] = (_df['AN'] / 1.73) - _df['OM_volume'] = (_df['OM'] / 1.4) - _df['Soil_volume'] = (_df['Soil'] / 2.6) - _df['SS_volume'] = (_df['SS'] / 2.16) - _df['EC_volume'] = (_df['EC'] / 1.5) - _df['ALWC_volume'] = _df['ALWC'] - _df['total_volume'] = sum(_df['AS_volume':'EC_volume']) - - for _species, _val in _df['AS_volume':'ALWC_volume'].items(): - _df[f'{_species}_ratio'] = _val / _df['total_volume'] - - _df['density'] = _df['total_mass'] / _df['total_volume'] - return _df['AS_volume':] - - @staticmethod - def volume_average_mixing(_df): - _df['n_dry'] = (1.53 * _df['AS_volume_ratio'] + - 1.55 * _df['AN_volume_ratio'] + - 1.55 * _df['OM_volume_ratio'] + - 1.56 * _df['Soil_volume_ratio'] + - 1.54 * _df['SS_volume_ratio'] + - 1.80 * _df['EC_volume_ratio']) - - _df['k_dry'] = (0.00 * _df['OM_volume_ratio'] + - 0.01 * _df['Soil_volume_ratio'] + - 0.54 * _df["EC_volume_ratio"]) - - # 檢查_df['ALWC']是否缺失 -> 有值才計算ambient的折射率 - if notna(_df['ALWC']): - v_dry = _df['total_volume'] - v_wet = _df['total_volume'] + _df['ALWC'] - - multiplier = v_dry / v_wet - _df['ALWC_volume_ratio'] = (1 - multiplier) - - _df['n_amb'] = (1.53 * _df['AS_volume_ratio'] + - 1.55 * _df['AN_volume_ratio'] + - 1.55 * _df['OM_volume_ratio'] + - 1.56 * _df['Soil_volume_ratio'] + - 1.54 * _df['SS_volume_ratio'] + - 1.80 * _df['EC_volume_ratio']) * multiplier + \ - (1.33 * _df['ALWC_volume_ratio']) - - _df['k_amb'] = (0.00 * _df['OM_volume_ratio'] + - 0.01 * _df['Soil_volume_ratio'] + - 0.54 * _df['EC_volume_ratio']) * multiplier - - _df['gRH'] = (v_wet / v_dry) ** (1 / 3) - - return _df[['n_dry', 'k_dry', 'n_amb', 'k_amb', 'gRH']] - - @staticmethod - def kappa(_df, diameter=0.5): - surface_tension, Mw, density, universal_gas_constant = 0.072, 18, 1, 8.314 # J/mole*K - - A = 4 * (surface_tension * Mw) / (density * universal_gas_constant * (_df['AT'] + 273)) - power = A / diameter - a_w = (_df['RH'] / 100) * (np.exp(-power)) - - _df['kappa_chem'] = (_df['gRH'] ** 3 - 1) * (1 - a_w) / a_w - _df['kappa_vam'] = np.nan - - @staticmethod - def ISORROPIA(): - pass - - def process_data(self, reset: bool = 
False, save_file: Path | str = None) -> DataFrame: - save_file = Path(save_file) - if save_file.exists() and not reset: - return read_csv(save_file, parse_dates=['Time'], index_col='Time') - else: - df = concat([DataReader(file) for file in self.file_paths], axis=1) - - df_mass = df[['NH4+', 'SO42-', 'NO3-', 'O_OC', 'Fe', 'Na+', 'O_EC', 'PM25']].dropna().apply(self.mass, - axis=1) - df_mass['ALWC'] = df['ALWC'] - df_volume = df_mass[['AS', 'AN', 'OM', 'Soil', 'SS', 'EC', 'total_mass', 'ALWC']].dropna().apply( - self.volume, - axis=1) - df_volume['ALWC'] = df['ALWC'] - df_vam = df_volume.dropna().apply(self.volume_average_mixing, axis=1) - - _df = concat([df_mass, df_volume.drop(['ALWC'], axis=1), df_vam], axis=1).reindex(df.index.copy()) - _df.to_csv(save_file) - - return _df diff --git a/AeroViz/process/script/IMPACT.py b/AeroViz/process/script/IMPACT.py deleted file mode 100644 index caa1df9..0000000 --- a/AeroViz/process/script/IMPACT.py +++ /dev/null @@ -1,49 +0,0 @@ -from pathlib import Path - -from pandas import DataFrame, read_csv, concat - -from AeroViz.process.core import DataProc -from AeroViz.tools.datareader import DataReader - - -class ImpactProc(DataProc): - """ - A class for processing impact data. - - Parameters: - ----------- - reset : bool, optional - If True, resets the processing. Default is False. - save_filename : str or Path, optional - The name or path to save the processed data. Default is 'IMPACT.csv'. - - Methods: - -------- - process_data(reset: bool = False, save_filename: str | Path = 'IMPACT.csv') -> DataFrame: - Process data and save the result. - - save_data(data: DataFrame, save_filename: str | Path): - Save processed data to a file. - - Attributes: - ----------- - DEFAULT_PATH : Path - The default path for data files. - - Examples: - --------- - >>> df_custom = ImpactProc().process_data(reset=True, save_filename='custom_file.csv') - """ - - def __init__(self, file_paths: list[Path | str] = None): - super().__init__() - self.file_paths = [Path(fp) for fp in file_paths] - - def process_data(self, reset: bool = False, save_file: Path | str = None) -> DataFrame: - save_file = Path(save_file) - if save_file.exists() and not reset: - return read_csv(save_file, parse_dates=['Time'], index_col='Time') - else: - _df = concat([DataReader(file) for file in self.file_paths], axis=1) - _df.to_csv(save_file) - return _df diff --git a/AeroViz/process/script/IMPROVE.py b/AeroViz/process/script/IMPROVE.py deleted file mode 100644 index fe875b5..0000000 --- a/AeroViz/process/script/IMPROVE.py +++ /dev/null @@ -1,161 +0,0 @@ -from typing import Literal - -from pathlib import Path -from pandas import read_csv, concat, read_json - -from AeroViz.process.core import DataProc -from AeroViz.tools.datareader import DataReader - - -class ImproveProc(DataProc): - """ - A class for process improved chemical data. - - Parameters: - ----------- - reset : bool, optional - If True, resets the process. Default is False. - filename : str, optional - The name of the file to process. Default is None. - version : str, optional - The version of the data process. Should be one of 'revised' or 'modified'. - Default is None. - - Methods: - -------- - revised(_df): - Calculate revised version of particle contribution. - - modified(_df): - Calculate modified version of particle contribution. - - gas(_df): - Calculate gas contribution. - - frh(_RH, version=None): - Helper function to get frh values based on relative humidity (RH) and version. - - process_data(): - Process data and save the result. 
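# A minimal sketch of the gas contribution computed by ImproveProc.gas, with
# hypothetical inputs: air temperature in deg C and NO2 in ppb; results in Mm-1.
AT, NO2 = 25.0, 12.0

scattering_by_gas = 11.4 * 293 / (273 + AT)   # Rayleigh scattering of air
absorption_by_gas = 0.33 * NO2                # NO2 absorption
extinction_by_gas = scattering_by_gas + absorption_by_gas
print(round(extinction_by_gas, 1))            # roughly 15.2 Mm-1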
- - Attributes: - ----------- - DEFAULT_PATH : Path - The default path for data files. - - Examples: - --------- - >>> df = ImproveProc(reset=True, filename='revised_IMPROVE.csv', version='revised').process_data() - - """ - - def __init__(self, file_paths: list[Path | str] = None): - super().__init__() - self.file_paths = [Path(fp) for fp in file_paths] - - @staticmethod - def frh(_RH): - _frh = read_json(Path(__file__).parent.parent.parent / 'plot' / 'utils' / 'fRH.json') - if _RH is not None: - if _RH > 95: - _RH = 95 - _RH = round(_RH) - return _frh.loc[_RH].values.T - - return 1, 1, 1, 1 - - def revised(self, _df): - def mode(Mass): - L_mode = Mass ** 2 / 20 if Mass < 20 else Mass - S_mode = Mass - L_mode if Mass < 20 else 0 - - return L_mode, S_mode - - _frh, _frhss, _frhs, _frhl = self.frh(_df['RH']) - - L_AS, S_AS = mode(_df['AS']) - L_AN, S_AN = mode(_df['AN']) - L_OM, S_OM = mode(_df['OM']) - - _df['AS_ext_dry'] = 2.2 * 1 * S_AS + 4.8 * 1 * L_AS - _df['AN_ext_dry'] = 2.4 * 1 * S_AN + 5.1 * 1 * L_AN - _df['OM_ext_dry'] = 2.8 * S_OM + 6.1 * L_OM - _df['Soil_ext_dry'] = 1 * _df['Soil'] - _df['SS_ext_dry'] = 1.7 * 1 * _df['SS'] - _df['EC_ext_dry'] = 10 * _df['EC'] - _df['total_ext_dry'] = sum(_df['AS_ext_dry':'EC_ext_dry']) - - _df['AS_ext'] = (2.2 * _frhs * S_AS) + (4.8 * _frhl * L_AS) - _df['AN_ext'] = (2.4 * _frhs * S_AN) + (5.1 * _frhl * L_AN) - _df['OM_ext'] = (2.8 * S_OM) + (6.1 * L_OM) - _df['Soil_ext'] = (1 * _df['Soil']) - _df['SS_ext'] = (1.7 * _frhss * _df['SS']) - _df['EC_ext'] = (10 * _df['EC']) - _df['total_ext'] = sum(_df['AS_ext':'EC_ext']) - - _df['ALWC_AS_ext'] = _df['AS_ext'] - _df['AS_ext_dry'] - _df['ALWC_AN_ext'] = _df['AN_ext'] - _df['AN_ext_dry'] - _df['ALWC_SS_ext'] = _df['SS_ext'] - _df['SS_ext_dry'] - _df['ALWC_ext'] = _df['total_ext'] - _df['total_ext_dry'] - - _df['fRH_IMPR'] = _df['total_ext'] / _df['total_ext_dry'] - - return _df['AS_ext_dry':] - - def modified(self, _df): - _frh, _frhss, _frhs, _frhl = self.frh(_df['RH']) - - _df['AS_ext_dry'] = 3 * 1 * _df['AS'] - _df['AN_ext_dry'] = 3 * 1 * _df['AN'] - _df['OM_ext_dry'] = 4 * _df['OM'] - _df['Soil_ext_dry'] = 1 * _df['Soil'] - _df['SS_ext_dry'] = 1.7 * 1 * _df['SS'] - _df['EC_ext_dry'] = 10 * _df['EC'] - _df['total_ext_dry'] = sum(_df['AS_ext_dry':'EC_ext_dry']) - - _df['AS_ext'] = (3 * _frh * _df['AS']) - _df['AN_ext'] = (3 * _frh * _df['AN']) - _df['OM_ext'] = (4 * _df['OM']) - _df['Soil_ext'] = (1 * _df['Soil']) - _df['SS_ext'] = (1.7 * _frhss * _df['SS']) - _df['EC_ext'] = (10 * _df['EC']) - _df['total_ext'] = sum(_df['AS_ext':'EC_ext']) - - _df['ALWC_AS_ext'] = _df['AS_ext'] - _df['AS_ext_dry'] - _df['ALWC_AN_ext'] = _df['AN_ext'] - _df['AN_ext_dry'] - _df['ALWC_SS_ext'] = _df['SS_ext'] - _df['SS_ext_dry'] - _df['ALWC_ext'] = _df['total_ext'] - _df['total_ext_dry'] - - _df['fRH_IMPR'] = _df['total_ext'] / _df['total_ext_dry'] - - return _df['AS_ext_dry':] - - @staticmethod - def gas(_df): - _df['ScatteringByGas'] = (11.4 * 293 / (273 + _df['AT'])) - _df['AbsorptionByGas'] = (0.33 * _df['NO2']) - _df['ExtinctionByGas'] = _df['ScatteringByGas'] + _df['AbsorptionByGas'] - return _df['ScatteringByGas':] - - def process_data(self, reset: bool = False, save_file: Path | str = None, - version: Literal["revised", "modified"] = "revised"): - save_file = Path(save_file) - if save_file.exists() and not reset: - return read_csv(save_file, parse_dates=['Time'], index_col='Time') - else: - # data_files = ['EPB.csv', 'IMPACT.csv', 'chemical.csv'] - df = concat([DataReader(file) for file in self.file_paths], 
axis=1) - - # particle contribution '銨不足不納入計算' - improve_input_df = df.loc[df['NH4_status'] != 'Deficiency', ['AS', 'AN', 'OM', 'Soil', 'SS', 'EC', 'RH']] - - df_improve = improve_input_df.dropna().copy().apply(self.revised if version == 'revised' else self.modified, - axis=1) - - # gas contribution - df_ext_gas = df[['NO2', 'AT']].dropna().copy().apply(self.gas, axis=1) - - _df = concat([df_improve, df_ext_gas], axis=1).reindex(df.index.copy()) - _df.to_csv(save_file) - - return _df diff --git a/AeroViz/process/script/Others.py b/AeroViz/process/script/Others.py deleted file mode 100644 index 15c1b06..0000000 --- a/AeroViz/process/script/Others.py +++ /dev/null @@ -1,65 +0,0 @@ -from pathlib import Path - -import numpy as np -from pandas import read_csv, concat, DataFrame - -from AeroViz.process.core import DataProc -from AeroViz.tools.datareader import DataReader - - -class OthersProc(DataProc): - """ - A class for process impact data. - - Parameters: - ----------- - reset : bool, optional - If True, resets the process. Default is False. - filename : str, optional - The name of the file to process. Default is None. - - Methods: - -------- - process_data(): - Process data and save the result. - - Attributes: - ----------- - DEFAULT_PATH : Path - The default path for data files. - - Examples: - --------- - >>> df = OthersProc().process_data(reset=True, filename=None) - - """ - - def __init__(self, file_paths: Path | list[Path | str] = None): - super().__init__() - self.file_paths = [Path(fp) for fp in file_paths] - - def process_data(self, reset: bool = False, save_file: Path | str = None) -> DataFrame: - save_file = Path(save_file) - if save_file.exists() and not reset: - return read_csv(save_file, parse_dates=['Time'], index_col='Time') - else: - df = concat([DataReader(file) for file in self.file_paths], axis=1) - - results = DataFrame(index=df.index) - - results['PG'] = df[ - ['Scattering', 'Absorption', 'ScatteringByGas', 'AbsorptionByGas']].dropna().copy().apply(np.sum, - axis=1) - results['MAC'] = df['Absorption'] / df['T_EC'] - results['Ox'] = df['NO2'] + df['O3'] - results['N2O5_tracer'] = df['NO2'] * df['O3'] - results['Vis_cal'] = 1096 / df['Extinction'] - # results['fRH_Mix'] = df['Bext'] / df['Extinction'] - # results['fRH_PNSD'] = df['Bext_internal'] / df['Bext_dry'] - results['fRH_IMPR'] = df['total_ext'] / df['total_ext_dry'] - results['OCEC_ratio'] = df['O_OC'] / df['O_EC'] - results['PM1/PM25'] = np.where(df['PM1'] / df['PM25'] < 1, df['PM1'] / df['PM25'], np.nan) - # results['MEE_PNSD'] = df['Bext_internal'] / df['PM25'] - # results['MEE_dry_PNSD'] = df['Bext_dry'] / df['PM25'] - - return results diff --git a/AeroViz/process/script/PSD.py b/AeroViz/process/script/PSD.py deleted file mode 100644 index 95cd683..0000000 --- a/AeroViz/process/script/PSD.py +++ /dev/null @@ -1,103 +0,0 @@ -from pathlib import Path - -from pandas import concat, read_csv, DataFrame - -from AeroViz.process.core import DataProc -from AeroViz.process.core.SizeDist import SizeDist -from AeroViz.process.script.AbstractDistCalc import DistributionCalculator - - -class ParticleSizeDistProc(DataProc): - """ - A class for process particle size distribution (PSD) data. - - Parameters - ---------- - filename : str, optional - The name of the PSD data file. - Defaults to 'PNSD_dNdlogdp.csv' in the default path. - - Attributes - ---------- - file_path : Path - The directory path where the PSD data file is located. - - psd : SizeDist - The SizeDist object. 
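# A minimal sketch of the surface and volume weighting that
# ParticleSizeDistProc.process_data applies through DistributionCalculator:
# dS/dlogdp = pi * dp^2 * dN/dlogdp and dV/dlogdp = pi/6 * dp^3 * dN/dlogdp.
# The grid and concentrations below are hypothetical (dp in nm, N in cm-3).
import numpy as np

dp = np.array([50.0, 100.0, 200.0, 400.0])          # nm
dNdlogdp = np.array([8e3, 1.2e4, 6e3, 1.5e3])       # cm-3

dSdlogdp = np.pi * dp ** 2 * dNdlogdp               # nm2 cm-3
dVdlogdp = np.pi / 6 * dp ** 3 * dNdlogdp           # nm3 cm-3
print(dSdlogdp.round(1), dVdlogdp.round(1))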
- - Methods - ------- - process_data(filename='PSD.csv') - Process and save overall PSD properties. - - Examples - -------- - Example 1: Use default path and filename - >>> psd_data = ParticleSizeDistProc(filename='PNSD_dNdlogdp.csv').process_data(reset=True) - """ - - def __init__(self, file_path: Path | str = None): - super().__init__() - self.file_path = Path(file_path) - - self.psd = SizeDist(read_csv(file_path, parse_dates=['Time'], index_col='Time')) - - def process_data(self, reset: bool = False, save_file: Path | str = None) -> DataFrame: - save_file = Path(save_file) - if save_file.exists() and not reset: - return read_csv(save_file, parse_dates=['Time'], index_col='Time') - - number = DistributionCalculator('number', self.psd).useApply() - surface = DistributionCalculator('surface', self.psd).useApply() - volume = DistributionCalculator('volume', self.psd).useApply() - - surface.to_csv(save_file.parent / 'PSSD_dSdlogdp.csv') - volume.to_csv(save_file.parent / 'PVSD_dVdlogdp.csv') - - result_df = concat( - [DistributionCalculator('property', SizeDist(data=number, weighting='n')).useApply(), - DistributionCalculator('property', SizeDist(data=surface, weighting='s')).useApply(), - DistributionCalculator('property', SizeDist(data=volume, weighting='v')).useApply() - ], axis=1) - - result_df.to_csv(save_file) - return result_df - - -class ExtinctionDistProc(DataProc): - - def __init__(self, file_path: Path | str = 'PNSD_dNdlogdp.csv', file_path_chem: Path | str = 'chemical.csv'): - super().__init__() - self.file_path = Path(file_path) - self.file_path_chem = Path(file_path_chem) - - self.psd = SizeDist(read_csv(file_path, parse_dates=['Time'], index_col='Time')) - self.RI = read_csv(file_path_chem, parse_dates=['Time'], index_col='Time')[['n_dry', 'n_amb', 'k_dry', 'k_amb', - 'AS_volume_ratio', - 'AN_volume_ratio', - 'OM_volume_ratio', - 'Soil_volume_ratio', - 'SS_volume_ratio', - 'EC_volume_ratio', - 'ALWC_volume_ratio']] - - def process_data(self, reset: bool = False, save_file: Path | str = 'PESD.csv'): - save_file = Path(save_file) - if save_file.exists() and not reset: - return read_csv(save_file, parse_dates=['Time']).set_index('Time') - - ext_internal = DistributionCalculator('extinction', self.psd, self.RI, method='internal', - result_type='extinction').useApply() - ext_external = DistributionCalculator('extinction', self.psd, self.RI, method='external', - result_type='extinction').useApply() - - ext_internal.to_csv(save_file.parent / 'PESD_dextdlogdp_internal.csv') - ext_external.to_csv(save_file.parent / 'PESD_dextdlogdp_external.csv') - - result_df = concat([ - DistributionCalculator('property', SizeDist(data=ext_internal, weighting='ext_in')).useApply(), - DistributionCalculator('property', SizeDist(data=ext_internal, weighting='ext_ex')).useApply(), - ], axis=1) - - result_df.to_csv(save_file) - return result_df diff --git a/AeroViz/process/script/PSD_dry.py b/AeroViz/process/script/PSD_dry.py deleted file mode 100644 index eee4d67..0000000 --- a/AeroViz/process/script/PSD_dry.py +++ /dev/null @@ -1,94 +0,0 @@ -from pathlib import Path - -import numpy as np -from pandas import DataFrame, read_csv, concat - -from AeroViz.process.core import DataProc -from AeroViz.process.core.SizeDist import SizeDist -from AeroViz.tools import DataReader - - -class DryPSDProc(DataProc): - """ - A class for process impact data. - - Parameters - ---------- - reset : bool, optional - If True, resets the process. Default is False. - filename : str, optional - The name of the file to process. 
Default is None. - - Methods - ------- - process_data(): - Process data and save the result. - - Attributes - ---------- - DEFAULT_PATH : Path - The default path for data files. - - - Examples - -------- - >>> df = DryPSDProc(reset=True, filename='PNSD_dNdlogdp_dry.csv').process_data() - """ - - def __init__(self, file_path: Path | str = 'PNSD_dNdlogdp.csv', file_path_chem: Path | str = 'chemical.csv'): - super().__init__() - self.file_path = Path(file_path) - self.file_path_chem = Path(file_path_chem) - - self.psd = SizeDist(read_csv(file_path, parse_dates=['Time'], index_col='Time')) - self.RI = read_csv(file_path_chem, parse_dates=['Time'], index_col='Time')[['n_dry', 'n_amb', 'k_dry', 'k_amb', - 'AS_volume_ratio', - 'AN_volume_ratio', - 'OM_volume_ratio', - 'Soil_volume_ratio', - 'SS_volume_ratio', - 'EC_volume_ratio', - 'ALWC_volume_ratio']] - - def process_data(self, reset: bool = False, save_filename: Path | str = None) -> DataFrame: - save_filename = Path(save_filename) - if save_filename.exists() and not reset: - return read_csv(save_filename, parse_dates=['Time']).set_index('Time') - _df = concat([self.psd, self.RI], axis=1) - _df.to_csv(save_filename) - return _df - - -def dry_PNSD_process(dist, dp, **kwargs): - ndp = np.array(dist[:np.size(dp)]) - gRH = resolved_gRH(dp, dist['gRH'], uniform=True) - - dry_dp = dp / gRH - belong_which_ibin = np.digitize(dry_dp, dp) - 1 - - result = {} - for i, (ibin, dn) in enumerate(zip(belong_which_ibin, ndp)): - if dp[ibin] not in result: - result[dp[ibin]] = [] - result[dp[ibin]].append(ndp[i]) - - dry_ndp = [] - for key, val in result.items(): - dry_ndp.append(sum(val) / len(val)) - - return np.array(dry_ndp) - - -def resolved_gRH(dp, gRH=1.31, uniform=True): - if uniform: - return np.array([gRH] * dp.size) - - else: - lognorm_dist = lambda x, geoMean, geoStd: (gRH / (np.log10(geoStd) * np.sqrt(2 * np.pi))) * np.exp( - -(x - np.log10(geoMean)) ** 2 / (2 * np.log10(geoStd) ** 2)) - abc = lognorm_dist(np.log10(dp), 200, 2.0) - return np.where(abc < 1, 1, abc) - - -if __name__ == '__main__': - pass diff --git a/AeroViz/process/script/__init__.py b/AeroViz/process/script/__init__.py deleted file mode 100644 index d27f000..0000000 --- a/AeroViz/process/script/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .Chemical import ChemicalProc -from .IMPACT import ImpactProc -from .IMPROVE import ImproveProc -from .Others import OthersProc -from .PSD import ParticleSizeDistProc, ExtinctionDistProc diff --git a/AeroViz/process/script/retrieve_RI.py b/AeroViz/process/script/retrieve_RI.py deleted file mode 100644 index 4646710..0000000 --- a/AeroViz/process/script/retrieve_RI.py +++ /dev/null @@ -1,70 +0,0 @@ -import numpy as np -import pandas as pd -from pandas import DataFrame - -from AeroViz.process.core.SizeDist import SizeDist -from AeroViz.process.method import Mie_PESD - - -def retrieve_RI(_df: DataFrame, - _PNSD: DataFrame, - nMin: float = 1.33, - nMax: float = 1.60, - kMin: float = 0.00, - kMax: float = 0.60, - spaceSize: int = 31, - dlogdp: float = 0.014 - ) -> DataFrame: - nRange = np.linspace(nMin, nMax, num=spaceSize) - kRange = np.linspace(kMin, kMax, spaceSize) - Delta_array = np.zeros((spaceSize, spaceSize)) - # 同一時間除了折射率其餘數據皆相同 因此在折射率的迴圈外 - bext_mea, bsca_mea, babs_mea = _df['Extinction'], _df['Scattering'], _df['Absorption'] - - dp = SizeDist(data=_PNSD).dp - for ki, k in enumerate(kRange): - for ni, n in enumerate(nRange): - m = n + (1j * k) - ndp = np.array(_df[3:]) - - ext_dist, sca_dist, abs_dist = Mie_PESD(m, 550, dp, ndp) - - 
bext_cal = sum(ext_dist) * dlogdp - bsca_cal = sum(sca_dist) * dlogdp - babs_cal = sum(abs_dist) * dlogdp - - Delta_array[ni][ki] = ((babs_mea - babs_cal) / 18.23) ** 2 + ((bsca_mea - bsca_cal) / 83.67) ** 2 - - min_delta = Delta_array.argmin() - next_n = nRange[(min_delta // spaceSize)] - next_k = kRange[(min_delta % spaceSize)] - - # 將網格變小 - nMin_small = next_n - 0.02 if next_n > 1.33 else 1.33 - nMax_small = next_n + 0.02 - kMin_small = next_k - 0.04 if next_k > 0.04 else 0 - kMax_small = next_k + 0.04 - spaceSize_small = 41 - - nRange_small = np.linspace(nMin_small, nMax_small, spaceSize_small) - kRange_small = np.linspace(kMin_small, kMax_small, spaceSize_small) - Delta_array_small = np.zeros((spaceSize_small, spaceSize_small)) - # 所有數據與大網格一致所以使用上方便數即可 - for ki, k in enumerate(kRange_small): - for ni, n in enumerate(nRange_small): - m = n + (1j * k) - ndp = np.array(_df[3:]) - ext_dist, sca_dist, abs_dist = Mie_PESD(m, 550, dp, ndp) - - bext_cal = sum(ext_dist) * dlogdp - bsca_cal = sum(sca_dist) * dlogdp - babs_cal = sum(abs_dist) * dlogdp - - Delta_array_small[ni][ki] = ((bext_mea - bext_cal) / 18.23) ** 2 + ((bsca_mea - bsca_cal) / 83.67) ** 2 - - min_delta_small = Delta_array_small.argmin() - _df['re_real'] = (nRange_small[(min_delta_small // spaceSize_small)]) - _df['re_imaginary'] = (kRange_small[(min_delta_small % spaceSize_small)]) - - print(f'\t\tReal part:{_df['re_real']}\tIm part:{_df['re_imaginary']}', end='') - return _df['re_real':] diff --git a/AeroViz/rawDataReader/__init__.py b/AeroViz/rawDataReader/__init__.py index cacb003..a2550d8 100644 --- a/AeroViz/rawDataReader/__init__.py +++ b/AeroViz/rawDataReader/__init__.py @@ -1,68 +1,119 @@ from datetime import datetime +from pathlib import Path +from typing import Any +from pandas import Grouper, Timedelta + +from AeroViz.rawDataReader.config.supported_instruments import meta from AeroViz.rawDataReader.script import * -from AeroViz.rawDataReader.utils.config import meta __all__ = ['RawDataReader'] +SUPPORTED_INSTRUMENTS = [ + NEPH, Aurora, SMPS, GRIMM, APS_3321, AE33, AE43, BC1054, + MA350, TEOM, OCEC, IGAC, VOC, EPA, Minion +] + def RawDataReader(instrument_name: str, - _path, - QC: bool = True, - csv_raw: bool = True, - reset: bool = False, - rate: bool = False, - append_data: bool = False, - update_meta=None, - start: datetime | None = None, - end: datetime | None = None, - mean_freq='1h', - csv_out=True, - **kwargs - ): - # Mapping of instrument names to their respective classes - instrument_class_map = { - 'NEPH': NEPH, - 'Aurora': Aurora, - 'Table': Table, - 'EPA_vertical': EPA_vertical, - 'APS_3321': APS_3321, - 'SMPS_TH': SMPS_TH, - 'AE33': AE33, - 'AE43': AE43, - 'BC1054': BC1054, - 'MA350': MA350, - 'TEOM': TEOM, - 'OCEC_RES': OCEC_RES, - 'OCEC_LCRES': OCEC_LCRES, - 'IGAC_TH': IGAC_TH, - 'IGAC_ZM': IGAC_ZM, - 'VOC_TH': VOC_TH, - 'VOC_ZM': VOC_ZM, - 'SMPS_genr': SMPS_genr, - 'SMPS_aim11': SMPS_aim11, - 'GRIMM': GRIMM - # Add other instruments and their corresponding classes here - } - - # Check if the instrument name is in the map - if instrument_name not in meta.keys(): - raise ValueError(f"Instrument name '{instrument_name}' is not valid. 
\nMust be one of: {list(meta.keys())}") - - # Instantiate the class and return the instance - reader_module = instrument_class_map[instrument_name].Reader( - _path=_path, - QC=QC, - csv_raw=csv_raw, - reset=reset, - rate=rate, - append_data=append_data, - update_meta=update_meta - ) - return reader_module( - start=start, - end=end, - mean_freq=mean_freq, - csv_out=csv_out, - **kwargs - ) + path: Path | str, + reset: bool = False, + qc: bool | str = True, + qc_freq: str | None = None, + rate: bool = True, + append_data: bool = False, + start: datetime = None, + end: datetime = None, + mean_freq: str = '1h', + csv_out: bool = True, + **kwargs: Any): + """ + Factory function to instantiate the appropriate reader module for a given instrument and + return the processed data over the specified time range. + + :param instrument_name: The name of the instrument for which to read data. Must be a valid key in the `meta` dictionary. + :param path: The directory where raw data files for the instrument are stored. + :param reset: If True, reset the state and reprocess the data from scratch. + :param qc: If True, apply quality control (QC) to the raw data. + :param qc_freq: Frequency at which to perform QC. Must be one of 'W', 'M', 'Q', 'Y' for weekly, monthly, quarterly, or yearly. + :param rate: If True, calculate rates from the data. + :param append_data: If True, append new data to the existing dataset instead of overwriting it. + :param start: Start time for filtering the data. If None, no start time filtering will be applied. + :param end: End time for filtering the data. If None, no end time filtering will be applied. + :param mean_freq: Resampling frequency for averaging the data. Example: '1h' for hourly mean. + :param csv_out: If True, output the processed data as a CSV file. + + :return: An instance of the reader module corresponding to the specified instrument, which processes the data and returns it in a usable format. + + :raises ValueError: If the `instrument_name` provided is not a valid key in the `meta` dictionary. + :raises ValueError: If the specified path does not exist or is not a directory. + :raises ValueError: If the QC frequency is invalid. + :raises ValueError: If start and end times are not both provided or are invalid. + :raises ValueError: If the mean_freq is not a valid frequency string. + + :Example: + + To read and process data for the BC1054 instrument: + + >>> from pathlib import Path + >>> from datetime import datetime + >>> + >>> data = RawDataReader( + ... instrument_name='BC1054', + ... path=Path('/path/to/data'), + ... start=datetime(2024, 2, 1), + ... end=datetime(2024, 7, 31, 23)) + """ + # Mapping of instrument names to their respective classes + instrument_class_map = {cls.__name__.split('.')[-1]: cls for cls in SUPPORTED_INSTRUMENTS} + + # Check if the instrument name is in the map + if instrument_name not in meta.keys(): + raise ValueError(f"Instrument name '{instrument_name}' is not valid. \nMust be one of: {list(meta.keys())}") + + # 檢查 path 是否存在且是一個目錄 + if not isinstance(path, Path): + path = Path(path) + if not path.exists() or not path.is_dir(): + raise FileNotFoundError(f"The specified path '{path}' does not exist or is not a directory.") + + # Validate the QC frequency + if qc_freq is not None: + try: + Grouper(freq=qc_freq) + except ValueError as e: + raise ValueError(f"Invalid frequency: {qc_freq}. Error: {str(e)}") + except TypeError as e: + raise ValueError(f"Invalid frequency type: {qc_freq}. 
Frequency should be a string.") + + if start and end: + if end.hour == 0 and end.minute == 0 and end.second == 0: + end = end.replace(hour=23, minute=59, second=59) + else: + raise ValueError("Both start and end times must be provided.") + if end <= start: + raise ValueError(f"Invalid time range: start {start} is after end {end}") + + # 驗證 mean_freq 的格式是否正確 + try: + Timedelta(mean_freq) + except ValueError: + raise ValueError( + f"Invalid mean_freq: '{mean_freq}'. It should be a valid frequency string (e.g., '1h', '30min', '1D').") + + # Instantiate the class and return the instance + reader_module = instrument_class_map[instrument_name].Reader( + path=path, + reset=reset, + qc=qc, + qc_freq=qc_freq, + rate=rate, + append_data=append_data, + **kwargs + ) + return reader_module( + start=start, + end=end, + mean_freq=mean_freq, + csv_out=csv_out, + ) diff --git a/AeroViz/rawDataReader/config/__init__.py b/AeroViz/rawDataReader/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/AeroViz/rawDataReader/config/supported_instruments.py b/AeroViz/rawDataReader/config/supported_instruments.py new file mode 100644 index 0000000..6b2c77e --- /dev/null +++ b/AeroViz/rawDataReader/config/supported_instruments.py @@ -0,0 +1,170 @@ +# Description: Configuration file for rawDataReader + +meta = { + "NEPH": { + "pattern": ["*.dat"], + "freq": "5min", + "deter_key": {"Scatter Coe. (550 nm)": ["G"]}, + }, + + "Aurora": { + "pattern": ["*.csv"], + "freq": "1min", + "deter_key": {"Scatter Coe. (550 nm)": ["G"]}, + }, + + "SMPS": { + "pattern": ["*.txt", "*.csv"], + "freq": "6min", + "deter_key": {"Bins": ["all"]}, + }, + + "GRIMM": { + "pattern": ["*.dat"], + "freq": "6min", + "deter_key": {"Bins": ["all"]}, + }, + + "APS_3321": { + "pattern": ["*.txt"], + "freq": "6min", + "deter_key": {"Bins": ["all"]}, + }, + + "AE33": { + "pattern": ["[!ST|!CT|!FV]*[!log]_AE33*.dat"], + "freq": "1min", + "deter_key": {"BC Mass Conc. (880 nm)": ["BC6"]}, + "error_state": [], + }, + + "AE43": { + "pattern": ["[!ST|!CT|!FV]*[!log]_AE43*.dat"], + "freq": "1min", + "deter_key": {"BC Mass Conc. (880 nm)": ["BC6"]}, + "error_state": [], + }, + + "BC1054": { + "pattern": ["*.csv"], + "freq": "1min", + "deter_key": {"BC Mass Conc. (880 nm)": ["BC9"]}, + "error_state": [1, 2, 4, 8, 16, 32, 65536], + }, + + "MA350": { + "pattern": ["*.csv"], + "freq": "1min", + "deter_key": {"BC Mass Conc. 
(880 nm)": ["BC5"]}, + }, + + "TEOM": { + "pattern": ["*.csv"], + "freq": "6min", + "deter_key": { + "PM1.0 Mass Conc.": ["PM_Total"], + "PM1.0 NV Mass Conc.": ["PM_NV"], + }, + }, + + "OCEC": { + "pattern": ["*LCRes.csv"], + "freq": "1h", + "deter_key": { + "Thermal OC": ["Thermal_OC"], + "Thermal EC": ["Thermal_EC"], + "Optical OC": ["Optical_OC"], + "Optical EC": ["Optical_EC"], + "Thermal OC & EC": ["Thermal_OC", "Thermal_EC"], + "Optical OC & EC": ["Optical_OC", "Optical_EC"], + }, + }, + + "IGAC": { + "pattern": ["*.csv"], + "freq": "1h", + "deter_key": { + "Na+": ["Na+"], + "NH4+": ["NH4+"], + "K+": ["K+"], + "Mg2+": ["Mg2+"], + "Ca2+": ["Ca2+"], + "Cl-": ["Cl-"], + "NO2-": ["NO2-"], + "NO3-": ["NO3-"], + "SO42-": ["SO42-"], + "Main Salt (NH4+, NO3-, SO42-)": ["NO3-", "SO42-", "NH4+"], + }, + # https://www.yangyao-env.com/web/product/product_in2.jsp?pd_id=PD1640151884502 + + # HF: 0.08, F-: 0.08, PO43-: None is not measured + "MDL": { + 'HF': None, 'HCl': 0.05, 'HNO2': 0.01, 'HNO3': 0.05, 'G-SO2': 0.05, 'NH3': 0.1, + 'Na+': 0.05, 'NH4+': 0.08, 'K+': 0.08, 'Mg2+': 0.05, 'Ca2+': 0.05, + 'F-': None, 'Cl-': 0.05, 'NO2-': 0.05, 'NO3-': 0.01, 'PO43-': None, 'SO42-': 0.05, + }, + + "MR": { + 'HF': 200, 'HCl': 200, 'HNO2': 200, 'HNO3': 200, 'G-SO2': 200, 'NH3': 300, + 'Na+': 300, 'NH4+': 300, 'K+': 300, 'Mg2+': 300, 'Ca2+': 300, + 'F-': 300, 'Cl-': 300, 'NO2-': 300, 'NO3-': 300, 'PO43-': None, 'SO42-': 300, + } + }, + + "XRF": { + "pattern": ["*.csv"], + "freq": "1h", + "deter_key": { + "Several trace element (Al, Si, Ti, V, Cr, Mn, Fe)": ["Al", "Si", "Ti", "V", "Cr", "Mn", "Fe"], + + }, + # base on Xact 625i Minimum Decision Limit (MDL) for XRF in ng/m3, 60 min sample time + "MDL": { + 'Al': 100, 'Si': 18, 'P': 5.2, 'S': 3.2, 'Cl': 1.7, + 'K': 1.2, 'Ca': 0.3, 'Ti': 1.6, 'V': 0.12, 'Cr': 0.12, + 'Mn': 0.14, 'Fe': 0.17, 'Co': 0.14, 'Ni': 0.096, 'Cu': 0.079, + 'Zn': 0.067, 'Ga': 0.059, 'Ge': 0.056, 'As': 0.063, 'Se': 0.081, + 'Br': 0.1, 'Rb': 0.19, 'Sr': 0.22, 'Y': 0.28, 'Zr': 0.33, + 'Nb': 0.41, 'Mo': 0.48, 'Pd': 2.2, 'Ag': 1.9, 'Cd': 2.5, + 'In': 3.1, 'Sn': 4.1, 'Sb': 5.2, 'Te': 0.6, 'Cs': 0.37, + 'Ba': 0.39, 'La': 0.36, 'Ce': 0.3, 'W': 0.0001, 'Pt': 0.12, + 'Au': 0.1, 'Hg': 0.12, 'Tl': 0.12, 'Pb': 0.13, 'Bi': 0.13 + } + }, + + "VOC": { + "pattern": ["*.csv"], + "freq": "1h", + "key": [ + 'Benzene', 'Toluene', 'Ethylbenzene', 'm/p-Xylene', 'o-Xylene', 'Ethane', 'Propane', 'Isobutane', + 'n-Butane', 'Isopentane', 'n-Pentane', 'n-Hexane', 'n-Heptane', 'n-Octane', 'n-Nonane', 'n-Decane', + 'n-Undecane', 'n-Dodecane', 'Ethylene', 'Propylene', '1-Butene', 't-2-Butene', 'cis-2-Butene', + '1-Pentene', 't-2-Pentene', 'cis-2-Pentene', '1-Hexene', 'Acetylene', 'Cyclopentane', 'Methylcyclopentane', + 'Cyclohexane', 'Methylcyclohexane', 'Isoprene', '2,2-Dimethylbutane', '2,3-Dimethylbutane', + '2-Methylpentane', '3-Methylpentane', '2,4-Dimethylpentane', '2-Methylhexane', '2,3-Dimethylpentane', + '3-Methylheptane', '2,2,4-Trimethylpentane', '2,3,4-Trimethylpentane', '2-Methylheptane', '3-Methylhexane', + 'Styrene', 'Isopropylbenzene', 'n-Propylbenzene', 'm-Ethyltoluene', 'p-Ethyltoluene', 'm-Diethylbenzene', + 'p-Diethylbenzene', '1,3,5-Trimethylbenzene', 'o-Ethyltoluene', '1,2,4-Trimethylbenzene', + '1,2,3-Trimethylbenzene', + '1.2-DCB', '1.4-DCB', '1.3-Butadiene', '1-Octene', '2-Ethyltoluene', '3.4-Ethyltoluene', 'Acetaldehyde', + 'Acetone', 'Butyl Acetate', 'Ethanol', 'Ethyl Acetate', 'Hexane', 'IPA', 'Iso-Propylbenzene', + 'PCE', 'Propene', 'TCE', 'VCM', + ], + "deter_key": None, + }, + + "EPA": { + 
"pattern": ["*.csv"], + "freq": "1h", + "deter_key": {"Items": ["all"]}, + }, + + "Minion": { + "pattern": ["*.csv", "*.xlsx"], + "freq": "1h", + "deter_key": { + "Main Salt (Na+, NH4+, Cl-, NO3-, SO42-)": ["Na+", "NH4+", "Cl-", "NO3-", "SO42-"], + "Several trace element (Al, Ti, V, Cr, Mn, Fe)": ["Al", "Ti", "V", "Cr", "Mn", "Fe"], + }, + }, +} diff --git a/AeroViz/rawDataReader/core/__init__.py b/AeroViz/rawDataReader/core/__init__.py index 37ad6d8..2ad94e7 100644 --- a/AeroViz/rawDataReader/core/__init__.py +++ b/AeroViz/rawDataReader/core/__init__.py @@ -1,400 +1,311 @@ -import json as jsn -import pickle as pkl +import json from abc import ABC, abstractmethod -from datetime import datetime as dtm, timedelta as dtmdt -from itertools import chain +from contextlib import contextmanager +from datetime import datetime from pathlib import Path +from typing import Optional, Generator import numpy as np -from pandas import DataFrame, date_range, concat, to_numeric, to_datetime +import pandas as pd +from pandas import DataFrame, concat, read_pickle, to_numeric +from rich.console import Console +from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn -from ..utils.config import meta +from AeroViz.rawDataReader.config.supported_instruments import meta +from AeroViz.rawDataReader.core.logger import ReaderLogger +from AeroViz.rawDataReader.core.qc import DataQualityControl __all__ = ['AbstractReader'] class AbstractReader(ABC): - nam = 'AbstractReader' + """ + Abstract class for reading raw data from different instruments. Each instrument should have a separate class that + inherits from this class and implements the abstract methods. The abstract methods are `_raw_reader` and `_QC`. + + List the file in the path and read pickle file if it exists, else read raw data and dump the pickle file the + pickle file will be generated after read raw data first time, if you want to re-read the rawdata, please set + 'reset=True' + """ + + nam = 'AbstractReader' + + def __init__(self, + path: Path | str, + reset: bool = False, + qc: bool = True, + qc_freq: Optional[str] = None, + rate: bool = True, + append_data: bool = False, + **kwargs): + + self.path = Path(path) + self.meta = meta[self.nam] + self.logger = ReaderLogger(self.nam, self.path) + + self.reset = reset + self.qc = qc + self.qc_freq = qc_freq + self.rate = rate + self.append = append_data and reset + + self.pkl_nam = self.path / f'_read_{self.nam.lower()}.pkl' + self.csv_nam = self.path / f'_read_{self.nam.lower()}.csv' + self.pkl_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.pkl' + self.csv_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.csv' + self.csv_out = self.path / f'output_{self.nam.lower()}.csv' + + self.size_range = kwargs.get('size_range', (11.8, 593.5)) + + def __call__(self, + start: datetime, + end: datetime, + mean_freq: str = '1h', + csv_out: bool = True, + ) -> DataFrame: + + data = self._run(start, end) + + if data is not None: + if mean_freq: + data = data.resample(mean_freq).mean() + if csv_out: + data.to_csv(self.csv_out) + + return data + + @abstractmethod + def _raw_reader(self, file): + pass + + @abstractmethod + def _QC(self, df: DataFrame) -> DataFrame: + return df + + def _rate_calculate(self, raw_data, qc_data) -> None: + def __base_rate(raw_data, qc_data): + period_size = len(raw_data.resample('1h').mean().index) + + for _nam, _key in self.meta['deter_key'].items(): + _columns_key, _drop_how = (qc_data.keys(), 'all') if _key == ['all'] else (_key, 'any') + + 
sample_size = len(raw_data[_columns_key].resample('1h').mean().copy().dropna(how=_drop_how).index) + qc_size = len(qc_data[_columns_key].resample('1h').mean().copy().dropna(how=_drop_how).index) + + # validate rate calculation + if period_size == 0 or sample_size == 0 or qc_size == 0: + self.logger.warning(f'\t\t No data for this period... skip') + continue + if period_size < sample_size: + self.logger.warning(f'\t\tError: Sample({sample_size}) > Period({period_size})... skip') + continue + if sample_size < qc_size: + self.logger.warning(f'\t\tError: QC({qc_size}) > Sample({sample_size})... skip') + continue + + else: + _sample_rate = round((sample_size / period_size) * 100, 1) + _valid_rate = round((qc_size / sample_size) * 100, 1) + _total_rate = round((qc_size / period_size) * 100, 1) + + self.logger.info(f"\t\t{self.logger.CYAN}{self.logger.ARROW} {_nam}{self.logger.RESET}") + self.logger.info( + f"\t\t\t├─ {'Sample Rate':15}: {self.logger.BLUE}{_sample_rate:>6.1f}%{self.logger.RESET}") + self.logger.info( + f"\t\t\t├─ {'Valid Rate':15}: {self.logger.BLUE}{_valid_rate:>6.1f}%{self.logger.RESET}") + self.logger.info( + f"\t\t\t└─ {'Total Rate':15}: {self.logger.BLUE}{_total_rate:>6.1f}%{self.logger.RESET}") + + if self.meta['deter_key'] is not None: + # use qc_freq to calculate each period rate + if self.qc_freq is not None: + raw_data_grouped = raw_data.groupby(pd.Grouper(freq=self.qc_freq)) + qc_data_grouped = qc_data.groupby(pd.Grouper(freq=self.qc_freq)) + + for (month, _sub_raw_data), (_, _sub_qc_data) in zip(raw_data_grouped, qc_data_grouped): + self.logger.info( + f"\t{self.logger.BLUE}{self.logger.ARROW} Processing: {_sub_raw_data.index[0].strftime('%F')}" + f" to {_sub_raw_data.index[-1].strftime('%F')}{self.logger.RESET}") + + __base_rate(_sub_raw_data, _sub_qc_data) + + else: + __base_rate(raw_data, qc_data) + + def _timeIndex_process(self, _df, user_start=None, user_end=None, append_df=None): + """ + Process time index, resample data, extract specified time range, and optionally append new data. 
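+
+        A minimal sketch of the intended behaviour, assuming hypothetical 5 min
+        data: timestamps such as 00:00:07 and 00:05:02 are rounded to 00:00 and
+        00:05, duplicated stamps are collapsed with first(), rows from append_df
+        (if given) are merged in, and the result is reindexed onto a regular grid
+        such as date_range(user_start or df_start, user_end or df_end, freq='5min').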
+ + :param _df: Input DataFrame with time index + :param user_start: Start of user-specified time range (optional) + :param user_end: End of user-specified time range (optional) + :param append_df: DataFrame to append (optional) + :return: Processed DataFrame + """ + # Round timestamps and remove duplicates + _df = _df.groupby(_df.index.round('1min')).first() + + # Determine frequency + freq = _df.index.inferred_freq or self.meta['freq'] + + # Append new data if provided + if append_df is not None: + append_df.index = append_df.index.round('1min') + _df = pd.concat([append_df.dropna(how='all'), _df.dropna(how='all')]) + _df = _df.loc[~_df.index.duplicated()] + + # Determine time range + df_start, df_end = _df.index.sort_values()[[0, -1]] + + # Create new time index + new_index = pd.date_range(user_start or df_start, user_end or df_end, freq=freq, name='time') + + # Process data: convert to numeric, resample, and reindex + return _df.reindex(new_index) + + def _outlier_process(self, _df): + outlier_file = self.path / 'outlier.json' + + if not outlier_file.exists(): + return _df + + with outlier_file.open('r', encoding='utf-8', errors='ignore') as f: + outliers = json.load(f) + + for _st, _ed in outliers.values(): + _df.loc[_st:_ed] = np.nan + + return _df + + def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None: + try: + raw_data.to_pickle(self.pkl_nam_raw) + raw_data.to_csv(self.csv_nam_raw) + + if self.meta['deter_key'] is not None: + qc_data.to_pickle(self.pkl_nam) + qc_data.to_csv(self.csv_nam) + + except Exception as e: + raise IOError(f"Error saving data. {e}") + + @contextmanager + def progress_reading(self, files: list) -> Generator: + # Create message temporary storage and replace logger method + logs = {level: [] for level in ['info', 'warning', 'error']} + original = {level: getattr(self.logger, level) for level in logs} + + for level, msgs in logs.items(): + setattr(self.logger, level, msgs.append) + + try: + with Progress( + TextColumn("[bold blue]{task.description}", style="bold blue"), + BarColumn(bar_width=25, complete_style="green", finished_style="bright_green"), + TaskProgressColumn(), + TimeRemainingColumn(), + TextColumn("{task.fields[filename]}", style="yellow"), + console=Console(force_terminal=True, color_system="auto", width=120), + expand=False + ) as progress: + task = progress.add_task(f"{self.logger.ARROW} Reading {self.nam} files", total=len(files), filename="") + yield progress, task + finally: + # Restore logger method and output message + for level, msgs in logs.items(): + setattr(self.logger, level, original[level]) + for msg in msgs: + original[level](msg) - # initial config - # input : file path, reset switch + def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]: + files = [f + for file_pattern in self.meta['pattern'] + for pattern in {file_pattern.lower(), file_pattern.upper(), file_pattern} + for f in self.path.glob(pattern) + if f.name not in [self.csv_out.name, self.csv_nam.name, self.csv_nam_raw.name, f'{self.nam}.log']] - # list the file in the path and read pickle file if it exists, else read raw data and dump the pickle file the - # pickle file will be generated after read raw data first time, if you want to re-read the rawdata, please set - # 'reset=True' + if not files: + raise FileNotFoundError(f"No files in '{self.path}' could be read. 
Please check the current path.") - def __init__(self, _path, QC=True, csv_raw=True, reset=False, rate=False, append_data=False, update_meta=None): - # logging.info(f'\n{self.nam}') - # print('='*65) - # logger.info(f"Reading file and process data") + df_list = [] - # class parameter - # self.index = lambda _freq: date_range(_sta, _fin, freq=_freq) - self.path = Path(_path) - self.meta = meta[self.nam] + # Context manager for progress bar display + with self.progress_reading(files) as (progress, task): + for file in files: + progress.update(task, advance=1, filename=file.name) + try: + if (df := self._raw_reader(file)) is not None and not df.empty: + df_list.append(df) + else: + self.logger.warning(f"\tFile {file.name} produced an empty DataFrame or None.") - if update_meta is not None: - self.meta.update(update_meta) + except Exception as e: + self.logger.error(f"Error reading {file.name}: {e}") - self.reset = reset - self.rate = rate - self.qc = QC - self.csv = csv_raw - self.apnd = append_data & reset + if not df_list: + raise ValueError(f"\033[41m\033[97mAll files were either empty or failed to read.\033[0m") - self.pkl_nam = f'_read_{self.nam.lower()}.pkl' - self.csv_nam = f'_read_{self.nam.lower()}.csv' + raw_data = concat(df_list, axis=0).groupby(level=0).first() - self.pkl_nam_raw = f'_read_{self.nam.lower()}_raw.pkl' - self.csv_nam_raw = f'_read_{self.nam.lower()}_raw.csv' + if self.nam == 'SMPS': + raw_data = raw_data.sort_index(axis=1, key=lambda x: x.astype(float)) - self.csv_out = f'output_{self.nam.lower()}.csv' + raw_data = self._timeIndex_process(raw_data).apply(to_numeric, errors='coerce').copy(deep=True) + qc_data = self._QC(raw_data).apply(to_numeric, errors='coerce').copy(deep=True) - # print(f" from {_sta.strftime('%Y-%m-%d %X')} to {_fin.strftime('%Y-%m-%d %X')}") - # print('='*65) - # print(f"{dtm.now().strftime('%m/%d %X')}") + return raw_data, qc_data - # get data - def __call__(self, - start: dtm | None = None, - end: dtm | None = None, - mean_freq='1h', - csv_out=True, - **kwarg): + def _run(self, user_start, user_end): + # read pickle if pickle file exists and 'reset=False' or process raw data or append new data + if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset: + self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}", color_part="PICKLE") - self._oth_set = kwarg + _f_raw_done, _f_qc_done = read_pickle(self.pkl_nam_raw), read_pickle(self.pkl_nam) - if start and end and end <= start: - raise ValueError( - f'\nPlease check out input time : \n\tstart : {start.strftime("%Y-%m-%d %X")}\n\tend : {end.strftime("%Y-%m-%d %X")}') + if self.append: + self.logger.info_box(f"Appending New data from {user_start} to {user_end}", color_part="New data") - fout = self._run(start, end) + _f_raw_new, _f_qc_new = self._read_raw_files() + _f_raw = self._timeIndex_process(_f_raw_done, append_df=_f_raw_new) + _f_qc = self._timeIndex_process(_f_qc_done, append_df=_f_qc_new) - if fout is not None: - if mean_freq is not None: - fout = fout.resample(mean_freq).mean() + else: + _f_raw, _f_qc = _f_raw_done, _f_qc_done - if csv_out: - fout.to_csv(self.path / self.csv_out) + return _f_qc if self.qc else _f_raw - return fout + else: + self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}", color_part="RAW DATA") - # dependency injection function - @abstractmethod - def _raw_reader(self, _file): - # customize each instrument - pass + _f_raw, _f_qc = self._read_raw_files() - @abstractmethod - def _QC(self, df: 
DataFrame): - # customize each instrument - return df + # process time index + _f_raw = self._timeIndex_process(_f_raw, user_start, user_end) + _f_qc = self._timeIndex_process(_f_qc, user_start, user_end) + _f_qc = self._outlier_process(_f_qc) - # set each to true datetime(18:30:01 -> 18:30:00) and rindex data - def _raw_process(self, _df): - # get time from df and set time to whole time to create time index - _st, _ed = _df.index.sort_values()[[0, -1]] - _tm_index = date_range(_st.strftime('%Y%m%d %H00'), - (_ed + dtmdt(hours=1)).strftime('%Y%m%d %H00'), - freq=self.meta['freq']) - _tm_index.name = 'time' + # save + self._save_data(_f_raw, _f_qc) - return _df.apply(to_numeric, errors='coerce').resample(self.meta['freq']).mean().reindex(_tm_index) + if self.rate: + self._rate_calculate(_f_raw.apply(to_numeric, errors='coerce'), _f_qc.apply(to_numeric, errors='coerce')) + + return _f_qc if self.qc else _f_raw - # acquisition rate and yield rate - def _rate_calculate(self, _fout_raw, _fout_qc, _st_raw, _ed_raw): + @staticmethod + def reorder_dataframe_columns(df, order_lists, others_col=False): + new_order = [] - if self.meta['deter_key'] is not None: - _start, _end = _fout_qc.index[[0, -1]] + for order in order_lists: + # 只添加存在於DataFrame中的欄位,且不重複添加 + new_order.extend([col for col in order if col in df.columns and col not in new_order]) - _drop_how = 'any' - _the_size = len(_fout_raw.resample('1h').mean().index) + if others_col: + # 添加所有不在新順序列表中的原始欄位,保持它們的原始順序 + new_order.extend([col for col in df.columns if col not in new_order]) - _f_pth = (self.path / f'{self.nam}.log') - _f = _f_pth.open('r+' if _f_pth.exists() else 'w+') + return df[new_order] - _cont = _f.read() - _f.seek(0) - - _f.write(f"\n{dtm.now().strftime('%Y/%m/%d %X')}\n") - _f.write(f"{'-' * 60}\n") - _f.write(f"rawdata time : \n\t{_st_raw.strftime('%Y-%m-%d %X')} ~ {_ed_raw.strftime('%Y-%m-%d %X')}\n") - _f.write(f"output time : \n\t{_start.strftime('%Y-%m-%d %X')} ~ {_end.strftime('%Y-%m-%d %X')}\n") - _f.write(f"{'-' * 60}\n") - print(f"\n\t\tfrom {_start.strftime('%Y-%m-%d %X')} to {_end.strftime('%Y-%m-%d %X')}\n") - - for _nam, _key in self.meta['deter_key'].items(): - - if _key == ['all']: - _key, _drop_how = _fout_qc.keys(), 'all' - - _real_size = len(_fout_raw[_key].resample('1h').mean().copy().dropna(how=_drop_how).index) - _QC_size = len(_fout_qc[_key].resample('1h').mean().copy().dropna(how=_drop_how).index) - - try: - _acq_rate = round((_real_size / _the_size) * 100, 1) - _yid_rate = round((_QC_size / _real_size) * 100, 1) - except ZeroDivisionError: - _acq_rate, _yid_rate = 0, 0 - - _f.write(f'{_nam} : \n') - _f.write(f"\tacquisition rate : {_acq_rate}%\n") - _f.write(f'\tyield rate : {_yid_rate}%\n') - - print(f'\t\t{_nam} : ') - print(f'\t\t\tacquisition rate : \033[91m{_acq_rate}%\033[0m') - print(f'\t\t\tyield rate : \033[91m{_yid_rate}%\033[0m') - - _f.write(f"{'=' * 40}\n") - _f.write(_cont) - - _f.close() - - # process time index - @staticmethod - def _tmidx_process(_start, _end, _df): - _st, _ed = _df.index.sort_values()[[0, -1]] - _start, _end = to_datetime(_start) or _st, to_datetime(_end) or _ed - _idx = date_range(_start, _end, freq=_df.index.freq.copy()) - _idx.name = 'time' - - return _df.reindex(_idx), _st, _ed - - # append new data to exist pkl - @staticmethod - def _apnd_prcs(_df_done, _df_apnd): - - if _df_apnd is not None: - _df = concat([_df_apnd.dropna(how='all').copy(), _df_done.dropna(how='all').copy()]) - - _idx = date_range(*_df.index.sort_values()[[0, -1]], 
freq=_df_done.index.freq.copy()) - _idx.name = 'time' - - return _df.loc[~_df.index.duplicated()].copy().reindex(_idx) - - return _df_done - - # remove outlier - def _outlier_prcs(self, _df): - - if (self.path / 'outlier.json') not in self.path.glob('*.json'): - return _df - - with (self.path / 'outlier.json').open('r', encoding='utf-8', errors='ignore') as f: - self.outlier = jsn.load(f) - - for _st, _ed in self.outlier.values(): - _df.loc[_st:_ed] = np.nan - - return _df - - # save pickle file - def _save_dt(self, _save_raw, _save_qc): - # dump pickle file - _check = True - while _check: - try: - with (self.path / self.pkl_nam).open('wb') as f: - pkl.dump(_save_qc, f, protocol=pkl.HIGHEST_PROTOCOL) - - # dump csv file - if self.csv: - _save_qc.to_csv(self.path / self.csv_nam) - - # output raw data if qc file - if self.meta['deter_key'] is not None: - with (self.path / self.pkl_nam_raw).open('wb') as f: - pkl.dump(_save_raw, f, protocol=pkl.HIGHEST_PROTOCOL) - - if self.csv: - _save_raw.to_csv(self.path / self.csv_nam_raw) - - _check = False - - except PermissionError as _err: - print('\n', _err) - input('\t\t\33[41m Please Close The File And Press "Enter" \33[0m\n') - - # read pickle file - def _read_pkl(self, ): - with (self.path / self.pkl_nam).open('rb') as f: - _fout_qc = pkl.load(f) - - if (self.path / self.pkl_nam_raw).exists(): - with (self.path / self.pkl_nam_raw).open('rb') as f: - _fout_raw = pkl.load(f) - else: - _fout_raw = _fout_qc - - return _fout_raw, _fout_qc - - # read raw data - def _read_raw(self, ): - pattern = self.meta['pattern'] - patterns = {pattern, pattern.lower(), pattern.upper()} - _df_con, _f_list = None, list(chain.from_iterable(self.path.glob(p) for p in patterns)) - - for file in _f_list: - if file.name in [self.csv_out, self.csv_nam, self.csv_nam_raw, f'{self.nam}.log']: - continue - - print(f"\r\t\treading {file.name}", end='') - - _df = self._raw_reader(file) - - # concat the concated list - if _df is not None: - _df_con = concat([_df_con, _df]) if _df_con is not None else _df - - if _df_con is None: - print(f"\t\t\033[31mNo File in '{self.path}' Could Read, Please Check Out the Current Path\033[0m") - return None, None - - # QC - _fout_raw = self._raw_process(_df_con) - _fout_qc = self._QC(_fout_raw) - - return _fout_raw, _fout_qc - - # main flow - def _run(self, _start, _end): - - _f_raw_done, _f_qc_done = None, None - - # read pickle if pickle file exists and 'reset=False' or process raw data or append new data - _pkl_exist = self.path / self.pkl_nam in list(self.path.glob('*.pkl')) - if _pkl_exist & ((~self.reset) | self.apnd): - print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}") - - _f_raw_done, _f_qc_done = self._read_pkl() - - if not self.apnd: - _f_raw_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw_done) - _f_qc_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc_done) - - _f_qc_done = self._outlier_prcs(_f_qc_done) - - if self.rate: - self._rate_calculate(_f_raw_done, _f_qc_done, _start_raw, _end_raw) - - return _f_qc_done if self.qc else _f_raw_done - - # read raw data - print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it") - - _f_raw, _f_qc = self._read_raw() - if _f_raw is None: - return None - - # append new data and pickle data - if self.apnd & _pkl_exist: - _f_raw = self._apnd_prcs(_f_raw_done, _f_raw) - _f_qc = self._apnd_prcs(_f_qc_done, _f_qc) - - _f_qc = self._outlier_prcs(_f_qc) - - # save - 
self._save_dt(_f_raw, _f_qc) - - # process time index - # if (_start is not None)|(_end is not None): - _f_raw, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw) - _f_qc, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc) - - self._rate_calculate(_f_raw, _f_qc, _start_raw, _end_raw) - - return _f_qc if self.qc else _f_raw - - # ------------------------------------------------------------------------------------- - # old flow - # def __run(self, _start, _end): - # - # ## read pickle if pickle file exists and 'reset=False' or process raw data - # if (self.path / self.pkl_nam in list(self.path.glob('*.pkl'))) & (~self.reset): - # print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}") - # - # with (self.path / self.pkl_nam).open('rb') as f: - # _fout_qc = pkl.load(f) - # - # _exist = (self.path / self.pkl_nam_raw).exists() - # if _exist: - # with (self.path / self.pkl_nam_raw).open('rb') as f: - # _fout_raw = pkl.load(f) - # else: - # _fout_raw = _fout_qc - # - # _start, _end = to_datetime(_start) or _fout_qc.index[0], to_datetime(_end) or _fout_qc.index[-1] - # _idx = date_range(_start, _end, freq=_fout_qc.index.freq.copy()) - # _idx.name = 'time' - # - # _fout_raw, _fout_qc = _fout_raw.reindex(_idx), _fout_qc.reindex(_idx) - # if (self.rate) & (_exist): - # self._rate_calculate(_fout_raw, _fout_qc) - # - # return _fout_qc if self.qc else _fout_raw - # else: - # print( - # f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it") - # - # ##================================================================================================================= - # ## read raw data - # _df_con, _f_list = None, list(self.path.glob(self.meta['pattern'])) - # - # if len(_f_list) == 0: - # print(f"\t\t\033[31mNo File in '{self.path}' Could Read, Please Check Out the Current Path\033[0m") - # return None - # - # for file in _f_list: - # if file.name in [self.csv_out, self.csv_nam, self.csv_nam_raw, f'{self.nam}.log']: continue - # - # print(f"\r\t\treading {file.name}", end='') - # - # _df = self._raw_reader(file) - # - # ## concat the concated list - # if _df is not None: - # _df_con = concat([_df_con, _df]) if _df_con is not None else _df - # print() - # - # ## QC - # _save_raw = self._raw_process(_df_con) - # _save_qc = self._QC(_save_raw) - # - # _start, _end = to_datetime(_start) or _save_raw.index[0], to_datetime(_end) or _save_raw.index[-1] - # _idx = date_range(_start, _end, freq=_save_raw.index.freq.copy()) - # _idx.name = 'time' - # - # _fout_raw, _fout_qc = _save_raw.reindex(_idx).copy(), _save_qc.reindex(_idx).copy() - # - # self._rate_calculate(_fout_raw, _fout_qc) - # - # ##================================================================================================================= - # ## dump pickle file - # _check = True - # while _check: - # - # try: - # with (self.path / self.pkl_nam).open('wb') as f: - # pkl.dump(_save_qc, f, protocol=pkl.HIGHEST_PROTOCOL) - # - # ## dump csv file - # if self.csv: - # _save_qc.to_csv(self.path / self.csv_nam) - # - # ## output raw data if qc file - # if self.meta['deter_key'] is not None: - # with (self.path / self.pkl_nam_raw).open('wb') as f: - # pkl.dump(_save_raw, f, protocol=pkl.HIGHEST_PROTOCOL) - # - # if self.csv: - # _save_raw.to_csv(self.path / self.csv_nam_raw) - # - # return _fout_qc if self.qc else _fout_raw - # - # _check = False - # - # except PermissionError as _err: - # print('\n', _err) - # input('\t\t\33[41m Please 
Close The File And Press "Enter" \33[0m\n') - # - # return _fout_qc + @staticmethod + def time_aware_IQR_QC(df: pd.DataFrame, time_window='1D', log_dist=False) -> pd.DataFrame: + return DataQualityControl().time_aware_iqr(df, time_window=time_window, log_dist=log_dist) diff --git a/AeroViz/rawDataReader/core/logger.py b/AeroViz/rawDataReader/core/logger.py new file mode 100644 index 0000000..ff771dc --- /dev/null +++ b/AeroViz/rawDataReader/core/logger.py @@ -0,0 +1,100 @@ +import logging +import platform +import re +import sys +from pathlib import Path + + +class ReaderLogger: + def __init__(self, name: str, log_path: Path): + self.name = name + self.log_path = log_path + + # ANSI color codes + self.CYAN = '\033[96m' + self.BLUE = '\033[94m' + self.GREEN = '\033[92m' + self.YELLOW = '\033[93m' + self.RED = '\033[91m' + self.RESET = '\033[0m' + + # 強制 Windows 使用 UTF-8 + if platform.system().lower() == 'windows': + try: + sys.stdout.reconfigure(encoding='utf-8') + self.unicode_support = True + except Exception: + import codecs + sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer) + self.unicode_support = True + else: + self.unicode_support = True + + # 使用 Unicode 字符 + self.BOX_TOP_LEFT = "╔" + self.BOX_TOP_RIGHT = "╗" + self.BOX_BOTTOM_LEFT = "╚" + self.BOX_BOTTOM_RIGHT = "╝" + self.BOX_HORIZONTAL = "═" + self.BOX_VERTICAL = "║" + self.ARROW = "▶" + + self.logger = self._setup_logger() + + def _setup_logger(self) -> logging.Logger: + logger = logging.getLogger(self.name) + logger.setLevel(logging.INFO) + + # Remove existing handlers + for handler in logger.handlers[:]: + handler.close() + logger.removeHandler(handler) + + # clean ANSI formatter (for log file) + class CleanFormatter(logging.Formatter): + def format(self, record): + formatted_msg = super().format(record) + return re.sub(r'\033\[[0-9;]*m', '', formatted_msg) + + # Set up handlers with UTF-8 encoding + file_handler = logging.FileHandler(self.log_path / f'{self.name}.log', encoding='utf-8') + file_handler.setFormatter(CleanFormatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')) + + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setFormatter(logging.Formatter('%(message)s')) + + logger.addHandler(file_handler) + logger.addHandler(console_handler) + + return logger + + def info(self, msg: str): + self.logger.info(msg) + + def warning(self, msg: str): + self.logger.warning(msg) + + def error(self, msg: str): + self.logger.error(msg) + + def info_box(self, text: str, color_part: str = None, width: int = 80): + """ + Create a boxed message with optional colored text + + Args: + text: Base text format (e.g., "Reading {} RAW DATA from {} to {}") + color_part: Part of text to be colored (e.g., "RAW DATA") + width: Box width + """ + display_text = text.replace(color_part, " " * len(color_part)) if color_part else text + + left_padding = " " * ((width - len(display_text)) // 2) + right_padding = " " * (width - len(display_text) - len(left_padding)) + + content = text.replace(color_part, f"{self.CYAN}{color_part}{self.RESET}") if color_part else text + + __content__ = f"{left_padding}{content}{right_padding}" + + self.info(f"╔{'═' * width}╗") + self.info(f"║{__content__}║") + self.info(f"╚{'═' * width}╝") \ No newline at end of file diff --git a/AeroViz/rawDataReader/core/qc.py b/AeroViz/rawDataReader/core/qc.py new file mode 100644 index 0000000..f59f3cd --- /dev/null +++ b/AeroViz/rawDataReader/core/qc.py @@ -0,0 +1,184 @@ +import numpy as np +import pandas as pd + + +class DataQualityControl: + 
"""A class providing various methods for data quality control and outlier detection""" + + @staticmethod + def _ensure_dataframe(df: pd.DataFrame | pd.Series) -> pd.DataFrame: + """Ensure input data is in DataFrame format""" + return df.to_frame() if isinstance(df, pd.Series) else df + + @staticmethod + def _transform_if_log(df: pd.DataFrame, log_dist: bool) -> pd.DataFrame: + """Transform data to log scale if required""" + return np.log10(df) if log_dist else df + + @classmethod + def n_sigma(cls, df: pd.DataFrame, std_range: int = 5) -> pd.DataFrame: + """ + Detect outliers using n-sigma method + + Parameters + ---------- + df : pd.DataFrame + Input data + std_range : int, default=5 + Number of standard deviations to use as threshold + + Returns + ------- + pd.DataFrame + Cleaned DataFrame with outliers masked as NaN + """ + df = cls._ensure_dataframe(df) + df_ave = df.mean() + df_std = df.std() + + lower_bound = df < (df_ave - df_std * std_range) + upper_bound = df > (df_ave + df_std * std_range) + + return df.mask(lower_bound | upper_bound) + + @classmethod + def iqr(cls, df: pd.DataFrame, log_dist: bool = False) -> pd.DataFrame: + """ + Detect outliers using Interquartile Range (IQR) method + + Parameters + ---------- + df : pd.DataFrame + Input data + log_dist : bool, default=False + Whether to apply log transformation to data + + Returns + ------- + pd.DataFrame + Cleaned DataFrame with outliers masked as NaN + """ + df = cls._ensure_dataframe(df) + df_transformed = cls._transform_if_log(df, log_dist) + + q1 = df_transformed.quantile(0.25) + q3 = df_transformed.quantile(0.75) + iqr = q3 - q1 + + lower_bound = df_transformed < (q1 - 1.5 * iqr) + upper_bound = df_transformed > (q3 + 1.5 * iqr) + + return df.mask(lower_bound | upper_bound) + + @classmethod + def rolling_iqr(cls, df: pd.DataFrame, window_size: int = 24, + log_dist: bool = False) -> pd.DataFrame: + """ + Detect outliers using rolling window IQR method + + Parameters + ---------- + df : pd.DataFrame + Input data + window_size : int, default=24 + Size of the rolling window + log_dist : bool, default=False + Whether to apply log transformation to data + + Returns + ------- + pd.DataFrame + Cleaned DataFrame with outliers masked as NaN + """ + df = cls._ensure_dataframe(df) + df_transformed = cls._transform_if_log(df, log_dist) + + def iqr_filter(x): + q1, q3 = x.quantile(0.25), x.quantile(0.75) + iqr = q3 - q1 + lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr + return (x >= lower) & (x <= upper) + + mask = df_transformed.rolling( + window=window_size, + center=True, + min_periods=1 + ).apply(iqr_filter) + + return df.where(mask, np.nan) + + @classmethod + def time_aware_iqr(cls, df: pd.DataFrame, time_window: str = '1D', + log_dist: bool = False) -> pd.DataFrame: + """ + Detect outliers using time-aware IQR method + + Parameters + ---------- + df : pd.DataFrame + Input data + time_window : str, default='1D' + Time window size (e.g., '1D' for one day) + log_dist : bool, default=False + Whether to apply log transformation to data + + Returns + ------- + pd.DataFrame + Cleaned DataFrame with outliers masked as NaN + """ + df = cls._ensure_dataframe(df) + df_transformed = cls._transform_if_log(df, log_dist) + + def iqr_filter(group): + q1, q3 = group.quantile(0.25), group.quantile(0.75) + iqr = q3 - q1 + lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr + return (group >= lower) & (group <= upper) + + mask = df_transformed.groupby( + pd.Grouper(freq=time_window) + ).transform(iqr_filter) + + return df.where(mask, np.nan) + + 
@classmethod + def mad_iqr_hybrid(cls, df: pd.DataFrame, mad_threshold: float = 3.5, + log_dist: bool = False) -> pd.DataFrame: + """ + Detect outliers using a hybrid of MAD and IQR methods + + Parameters + ---------- + df : pd.DataFrame + Input data + mad_threshold : float, default=3.5 + Threshold for MAD method + log_dist : bool, default=False + Whether to apply log transformation to data + + Returns + ------- + pd.DataFrame + Cleaned DataFrame with outliers masked as NaN + """ + df = cls._ensure_dataframe(df) + df_transformed = cls._transform_if_log(df, log_dist) + + # IQR method + q1, q3 = df_transformed.quantile(0.25), df_transformed.quantile(0.75) + iqr = q3 - q1 + iqr_lower, iqr_upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr + + # MAD method + median = df_transformed.median() + mad = (df_transformed - median).abs().median() + mad_lower = median - mad_threshold * mad + mad_upper = median + mad_threshold * mad + + # Combine both methods + lower = np.maximum(iqr_lower, mad_lower) + upper = np.minimum(iqr_upper, mad_upper) + + mask = (df_transformed >= lower) & (df_transformed <= upper) + return df.where(mask, np.nan) diff --git a/AeroViz/rawDataReader/script/AE33.py b/AeroViz/rawDataReader/script/AE33.py index a145c68..670f5b8 100644 --- a/AeroViz/rawDataReader/script/AE33.py +++ b/AeroViz/rawDataReader/script/AE33.py @@ -1,31 +1,35 @@ -from pandas import read_table +from pandas import read_table, to_numeric from AeroViz.rawDataReader.core import AbstractReader class Reader(AbstractReader): - nam = 'AE33' + nam = 'AE33' - def _raw_reader(self, _file): - _df = read_table(_file, parse_dates={'time': [0, 1]}, index_col='time', - delimiter=r'\s+', skiprows=5, usecols=range(67)) - _df.columns = _df.columns.str.strip(';') + def _raw_reader(self, file): + if file.stat().st_size / 1024 < 550: + self.logger.info(f'\t {file} may not be a whole daily data. 
Make sure the file is correct.') - # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape) - if not self._oth_set.get('ignore_err', False): - _df = _df.where((_df['Status'] != 0) | (_df['Status'] != 128) | (_df['Status'] != 256)).copy() + _df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time', + delimiter=r'\s+', skiprows=5, usecols=range(67)) + _df.columns = _df.columns.str.strip(';') - return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'Status']] + # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape) + if self.meta.get('error_state', False): + _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy() - def _QC(self, _df): - # remove negative value - _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].mask((_df < 0).copy()) + _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].apply(to_numeric, errors='coerce') - # QC data in 5 min - def _QC_func(df): - _df_ave, _df_std = df.mean(), df.std() - _df_lowb, _df_highb = df < (_df_ave - _df_std * 1.5), df > (_df_ave + _df_std * 1.5) + return _df.loc[~_df.index.duplicated() & _df.index.notna()] - return df.mask(_df_lowb | _df_highb).copy() + def _QC(self, _df): + _index = _df.index.copy() - return _df.resample('5min').apply(_QC_func).resample('1h').mean() + # remove negative value + _df = _df.mask((_df <= 0) | (_df > 20000)) + + # use IQR_QC + _df = self.time_aware_IQR_QC(_df, time_window='1h') + + # make sure all columns have values, otherwise set to nan + return _df.dropna(how='any').reindex(_index) diff --git a/AeroViz/rawDataReader/script/AE43.py b/AeroViz/rawDataReader/script/AE43.py index 3b32bf4..b0d7319 100644 --- a/AeroViz/rawDataReader/script/AE43.py +++ b/AeroViz/rawDataReader/script/AE43.py @@ -1,34 +1,36 @@ -from pandas import read_csv +from pandas import read_csv, to_numeric from AeroViz.rawDataReader.core import AbstractReader class Reader(AbstractReader): - nam = 'AE43' + nam = 'AE43' - def _raw_reader(self, _file): - _df = read_csv(_file, parse_dates={'time': ['StartTime']}, index_col='time') - _df_id = _df['SetupID'].iloc[-1] + def _raw_reader(self, file): + _df = read_csv(file, parse_dates={'time': ['StartTime']}, index_col='time') + _df_id = _df['SetupID'].iloc[-1] - # get last SetupID data - _df = _df.groupby('SetupID').get_group(_df_id)[ - ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'Status']].copy() + # get last SetupID data + _df = _df.groupby('SetupID').get_group(_df_id)[ + ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'Status']].copy() - # remove data without Status=0 - _df = _df.where(_df['Status'] == 0).copy() + # remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape) + if self.meta.get('error_state', False): + _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy() - return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']] + _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].apply(to_numeric, errors='coerce') - # QC data - def _QC(self, _df): - # remove negative value - _df = _df.mask((_df < 0).copy()) + return _df.loc[~_df.index.duplicated() & _df.index.notna()] - # QC data in 5 min - def _QC_func(df): - _df_ave, _df_std = df.mean(), df.std() - _df_lowb, _df_highb = df < (_df_ave - _df_std * 1.5), df > (_df_ave + _df_std * 1.5) + # QC data + def _QC(self, _df): + _index = _df.index.copy() - return df.mask(_df_lowb | _df_highb).copy() + # remove negative value + _df = _df.mask((_df <= 0) | (_df > 20000)) - return 
_df.resample('5min').apply(_QC_func).resample('1h').mean() + # use IQR_QC + _df = self.time_aware_IQR_QC(_df, time_window='1h') + + # make sure all columns have values, otherwise set to nan + return _df.dropna(how='any').reindex(_index) diff --git a/AeroViz/rawDataReader/script/APS_3321.py b/AeroViz/rawDataReader/script/APS_3321.py index a1c5e4b..d2f0229 100644 --- a/AeroViz/rawDataReader/script/APS_3321.py +++ b/AeroViz/rawDataReader/script/APS_3321.py @@ -1,47 +1,47 @@ -import numpy as n +import numpy as np from pandas import to_datetime, read_table from AeroViz.rawDataReader.core import AbstractReader class Reader(AbstractReader): - nam = 'APS_3321' + nam = 'APS_3321' - def _raw_reader(self, _file): - with open(_file, 'r', encoding='utf-8', errors='ignore') as f: - _df = read_table(f, skiprows=6, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time') - _key = list(_df.keys()[3:54]) ## 542 ~ 1981 + def _raw_reader(self, file): + with open(file, 'r', encoding='utf-8', errors='ignore') as f: + _df = read_table(f, skiprows=6, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time') + _key = list(_df.keys()[3:54]) ## 542 ~ 1981 - ## create new keys - _newkey = {} - for _k in _key: - _newkey[_k] = float(_k).__round__(4) - # _newkey['Mode(m)'] = 'mode' + # create new keys + _newkey = {} + for _k in _key: + _newkey[_k] = float(_k).__round__(4) + # _newkey['Mode(m)'] = 'mode' - ## get new dataframe - _df = _df[_newkey.keys()].rename(_newkey, axis=1) - # _df['total'] = _df[list(_newkey.values())[:-1]].sum(axis=1)*(n.diff(n.log(_df.keys()[:-1].to_numpy(float))).mean()).copy() + # get new dataframe + _df = _df[_newkey.keys()].rename(_newkey, axis=1) + # df['total'] = _df[list(_newkey.values())[:-1]].sum(axis=1)*(n.diff(n.log(_df.keys()[:-1].to_numpy(float))).mean()).copy() - _df_idx = to_datetime(_df.index, errors='coerce') + _df_idx = to_datetime(_df.index, errors='coerce') - return _df.set_index(_df_idx).loc[_df_idx.dropna()] + return _df.set_index(_df_idx).loc[_df_idx.dropna()] - ## QC data - def _QC(self, _df): - ## mask out the data size lower than 7 - _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean() - _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill() - _df = _df.mask(_df_size < 7) + # QC data + def _QC(self, _df): + # mask out the data size lower than 7 + _df['total'] = _df.sum(axis=1, min_count=1) * (np.diff(np.log(_df.keys().to_numpy(float)))).mean() + _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill() + _df = _df.mask(_df_size < 7) - ## remove total conc. lower than 700 - _df = _df.mask(_df['total'] > 700) + # remove total conc. lower than 700 + _df = _df.mask(_df['total'] > 700) - # not confirmed - """ - ## remove the bin over 4000 nm which num. conc. larger than 1 - # _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2]>=4.] + # not confirmed + """ + ## remove the bin over 4000 nm which num. conc. larger than 1 + # _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2]>=4.] - # _df_1hr[_df_remv_ky] = _df_1hr[_df_remv_ky].copy().mask(_df_1hr[_df_remv_ky]>1.) - # """ + # _df_1hr[_df_remv_ky] = _df_1hr[_df_remv_ky].copy().mask(_df_1hr[_df_remv_ky]>1.) 
+ # """ - return _df[_df.keys()[:-1]] + return _df[_df.keys()[:-1]] diff --git a/AeroViz/rawDataReader/script/Aurora.py b/AeroViz/rawDataReader/script/Aurora.py index 317beb3..92eff66 100644 --- a/AeroViz/rawDataReader/script/Aurora.py +++ b/AeroViz/rawDataReader/script/Aurora.py @@ -1,38 +1,44 @@ -from pandas import to_datetime, read_csv +from pandas import to_datetime, read_csv, to_numeric from AeroViz.rawDataReader.core import AbstractReader class Reader(AbstractReader): - nam = 'Aurora' + nam = 'Aurora' - def _raw_reader(self, _file): - with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f: - _df = read_csv(f, low_memory=False, index_col=0) + def _raw_reader(self, file): + with file.open('r', encoding='utf-8-sig', errors='ignore') as f: + _df = read_csv(f, low_memory=False, index_col=0) - _df.index = to_datetime(_df.index, errors='coerce', format=self._oth_set.get('date_format') or 'mixed') - _df.index.name = 'time' + _df.index = to_datetime(_df.index, errors='coerce') + _df.index.name = 'time' - _df.columns = _df.keys().str.strip(' ') + _df.columns = _df.keys().str.strip(' ') - _df = _df.loc[ - _df.index.dropna(), ['0°σspB', '0°σspG', '0°σspR', '90°σspB', '90°σspG', '90°σspR', 'RH']].copy() - _df.columns = ['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH'] + # consider another csv format + _df = _df.rename(columns={ + '0°σspB': 'B', '0°σspG': 'G', '0°σspR': 'R', + '90°σspB': 'BB', '90°σspG': 'BG', '90°σspR': 'BR', + 'Blue': 'B', 'Green': 'G', 'Red': 'R', + 'B_Blue': 'BB', 'B_Green': 'BG', 'B_Red': 'BR', + 'RH': 'RH' + }) - return _df + _df = _df[['B', 'G', 'R', 'BB', 'BG', 'BR']].apply(to_numeric, errors='coerce') - ## QC data - def _QC(self, _df): - ## remove negative value - _df = _df.mask((_df <= 0).copy()) + return _df.loc[~_df.index.duplicated() & _df.index.notna()] - ## call by _QC function - ## QC data in 1 hr - def _QC_func(_df_1hr): - _df_ave = _df_1hr.mean() - _df_std = _df_1hr.std() - _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5) + def _QC(self, _df): + _index = _df.index.copy() - return _df_1hr.mask(_df_lowb | _df_highb).copy() + _df = _df.mask((_df <= 0) | (_df > 2000)) - return _df.resample('1h', group_keys=False).apply(_QC_func) + _df = _df.loc[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])] + + _df = _df.loc[(_df['B'] > _df['G']) & (_df['G'] > _df['R'])] + + # use IQR_QC + _df = self.time_aware_IQR_QC(_df, time_window='1h') + + # make sure all columns have values, otherwise set to nan + return _df.dropna(how='any').reindex(_index) diff --git a/AeroViz/rawDataReader/script/BC1054.py b/AeroViz/rawDataReader/script/BC1054.py index 956f7a1..467a721 100644 --- a/AeroViz/rawDataReader/script/BC1054.py +++ b/AeroViz/rawDataReader/script/BC1054.py @@ -1,46 +1,47 @@ -from pandas import read_csv +from pandas import read_csv, to_numeric from AeroViz.rawDataReader.core import AbstractReader class Reader(AbstractReader): - nam = 'BC1054' - - def _raw_reader(self, _file): - with open(_file, 'r', encoding='utf-8', errors='ignore') as f: - _df = read_csv(f, parse_dates=['Time'], index_col='Time') - - _df = _df.rename(columns={ - 'BC1(ng/m3)': 'BC1', - 'BC2(ng/m3)': 'BC2', - 'BC3(ng/m3)': 'BC3', - 'BC4(ng/m3)': 'BC4', - 'BC5(ng/m3)': 'BC5', - 'BC6(ng/m3)': 'BC6', - 'BC7(ng/m3)': 'BC7', - 'BC8(ng/m3)': 'BC8', - 'BC9(ng/m3)': 'BC9', - 'BC10(ng/m3)': 'BC10' - }) - - # remove data without Status=32 (Automatic Tape Advance), 65536 (Tape Move) - # if not self._oth_set.get('ignore_err', False): - # _df = 
_df.where((_df['Status'] != 32) | (_df['Status'] != 65536)).copy() - - return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10', 'Status']] - - # QC data - def _QC(self, _df): - # remove negative value - _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']].mask((_df < 0).copy()) - - # call by _QC function - # QC data in 1 hr - def _QC_func(_df_1hr): - _df_ave = _df_1hr.mean() - _df_std = _df_1hr.std() - _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5) - - return _df_1hr.mask(_df_lowb | _df_highb).copy() - - return _df.resample('1h', group_keys=False).apply(_QC_func).resample('5min').mean() + nam = 'BC1054' + + def _raw_reader(self, file): + with open(file, 'r', encoding='utf-8', errors='ignore') as f: + _df = read_csv(f, parse_dates=True, index_col=0) + + _df.columns = _df.columns.str.replace(' ', '') + + _df = _df.rename(columns={ + 'BC1(ng/m3)': 'BC1', + 'BC2(ng/m3)': 'BC2', + 'BC3(ng/m3)': 'BC3', + 'BC4(ng/m3)': 'BC4', + 'BC5(ng/m3)': 'BC5', + 'BC6(ng/m3)': 'BC6', + 'BC7(ng/m3)': 'BC7', + 'BC8(ng/m3)': 'BC8', + 'BC9(ng/m3)': 'BC9', + 'BC10(ng/m3)': 'BC10' + }) + + # remove data without Status=1, 8, 16, 32 (Automatic Tape Advance), 65536 (Tape Move) + if self.meta.get('error_state', False): + _df = _df[~_df['Status'].isin(self.meta.get('error_state'))] + + _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']].apply(to_numeric, + errors='coerce') + + return _df.loc[~_df.index.duplicated() & _df.index.notna()] + + def _QC(self, _df): + _index = _df.index.copy() + + # remove negative value + _df = _df.mask((_df <= 0) | (_df > 20000)) + + # use IQR_QC + _df = self.time_aware_IQR_QC(_df, time_window='1h') + + # make sure all columns have values, otherwise set to nan + return _df.dropna(how='any').reindex(_index) diff --git a/AeroViz/rawDataReader/script/EPA.py b/AeroViz/rawDataReader/script/EPA.py new file mode 100644 index 0000000..238e757 --- /dev/null +++ b/AeroViz/rawDataReader/script/EPA.py @@ -0,0 +1,41 @@ +import numpy as np +from pandas import read_csv, to_numeric + +from AeroViz.rawDataReader.core import AbstractReader + +desired_order1 = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC', + 'CH4', 'PM10', 'PM2.5', 'PM1', 'WS', 'WD', 'AT', 'RH'] + +desired_order2 = ['Benzene', 'Toluene', 'EthylBenzene', 'm/p-Xylene', 'o-Xylene'] + + +class Reader(AbstractReader): + nam = 'EPA' + + def _raw_reader(self, file): + # 查詢小時值(測項).csv & 查詢小時值(直式).csv (有、無輸出有效值都可以) + df = read_csv(file, encoding='big5', encoding_errors='ignore', index_col=0, parse_dates=True, + on_bad_lines='skip') + + if len(df.groupby('測站')) > 1: + raise ValueError(f"Multiple stations found in the file: {df['測站'].unique()}") + else: + if '測站' in df.columns: + df.drop(columns=['測站'], inplace=True) + + if '測項' in df.columns: + df = df.pivot(columns='測項', values='資料') + + df.rename(columns={'AMB_TEMP': 'AT', 'WIND_SPEED': 'WS', 'WIND_DIREC': 'WD'}, inplace=True) + df.index.name = 'Time' + + # 如果沒有將無效值拿掉就輸出 請將包含 #、L 的字串替換成 # 或 _ + df = df.replace(to_replace=r'\d*\.?\d*[#]\b', value='#', regex=True) + df = df.replace(to_replace=r'\d*\.?\d*[L]\b', value='_', regex=True) + + # 欄位排序 + return self.reorder_dataframe_columns(df, [desired_order1]).apply(to_numeric, errors='coerce') + + def _QC(self, _df): + _df = _df.mask(_df < 0, np.nan) + return _df diff --git a/AeroViz/rawDataReader/script/EPA_vertical.py b/AeroViz/rawDataReader/script/EPA_vertical.py deleted file mode 100644 index 1bcc46d..0000000 --- 
a/AeroViz/rawDataReader/script/EPA_vertical.py +++ /dev/null @@ -1,18 +0,0 @@ -from pandas import read_csv, to_numeric - -from AeroViz.rawDataReader.core import AbstractReader - - -class Reader(AbstractReader): - nam = 'EPA_vertical' - - def _raw_reader(self, _file): - with _file.open('r', encoding='big5', errors='ignore') as f: - _df = read_csv(f, names=['time', 'station', 'comp', 'data', None], skiprows=1, na_values=['-'], - parse_dates=['time'], index_col='time') - _df['data'] = to_numeric(_df['data'], errors='coerce') - - _df_piv = _df.pivot_table(values='data', columns='comp', index='time') - _df_piv.index.name = 'time' - - return _df_piv diff --git a/AeroViz/rawDataReader/script/GRIMM.py b/AeroViz/rawDataReader/script/GRIMM.py index 1f6b89e..e144c85 100644 --- a/AeroViz/rawDataReader/script/GRIMM.py +++ b/AeroViz/rawDataReader/script/GRIMM.py @@ -4,32 +4,23 @@ class Reader(AbstractReader): - nam = 'GRIMM' + nam = 'GRIMM' - def _raw_reader(self, _file): + def _raw_reader(self, file): + _df = read_csv(file, header=233, delimiter='\t', index_col=0, parse_dates=[0], encoding='ISO-8859-1', + dayfirst=True).rename_axis("Time") + _df.index = to_datetime(_df.index, format="%d/%m/%Y %H:%M:%S", dayfirst=True) - _df = read_csv(_file, header=233, delimiter='\t', index_col=0, parse_dates=[0], encoding='ISO-8859-1', - dayfirst=True).rename_axis("Time") - _df.index = to_datetime(_df.index, format="%d/%m/%Y %H:%M:%S", dayfirst=True) + if file.name.startswith("A407ST"): + _df.drop(_df.columns[0:11].tolist() + _df.columns[128:].tolist(), axis=1, inplace=True) + else: + _df.drop(_df.columns[0:11].tolist() + _df.columns[-5:].tolist(), axis=1, inplace=True) - if _file.name.startswith("A407ST"): - _df.drop(_df.columns[0:11].tolist() + _df.columns[128:].tolist(), axis=1, inplace=True) - else: - _df.drop(_df.columns[0:11].tolist() + _df.columns[-5:].tolist(), axis=1, inplace=True) + if _df.empty: + print(file, "is empty") + return None - if _df.empty: - print(_file, "is empty") - return None + return _df / 0.035 - return _df / 0.035 - - def _QC(self, _df): - # QC data in 1 hr - def _QC_func(_df_1hr): - _df_ave = _df_1hr.mean() - _df_std = _df_1hr.std() - _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5) - - return _df_1hr.mask(_df_lowb | _df_highb).copy() - - return _df.resample('5min').apply(_QC_func).resample('1h').mean() + def _QC(self, _df): + return _df diff --git a/AeroViz/rawDataReader/script/IGAC.py b/AeroViz/rawDataReader/script/IGAC.py new file mode 100644 index 0000000..25d5386 --- /dev/null +++ b/AeroViz/rawDataReader/script/IGAC.py @@ -0,0 +1,75 @@ +# read meteorological data from google sheet + + +from pandas import read_csv, to_numeric + +from AeroViz.rawDataReader.core import AbstractReader + + +class Reader(AbstractReader): + nam = 'IGAC' + + def _raw_reader(self, file): + + with file.open('r', encoding='utf-8-sig', errors='ignore') as f: + _df = read_csv(f, parse_dates=True, index_col=0, na_values='-') + + _df.columns = _df.keys().str.strip(' ') + _df.index.name = 'time' + + _df = _df.apply(to_numeric, errors='coerce') + + return _df.loc[~_df.index.duplicated() & _df.index.notna()] + + def _QC(self, _df): + + # QC parameter, function (MDL SE LE) + _mdl = { + 'Na+': 0.06, + 'NH4+': 0.05, + 'K+': 0.05, + 'Mg2+': 0.12, + 'Ca2+': 0.07, + 'Cl-': 0.07, + 'NO2-': 0.05, + 'NO3-': 0.11, + 'SO42-': 0.08, + } + + _cation, _anion, _main = (['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'], + ['Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-', ], + ['SO42-', 'NO3-', 'NH4+']) + + 
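+        # QC chain applied below: (1) mask rows whose ion sum exceeds PM2.5 and
+        # drop rows missing any main ion (NH4+, NO3-, SO42-); (2) replace
+        # sub-MDL values with MDL/2; (3) mask values above an IQR-based upper
+        # envelope computed in log space; (4) keep only rows whose cation/anion
+        # and anion/cation sum ratios are positive and below their IQR upper
+        # bounds; (5) mask main-ion values that fall below the IQR lower bound.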
_df_salt = _df[_mdl.keys()].copy() + _df_pm = _df['PM2.5'].copy() + + # lower than PM2.5 + # conc. of main salt should be present at the same time (NH4+, SO42-, NO3-) + _df_salt = _df_salt.mask(_df_salt.sum(axis=1, min_count=1) > _df_pm).dropna(subset=_main).copy() + + # mdl + for (_key, _df_col), _mdl_val in zip(_df_salt.items(), _mdl.values()): + _df_salt[_key] = _df_col.mask(_df_col < _mdl_val, _mdl_val / 2) + + # calculate SE LE + # salt < LE + _se, _le = self.IQR_QC(_df_salt, log_dist=True) + _df_salt = _df_salt.mask(_df_salt > _le).copy() + + # C/A, A/C + _rat_CA = (_df_salt[_cation].sum(axis=1) / _df_salt[_anion].sum(axis=1)).to_frame() + _rat_AC = (1 / _rat_CA).copy() + + _se, _le = self.IQR_QC(_rat_CA, ) + _cond_CA = (_rat_CA < _le) & (_rat_CA > 0) + + _se, _le = self.IQR_QC(_rat_AC, ) + _cond_AC = (_rat_AC < _le) & (_rat_AC > 0) + + _df_salt = _df_salt.where((_cond_CA * _cond_AC)[0]).copy() + + # conc. of main salt > SE + _se, _le = self.IQR_QC(_df_salt[_main], log_dist=True) + _df_salt[_main] = _df_salt[_main].mask(_df_salt[_main] < _se).copy() + + return _df_salt.reindex(_df.index) diff --git a/AeroViz/rawDataReader/script/IGAC_TH.py b/AeroViz/rawDataReader/script/IGAC_TH.py deleted file mode 100644 index aef8ee5..0000000 --- a/AeroViz/rawDataReader/script/IGAC_TH.py +++ /dev/null @@ -1,104 +0,0 @@ -# read meteorological data from google sheet - - -import numpy as np -from pandas import read_csv, concat, to_datetime - -from AeroViz.rawDataReader.core import AbstractReader - - -class Reader(AbstractReader): - nam = 'IGAC_TH' - - def _raw_reader(self, _file): - - self.meta['freq'] = self._oth_set.get('data_freq') or self.meta['freq'] - - with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f: - _df = read_csv(f, low_memory=False, index_col=0) - - _df.index = to_datetime(_df.index, errors='coerce', format=self._oth_set.get('date_format') or 'mixed') - _df.index.name = 'time' - - _df.columns = _df.keys().str.strip(' ') - - _df = _df.loc[_df.index.dropna()].copy() - - return _df.loc[~_df.index.duplicated()] - - ## QC data - def _QC(self, _df): - - ## QC parameter, function (MDL SE LE) - _mdl = { - 'Na+': 0.05, - 'NH4+': 0.05, - 'K+': 0.05, - 'Mg2+': 0.05, - 'Ca2+': 0.05, - 'Cl-': 0.05, - 'NO2-': 0.05, - 'NO3-': 0.05, - 'SO42-': 0.05, - } - - def _se_le(_df_, _log=False): - _df_ = np.log10(_df_) if _log else _df_ - - _df_qua = _df_.quantile([.25, .75]) - _df_q1, _df_q3 = _df_qua.loc[.25].copy(), _df_qua.loc[.75].copy() - _df_iqr = _df_q3 - _df_q1 - - _se = concat([_df_q1 - 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index) - _le = concat([_df_q3 + 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index) - - if _log: - return 10 ** _se, 10 ** _le - return _se, _le - - _cation, _anion, _main = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'], ['Cl-', 'NO2-', 'NO3-', 'SO42-', ], ['SO42-', - 'NO3-', - 'NH4+'] - - _df_salt = _df[_mdl.keys()].copy() - _df_pm = _df['PM2.5'].copy() - - ## lower than PM2.5 - ## conc. 
of main salt should be present at the same time (NH4+, SO42-, NO3-) - _df_salt = _df_salt.mask(_df_salt.sum(axis=1, min_count=1) > _df_pm).dropna(subset=_main).copy() - - ## mdl - for (_key, _df_col), _mdl_val in zip(_df_salt.items(), _mdl.values()): - _df_salt[_key] = _df_col.mask(_df_col < _mdl_val, _mdl_val / 2) - - ## group by time (per month) - _df_salt['tm'] = _df_salt.index.strftime('%Y-%m') - - _df_lst = [] - for _ky, _df_grp in _df_salt.groupby('tm'): - _df_grp = _df_grp[_mdl.keys()].copy() - - ## calculate SE LE - ## salt < LE - _se, _le = _se_le(_df_grp, _log=True) - _df_grp = _df_grp.mask(_df_grp > _le).copy() - - ## C/A, A/C - _rat_CA = (_df_grp[_cation].sum(axis=1) / _df_grp[_anion].sum(axis=1)).to_frame() - _rat_AC = (1 / _rat_CA).copy() - - _se, _le = _se_le(_rat_CA, ) - _cond_CA = (_rat_CA < _le) & (_rat_CA > 0) - - _se, _le = _se_le(_rat_AC, ) - _cond_AC = (_rat_AC < _le) & (_rat_AC > 0) - - _df_grp = _df_grp.where((_cond_CA * _cond_AC)[0]).copy() - - ## conc. of main salt > SE - _se, _le = _se_le(_df_grp[_main], _log=True) - _df_grp[_main] = _df_grp[_main].mask(_df_grp[_main] < _se).copy() - - _df_lst.append(_df_grp) - - return concat(_df_lst).reindex(_df.index) diff --git a/AeroViz/rawDataReader/script/IGAC_ZM.py b/AeroViz/rawDataReader/script/IGAC_ZM.py deleted file mode 100644 index 921ecce..0000000 --- a/AeroViz/rawDataReader/script/IGAC_ZM.py +++ /dev/null @@ -1,90 +0,0 @@ -# read meteorological data from google sheet - - -import numpy as np -from pandas import read_csv, concat, to_numeric - -from AeroViz.rawDataReader.core import AbstractReader - - -class Reader(AbstractReader): - nam = 'IGAC_ZM' - - def _raw_reader(self, _file): - - with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f: - _df = read_csv(f, parse_dates=[0], index_col=[0], na_values=['-']).apply(to_numeric, errors='coerce') - - _df.columns = _df.keys().str.strip(' ') - _df.index.name = 'time' - - return _df.loc[_df.index.dropna()].loc[~_df.index.duplicated()] - - ## QC data - def _QC(self, _df): - - ## QC parameter, function (MDL SE LE) - _mdl = { - 'Na+': 0.06, - 'NH4+': 0.05, - 'K+': 0.05, - 'Mg2+': 0.12, - 'Ca2+': 0.07, - 'Cl-': 0.07, - 'NO2-': 0.05, - 'NO3-': 0.11, - 'SO42-': 0.08, - } - _mdl.update(self._oth_set.get('mdl', {})) - - def _se_le(_df_, _log=False): - _df_ = np.log10(_df_) if _log else _df_ - - _df_qua = _df_.quantile([.25, .75]) - _df_q1, _df_q3 = _df_qua.loc[.25].copy(), _df_qua.loc[.75].copy() - _df_iqr = _df_q3 - _df_q1 - - _se = concat([_df_q1 - 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index) - _le = concat([_df_q3 + 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index) - - if _log: - return 10 ** _se, 10 ** _le - return _se, _le - - _cation, _anion, _main = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'], ['Cl-', 'NO2-', 'NO3-', 'SO42-', ], ['SO42-', - 'NO3-', - 'NH4+'] - - _df_salt = _df[_mdl.keys()].copy() - _df_pm = _df['PM2.5'].copy() - - ## lower than PM2.5 - ## conc. 
of main salt should be present at the same time (NH4+, SO42-, NO3-) - _df_salt = _df_salt.mask(_df_salt.sum(axis=1, min_count=1) > _df_pm).dropna(subset=_main).copy() - - ## mdl - for (_key, _df_col), _mdl_val in zip(_df_salt.items(), _mdl.values()): - _df_salt[_key] = _df_col.mask(_df_col < _mdl_val, _mdl_val / 2) - - ## calculate SE LE - ## salt < LE - _se, _le = _se_le(_df_salt, _log=True) - _df_salt = _df_salt.mask(_df_salt > _le).copy() - - ## C/A, A/C - _rat_CA = (_df_salt[_cation].sum(axis=1) / _df_salt[_anion].sum(axis=1)).to_frame() - _rat_AC = (1 / _rat_CA).copy() - - _se, _le = _se_le(_rat_CA, ) - _cond_CA = (_rat_CA < _le) & (_rat_CA > 0) - - _se, _le = _se_le(_rat_AC, ) - _cond_AC = (_rat_AC < _le) & (_rat_AC > 0) - - _df_salt = _df_salt.where((_cond_CA * _cond_AC)[0]).copy() - - ## conc. of main salt > SE - _se, _le = _se_le(_df_salt[_main], _log=True) - _df_salt[_main] = _df_salt[_main].mask(_df_salt[_main] < _se).copy() - - return _df_salt.reindex(_df.index) diff --git a/AeroViz/rawDataReader/script/MA350.py b/AeroViz/rawDataReader/script/MA350.py index 94edee8..22fd7fd 100644 --- a/AeroViz/rawDataReader/script/MA350.py +++ b/AeroViz/rawDataReader/script/MA350.py @@ -1,45 +1,46 @@ -from pandas import read_csv +from pandas import read_csv, to_numeric from AeroViz.rawDataReader.core import AbstractReader class Reader(AbstractReader): - nam = 'MA350' - - def _raw_reader(self, _file): - _df = read_csv(_file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis("Time") - - _df = _df.rename(columns={ - 'UV BCc': 'BC1', - 'Blue BCc': 'BC2', - 'Green BCc': 'BC3', - 'Red BCc': 'BC4', - 'IR BCc': 'BC5', - 'Biomass BCc (ng/m^3)': 'BB mass', - 'Fossil fuel BCc (ng/m^3)': 'FF mass', - 'Delta-C (ng/m^3)': 'Delta-C', - 'AAE': 'AAE', - 'BB (%)': 'BB', - }) - - # remove data without Status=32 (Automatic Tape Advance), 65536 (Tape Move) - # if not self._oth_set.get('ignore_err', False): - # _df = _df.where((_df['Status'] != 32) | (_df['Status'] != 65536)).copy() - - return _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'Delta-C', 'AAE', 'BB']] - - # QC data - def _QC(self, _df): - # remove negative value - _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'AAE', 'BB']].mask((_df < 0).copy()) - - # call by _QC function - # QC data in 1 hr - def _QC_func(_df_1hr): - _df_ave = _df_1hr.mean() - _df_std = _df_1hr.std() - _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5) - - return _df_1hr.mask(_df_lowb | _df_highb).copy() - - return _df.resample('1h', group_keys=False).apply(_QC_func).resample('5min').mean() + nam = 'MA350' + + def _raw_reader(self, file): + _df = read_csv(file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis( + "Time") + + _df = _df.rename(columns={ + 'UV BCc': 'BC1', + 'Blue BCc': 'BC2', + 'Green BCc': 'BC3', + 'Red BCc': 'BC4', + 'IR BCc': 'BC5', + 'Biomass BCc (ng/m^3)': 'BB mass', + 'Fossil fuel BCc (ng/m^3)': 'FF mass', + 'Delta-C (ng/m^3)': 'Delta-C', + 'AAE': 'AAE', + 'BB (%)': 'BB', + }) + + # if self.meta.get('error_state', False): + # _df = _df.where(~_df['Status'].isin(self.meta['error_state'])).copy() + + _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'Delta-C', 'AAE', 'BB']].apply(to_numeric, + errors='coerce') + + return _df.loc[~_df.index.duplicated() & _df.index.notna()] + + # QC data + def _QC(self, _df): + _index = _df.index.copy() + + # remove negative value + _df = _df.mask( + (_df[['BC1', 'BC2', 'BC3', 
'BC4', 'BC5']] <= 0) | (_df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5']] > 20000)) + + # use IQR_QC + _df = self.time_aware_IQR_QC(_df, time_window='1h') + + # make sure all columns have values, otherwise set to nan + return _df.dropna(how='any').reindex(_index) diff --git a/AeroViz/rawDataReader/script/Minion.py b/AeroViz/rawDataReader/script/Minion.py new file mode 100644 index 0000000..be5f0b9 --- /dev/null +++ b/AeroViz/rawDataReader/script/Minion.py @@ -0,0 +1,215 @@ +from typing import Literal + +import numpy as np +import pandas +from pandas import DataFrame, read_excel + +from AeroViz.rawDataReader.config.supported_instruments import meta +from AeroViz.rawDataReader.core import AbstractReader + +pandas.set_option("future.no_silent_downcasting", True) + +desired_order1 = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC', + 'CH4', 'PM10', 'PM2.5', 'WS', 'WD', 'AT', 'RH'] + +desired_order2 = ['Benzene', 'Toluene', 'EthylBenzene', 'm/p-Xylene', 'o-Xylene'] + +MDL_NUMBER = -999 + + +class Reader(AbstractReader): + nam = 'Minion' + + # Nanzi August data (Ministry of Environment) (air quality, heavy metal, and aerosol availability rates) -> Nanzi August data_level1 -> NZ_minion_XXXX + def _raw_reader(self, file): + df = read_excel(file, index_col=0, parse_dates=True) + df.index.name = 'Time' + + # rename columns and strip whitespace + df = df.rename(columns=lambda x: x.strip()) + + # save the units row + self.units = df.iloc[0].copy() + + # drop the units row from the raw data + df = df.iloc[1:] + + # replace specific flag values + df = df.replace({'維護校正': '*', np.nan: '-', 'Nodata': '-', '0L': MDL_NUMBER}) + # df = df.replace(to_replace=r'\d*\.?\d*[#]\b', value='_', regex=True) + df = df.replace(to_replace=r'\d*\.?\d*[L]\b', value=MDL_NUMBER, regex=True) + + # replace 0 values with MDL_NUMBER in every column except 'WD' + for col in [col for col in df.columns if col != 'WD']: + df[col] = df[col].replace({0: MDL_NUMBER}) + + # replace to numeric for estimating qc rate + df = df.replace({'_': MDL_NUMBER}) + + XRF_col = list(meta.get('XRF').get('MDL').keys()) + IGAC_col = list(meta.get('IGAC').get('MDL').keys()) + + # reorder columns + df = self.reorder_dataframe_columns(df, [desired_order1, desired_order2, XRF_col, IGAC_col]) + + # add the units row back to the DataFrame + # df = concat([units.to_frame().T, df]) + + # save Level1 data + output_folder = file.parent / 'Level1' + output_folder.mkdir(parents=True, exist_ok=True) + df.to_csv(output_folder / f'{file.stem}_Level1.csv') + + return df.loc[~df.index.duplicated() & df.index.notna()] + + def _QC(self, _df): + IGAC_col = list(meta.get('IGAC').get('MDL')) + XRF_col = list(meta.get('XRF').get('MDL')) + + # IGAC MDL QC + _df[IGAC_col] = self.IGAC_QAQC(_df[IGAC_col]) + + # XRF MDL QC + _df[XRF_col] = self.XRF_QAQC(_df[XRF_col]) + + # remove negative value + # _df = _df.mask((_df < 0)) + _df = _df.mask(_df == MDL_NUMBER, np.nan) + + col = [col for col in desired_order1 if col != 'WD'] + _df[col] = self.time_aware_IQR_QC(_df[col]) + + # Calculate the mass and ion balance + # mass tolerance = ± 1, ions balance tolerance = ± 1 + + # # conc. 
of main salt should be present at the same time (NH4+, SO42-, NO3-) + # _df_salt = df.mask(df.sum(axis=1, min_count=1) > df.PM25).dropna(subset=_main).copy() + + ions_mass = _df[['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', 'Cl-', 'NO3-', 'SO42-']].sum(axis=1) + element_mass = _df[XRF_col].sum(axis=1) + + estimated_mass = ions_mass + element_mass + + valid_mask = 2 * _df['PM2.5'] > estimated_mass + + _df.loc[~valid_mask, IGAC_col + XRF_col] = np.nan + + return _df + + def mdlReplace_timeAware_qc(self, df: DataFrame, MDL: dict, MDL_replace) -> DataFrame: + # Step 1: Track MDL positions and values below threshold + mdl_mask = (df.eq(MDL_NUMBER) | + df.apply(lambda x: x < MDL.get(x.name, float('-inf')))) + + # Step 2: Convert all values below MDL to MDL_NUMBER (-999) + df_mdl = df.mask(mdl_mask, MDL_NUMBER) + + # Step 3: Apply time_aware_IQR_QC (excluding MDL_NUMBER values) + df_qc = self.time_aware_IQR_QC(df_mdl.mask(df_mdl == MDL_NUMBER)) + + # Step 4: Handle values below MDL according to specified method + if MDL_replace == '0.5 * MDL': + for column, threshold in MDL.items(): + if column in df.columns and threshold is not None: + df_qc.loc[df_mdl[column] == MDL_NUMBER, column] = 0.5 * threshold + else: + df_qc.loc[df_mdl[column] == MDL_NUMBER, column] = np.nan + else: # 'nan' + df_qc = df_qc.mask(df_mdl == MDL_NUMBER, np.nan) + + return df_qc + + def XRF_QAQC(self, + df: DataFrame, + MDL_replace: Literal['nan', '0.5 * MDL'] = '0.5 * MDL' + ) -> DataFrame: + """ + Perform Quality Assurance and Quality Control for XRF data + + Parameters + ---------- + df : pd.DataFrame + Input dataframe with XRF data + MDL_replace : {'nan', '0.5 * MDL'}, default='nan' + Method to handle values below MDL: + - 'nan': Replace with NaN + - '0.5 * MDL': Replace with half of MDL value + + Returns + ------- + pd.DataFrame + Processed dataframe with QC applied and MDL values handled + """ + MDL = meta.get('XRF').get('MDL') + + df = self.mdlReplace_timeAware_qc(df, MDL, MDL_replace) + + # 轉換單位 ng/m3 -> ug/m3 + if df.Al.max() > 10 and df.Fe.max() > 10: + columns_to_convert = [col for col in MDL.keys() if col in df.columns] + df[columns_to_convert] = df[columns_to_convert].div(1000) + + self.logger.info(f"\t{'XRF QAQC summary':21}: transform values below MDL to {MDL_replace}") + + return df + + def IGAC_QAQC(self, + df: DataFrame, + MDL_replace: Literal['nan', '0.5 * MDL'] = '0.5 * MDL', + tolerance: float = 1 + ) -> DataFrame: + """ + Perform Quality Assurance and Quality Control for IGAC data + + Parameters + ---------- + df : pd.DataFrame + Input dataframe with IGAC data + MDL_replace : {'nan', '0.5 * MDL'}, default='nan' + Method to handle values below MDL: + - 'nan': Replace with NaN + - '0.5 * MDL': Replace with half of MDL value + tolerance : float, default=1 + Tolerance value for QC checks + + Returns + ------- + pd.DataFrame + Processed dataframe with QC applied and MDL values handled + """ + MDL = meta.get('IGAC').get('MDL') + + df = self.mdlReplace_timeAware_qc(df, MDL, MDL_replace) + + # Define the ions + _df = df.copy() + _cation, _anion, _main = (['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'], + ['Cl-', 'NO2-', 'NO3-', 'SO42-'], + ['SO42-', 'NO3-', 'NH4+']) + + _df['+_mole'] = _df[_cation].div([23, 18, 39, (24 / 2), (40 / 2)]).sum(axis=1, skipna=True) + _df['-_mole'] = _df[_anion].div([35.5, 46, 62, (96 / 2)]).sum(axis=1, skipna=True) + + # Avoid division by zero + _df['ratio'] = np.where(_df['-_mole'] != 0, _df['+_mole'] / _df['-_mole'], np.nan) + + # Calculate bounds + lower_bound, upper_bound = 1 - tolerance, 1 + 
tolerance + + # 根據ratio决定是否保留原始数据 + valid_mask = ((_df['ratio'] <= upper_bound) & (_df['ratio'] >= lower_bound) & + ~np.isnan(_df['+_mole']) & ~np.isnan(_df['-_mole'])) + + # 保留数據或將不符合的條件設為NaN + df.loc[~valid_mask] = np.nan + + # 計算保留的数據的百分比 + retained_percentage = (valid_mask.sum() / len(df)) * 100 + + self.logger.info( + f"\t{'Ions balance summary':21}: {retained_percentage.__round__(0)}% within tolerance ± {tolerance}") + + if retained_percentage < 70: + self.logger.warning("\tWarning: The percentage of retained data is less than 70%") + + return df diff --git a/AeroViz/rawDataReader/script/NEPH.py b/AeroViz/rawDataReader/script/NEPH.py index c00b963..97ddf38 100644 --- a/AeroViz/rawDataReader/script/NEPH.py +++ b/AeroViz/rawDataReader/script/NEPH.py @@ -1,80 +1,80 @@ -from pandas import to_datetime, read_csv, DataFrame +from pandas import to_datetime, read_csv, DataFrame, to_numeric from AeroViz.rawDataReader.core import AbstractReader class Reader(AbstractReader): - nam = 'NEPH' - - def _raw_reader(self, _file): - with _file.open('r', encoding='utf-8', errors='ignore') as f: - _df = read_csv(f, header=None, names=range(11)) - - _df_grp = _df.groupby(0) - - # T : time - _df_tm = _df_grp.get_group('T')[[1, 2, 3, 4, 5, 6]].astype(int) - - for _k in [2, 3, 4, 5, 6]: - _df_tm[_k] = _df_tm[_k].astype(int).map('{:02d}'.format).copy() - _df_tm = _df_tm.astype(str) - - _idx_tm = to_datetime((_df_tm[1] + _df_tm[2] + _df_tm[3] + _df_tm[4] + _df_tm[5] + _df_tm[6]), - format='%Y%m%d%H%M%S') - - # D : data - # col : 3~8 B G R BB BG BR - # 1e6 - try: - _df_dt = _df_grp.get_group('D')[[1, 2, 3, 4, 5, 6, 7, 8]].set_index(_idx_tm) - _df_out = (_df_dt.groupby(1).get_group('NBXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm) - _df_out.columns = ['B', 'G', 'R', 'BB', 'BG', 'BR'] - _df_out.index.name = 'Time' - - # Y : state - # col : 5 RH - _df_st = _df_grp.get_group('Y') - _df_out['RH'] = _df_st[5].values - _df_out['status'] = _df_st[9].values - - _df_out.mask(_df_out['status'] != 0) # 0000 -> numeric to 0 - - return _df_out[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']] - - except ValueError: - group_sizes = _df_grp.size() - print(group_sizes) - # Define the valid groups - valid_groups = {'B', 'G', 'R', 'D', 'T', 'Y', 'Z'} - - # Find the rows where the value in the first column is not in valid_groups - invalid_indices = _df[~_df[0].isin(valid_groups)].index - - # Print the invalid indices and their corresponding values - invalid_values = _df.loc[invalid_indices, 0] - print("Invalid values and their indices:") - for idx, value in zip(invalid_indices, invalid_values): - print(f"Index: {idx}, Value: {value}") - - # If there's a length mismatch, return an empty DataFrame with the same index and column names - columns = ['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH'] - _df_out = DataFrame(index=_idx_tm, columns=columns) - _df_out.index.name = 'Time' - print(f'\n\t\t\t Length mismatch in {_file} data. 
Returning an empty DataFrame.') - return _df_out - - # QC data - def _QC(self, _df): - # remove negative value - _df = _df.mask((_df <= 0).copy()) - - # call by _QC function - # QC data in 1 hr - def _QC_func(_df_1hr): - _df_ave = _df_1hr.mean() - _df_std = _df_1hr.std() - _df_lowb, _df_highb = _df_1hr < (_df_ave - _df_std * 1.5), _df_1hr > (_df_ave + _df_std * 1.5) - - return _df_1hr.mask(_df_lowb | _df_highb).copy() - - return _df.resample('1h', group_keys=False).apply(_QC_func) + nam = 'NEPH' + + def _raw_reader(self, file): + with file.open('r', encoding='utf-8', errors='ignore') as f: + _df = read_csv(f, header=None, names=range(11)) + + _df_grp = _df.groupby(0) + + # T : time + _idx_tm = to_datetime( + _df_grp.get_group('T')[[1, 2, 3, 4, 5, 6]] + .map(lambda x: f"{int(x):02d}") + .agg(''.join, axis=1), + format='%Y%m%d%H%M%S' + ) + + # D : data + # col : 3~8 B G R BB BG BR + # 1e6 + try: + _df_dt = _df_grp.get_group('D')[[1, 2, 3, 4, 5, 6, 7, 8]].set_index(_idx_tm) + + try: + _df_out = (_df_dt.groupby(1).get_group('NBXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm) + except KeyError: + _df_out = (_df_dt.groupby(1).get_group('NTXX')[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm) + + _df_out.columns = ['B', 'G', 'R', 'BB', 'BG', 'BR'] + _df_out.index.name = 'Time' + + # Y : state + # col : 5 RH + _df_st = _df_grp.get_group('Y') + _df_out['RH'] = _df_st[5].values + _df_out['status'] = _df_st[9].values + + _df_out.mask(_df_out['status'] != 0) # 0000 -> numeric to 0 + + _df = _df_out[['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']].apply(to_numeric, errors='coerce') + + return _df.loc[~_df.index.duplicated() & _df.index.notna()] + + except ValueError: + # Define valid groups and find invalid indices + invalid_indices = _df[~_df[0].isin({'B', 'G', 'R', 'D', 'T', 'Y', 'Z'})].index + print("Invalid values and their indices:") + print("\n".join([f"Index: {idx}, Value: {_df.at[idx, 0]}" for idx in invalid_indices])) + + # Return an empty DataFrame with specified columns if there's a length mismatch + _df_out = DataFrame(index=_idx_tm, columns=['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']) + _df_out.index.name = 'Time' + print(f'\n\t\t\t Length mismatch in {file} data. 
Returning an empty DataFrame.') + return _df_out + + def _QC(self, _df): + MDL_sensitivity = {'B': .1, 'G': .1, 'R': .3} + + _index = _df.index.copy() + + # remove negative value + _df = _df.mask((_df <= 0) | (_df > 2000)) + + # total scattering is larger than back scattering + _df = _df.loc[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])] + + # blue scattering is larger than green scattering, green scattering is larger than red scattering + # due to the nephelometer's Green PMT in FS is already aged, this QC may delete too many data + # _df = _df.loc[(_df['B'] > _df['G']) & (_df['G'] > _df['R'])] + + # use IQR_QC + _df = self.time_aware_IQR_QC(_df, time_window='1h') + + # make sure all columns have values, otherwise set to nan + return _df.dropna(how='any').reindex(_index) diff --git a/AeroViz/rawDataReader/script/OCEC.py b/AeroViz/rawDataReader/script/OCEC.py new file mode 100644 index 0000000..7236139 --- /dev/null +++ b/AeroViz/rawDataReader/script/OCEC.py @@ -0,0 +1,92 @@ +import numpy as np +from pandas import to_datetime, read_csv, to_numeric + +from AeroViz.rawDataReader.core import AbstractReader + + +class Reader(AbstractReader): + nam = 'OCEC' + + def _raw_reader(self, file): + with open(file, 'r', encoding='utf-8', errors='ignore') as f: + _df = read_csv(f, skiprows=3) + + _df['Start Date/Time'] = _df['Start Date/Time'].str.strip() + _df['time'] = to_datetime(_df['Start Date/Time'], format='%m/%d/%Y %I:%M:%S %p', errors='coerce') + + if _df['time'].isna().any(): + _df['time'] = to_datetime(_df['Start Date/Time'], format='%m/%d/%Y %H:%M:%S', errors='coerce') + + _df = _df.set_index('time') + + _df = _df.loc[~_df.index.duplicated() & _df.index.notna()] + + _df.index = _df.index.round('1h') + + _df = _df.rename(columns={ + 'Thermal/Optical OC (ugC/LCm^3)': 'Thermal_OC', + 'Thermal/Optical EC (ugC/LCm^3)': 'Thermal_EC', + 'OC=TC-BC (ugC/LCm^3)': 'Optical_OC', + 'BC (ugC/LCm^3)': 'Optical_EC', + 'TC (ugC/LCm^3)': 'TC', + + 'OC ugC/m^3 (Thermal/Optical)': 'Thermal_OC', + 'EC ugC/m^3 (Thermal/Optical)': 'Thermal_EC', + 'OC by diff ugC (TC-OptEC)': 'Optical_OC', + 'OptEC ugC/m^3': 'Optical_EC', + 'TC ugC/m^3': 'TC', + + 'Sample Volume Local Condition Actual m^3': 'Sample_Volume', + + 'OCPk1-ug C': 'OC1_raw', + 'OCPk2-ug C': 'OC2_raw', + 'OCPk3-ug C': 'OC3_raw', + 'OCPk4-ug C': 'OC4_raw', + 'Pyrolized C ug': 'PC_raw', + + 'ECPk1-ug C': 'EC1_raw', + 'ECPk2-ug C': 'EC2_raw', + 'ECPk3-ug C': 'EC3_raw', + 'ECPk4-ug C': 'EC4_raw', + 'ECPk5-ug C': 'EC5_raw', + }) + + _df = _df.apply(to_numeric, errors='coerce') + + _df['OC1'] = _df['OC1_raw'] / _df['Sample_Volume'] + _df['OC2'] = _df['OC2_raw'] / _df['Sample_Volume'] + _df['OC3'] = _df['OC3_raw'] / _df['Sample_Volume'] + _df['OC4'] = _df['OC4_raw'] / _df['Sample_Volume'] + + _df['PC'] = _df['Thermal_OC'] - _df['OC1'] - _df['OC2'] - _df['OC3'] - _df['OC4'] + + # _df['EC1'] = _df['EC1_raw'] / _df['Sample_Volume'] + # _df['EC2'] = _df['EC2_raw'] / _df['Sample_Volume'] + # _df['EC3'] = _df['EC3_raw'] / _df['Sample_Volume'] + # _df['EC4'] = _df['EC4_raw'] / _df['Sample_Volume'] + # _df['EC5'] = _df['EC5_raw'] / _df['Sample_Volume'] + + _df = _df[['Thermal_OC', 'Thermal_EC', 'Optical_OC', 'Optical_EC', 'TC', 'Sample_Volume', + 'OC1', 'OC2', 'OC3', 'OC4', 'PC']] + + return _df.loc[~_df.index.duplicated() & _df.index.notna()] + + # QC data + def _QC(self, _df): + MDL = {'Thermal_OC': 0.3, + 'Optical_OC': 0.3, + 'Thermal_EC': 0.015, + 'Optical_EC': 0.015 + } + + _index = _df.index.copy() + + _df = _df.mask((_df <= -5) | (_df > 
100)) + + for col, threshold in MDL.items(): + _df.loc[_df[col] <= threshold, col] = np.nan + + # use IQR_QC + _df = self.time_aware_IQR_QC(_df) + + return _df.dropna(subset=['Thermal_OC', 'Optical_OC']).reindex(_index) diff --git a/AeroViz/rawDataReader/script/OCEC_LCRES.py b/AeroViz/rawDataReader/script/OCEC_LCRES.py deleted file mode 100644 index 15086cd..0000000 --- a/AeroViz/rawDataReader/script/OCEC_LCRES.py +++ /dev/null @@ -1,34 +0,0 @@ -from pandas import to_datetime, read_csv - -from AeroViz.rawDataReader.core import AbstractReader - - -class Reader(AbstractReader): - nam = 'OCEC_LCRES' - - def _raw_reader(self, _file): - with open(_file, 'r', encoding='utf-8', errors='ignore') as f: - _df = read_csv(f, skiprows=3) - - _col = {'Thermal/Optical OC (ugC/LCm^3)': 'Thermal_OC', - 'Thermal/Optical EC (ugC/LCm^3)': 'Thermal_EC', - 'OC=TC-BC (ugC/LCm^3)': 'Optical_OC', - 'BC (ugC/LCm^3)': 'Optical_EC', - 'Sample Volume Local Condition Actual m^3': 'Sample_Volume', - 'TC (ugC/LCm^3)': 'TC', } - - _tm_idx = to_datetime(_df['Start Date/Time'], errors='coerce') - _df['time'] = _tm_idx - - _df = _df.dropna(subset='time').loc[~_tm_idx.duplicated()].set_index('time') - - return _df[_col.keys()].rename(columns=_col) - - ## QC data - def _QC(self, _df): - _df[['Thermal_OC', 'Optical_OC']] = _df[['Thermal_OC', 'Optical_OC']].where( - _df[['Thermal_OC', 'Optical_OC']] > 0.3).copy() - _df[['Thermal_EC', 'Optical_EC']] = _df[['Thermal_EC', 'Optical_EC']].where( - _df[['Thermal_EC', 'Optical_EC']] > .015).copy() - - return _df diff --git a/AeroViz/rawDataReader/script/OCEC_RES.py b/AeroViz/rawDataReader/script/OCEC_RES.py deleted file mode 100644 index 2faeeb7..0000000 --- a/AeroViz/rawDataReader/script/OCEC_RES.py +++ /dev/null @@ -1,28 +0,0 @@ -from pandas import to_datetime, read_csv - -from AeroViz.rawDataReader.core import AbstractReader - - -class Reader(AbstractReader): - nam = 'OCEC_RES' - - def _raw_reader(self, _file): - with open(_file, 'r', encoding='utf-8', errors='ignore') as f: - _df = read_csv(f, skiprows=3) - - _col = {'OCPk1-ug C': 'OC1', - 'OCPk2-ug C': 'OC2', - 'OCPk3-ug C': 'OC3', - 'OCPk4-ug C': 'OC4', - 'Pyrolized C ug': 'PC', } - - _tm_idx = to_datetime(_df['Start Date/Time'], errors='coerce') - _df['time'] = _tm_idx - - _df = _df.dropna(subset='time').loc[~_tm_idx.duplicated()].set_index('time') - - return _df[_col.keys()].rename(columns=_col) - - ## QC data - def _QC(self, _df): - return _df.where(_df > 0) diff --git a/AeroViz/rawDataReader/script/SMPS.py b/AeroViz/rawDataReader/script/SMPS.py new file mode 100644 index 0000000..5620af7 --- /dev/null +++ b/AeroViz/rawDataReader/script/SMPS.py @@ -0,0 +1,83 @@ +import csv + +import numpy as np +from pandas import to_datetime, to_numeric, read_csv + +from AeroViz.rawDataReader.core import AbstractReader + + +def find_header_row(file_obj, delimiter): + csv_reader = csv.reader(file_obj, delimiter=delimiter) + for skip, row in enumerate(csv_reader): + if row and (row[0] in ['Sample #', 'Scan Number']): + return skip + raise ValueError("Header row not found") + + +def parse_date(df, date_format): + if 'Date' in df.columns and 'Start Time' in df.columns: + return to_datetime(df['Date'] + ' ' + df['Start Time'], format=date_format, errors='coerce') + elif 'DateTime Sample Start' in df.columns: + return to_datetime(df['DateTime Sample Start'], format=date_format, errors='coerce') + else: + raise ValueError("Expected date columns not found") + + +class Reader(AbstractReader): + nam = 'SMPS' + + def _raw_reader(self, file): + with 
open(file, 'r', encoding='utf-8', errors='ignore') as f: + if file.suffix.lower() == '.txt': + delimiter, date_formats = '\t', ['%m/%d/%y %X', '%m/%d/%Y %X'] + else: # csv + delimiter, date_formats = ',', ['%d/%m/%Y %X'] + + skip = find_header_row(f, delimiter) + f.seek(0) + + _df = read_csv(f, sep=delimiter, skiprows=skip) + + for date_format in date_formats: + _time_index = parse_date(_df, date_format) + if not _time_index.isna().all(): + break + else: + raise ValueError("Unable to parse dates with given formats") + + # sequence the data + numeric_cols = [col for col in _df.columns if col.strip().replace('.', '').isdigit()] + numeric_cols.sort(key=lambda x: float(x.strip())) + + _df.index = _time_index + _df.index.name = 'time' + + _df_smps = _df[numeric_cols] + _df_smps.columns = _df_smps.columns.astype(float) + _df_smps = _df_smps.loc[_df_smps.index.dropna().copy()] + + if _df_smps.columns[0] != self.size_range[0] or _df_smps.columns[-1] != self.size_range[1]: + self.logger.info(f'\tSMPS file: {file.name} is not match the default size range {self.size_range}, ' + f'it is ({_df_smps.columns[0]}, {_df_smps.columns[-1]})') + + return _df_smps.apply(to_numeric, errors='coerce') + + # QC data + def _QC(self, _df): + size_range_mask = (_df.columns.astype(float) >= self.size_range[0]) & ( + _df.columns.astype(float) <= self.size_range[1]) + _df = _df.loc[:, size_range_mask] + + # mask out the data size lower than 7 + _df['total'] = _df.sum(axis=1, min_count=1) * (np.diff(np.log(_df.keys().to_numpy(float)))).mean() + _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill() + _df = _df.mask(_df_size < 7) + + # remove total conc. lower than 2000 + _df = _df.mask(_df['total'] < 2000) + + # remove the bin over 400 nm which num. conc. larger than 4000 + _df_remv_ky = _df.keys()[:-1][_df.keys()[:-1] >= 400.] + _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.) + + return _df[_df.keys()[:-1]] diff --git a/AeroViz/rawDataReader/script/SMPS_TH.py b/AeroViz/rawDataReader/script/SMPS_TH.py deleted file mode 100644 index f7e1bb8..0000000 --- a/AeroViz/rawDataReader/script/SMPS_TH.py +++ /dev/null @@ -1,41 +0,0 @@ -from pandas import to_datetime, read_table - -from AeroViz.rawDataReader.core import AbstractReader - - -class Reader(AbstractReader): - nam = 'SMPS_TH' - - def _raw_reader(self, _file): - with open(_file, 'r', encoding='utf-8', errors='ignore') as f: - _df = read_table(f, skiprows=18, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time') - _key = list(_df.keys()[6:-26]) - - _newkey = {} - for _k in _key: - _newkey[_k] = float(_k).__round__(4) - - # _newkey['Total Conc.(#/cm)'] = 'total' - # _newkey['Mode(nm)'] = 'mode' - - _df_idx = to_datetime(_df.index, errors='coerce') - return _df[_newkey.keys()].rename(_newkey, axis=1).set_index(_df_idx).loc[_df_idx.dropna()] - - ## QC data - def _QC(self, _df): - import numpy as n - - ## mask out the data size lower than 7 - _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean() - _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill() - _df = _df.mask(_df_size < 7) - - ## remove total conc. lower than 2000 - _df = _df.mask(_df['total'] < 2000) - - ## remove the bin over 400 nm which num. conc. larger than 4000 - _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.] - - _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.) 
- - return _df[_df.keys()[:-1]] diff --git a/AeroViz/rawDataReader/script/SMPS_aim11.py b/AeroViz/rawDataReader/script/SMPS_aim11.py deleted file mode 100644 index 2ccb72c..0000000 --- a/AeroViz/rawDataReader/script/SMPS_aim11.py +++ /dev/null @@ -1,51 +0,0 @@ -from pandas import to_datetime, read_csv, to_numeric - -from AeroViz.rawDataReader.core import AbstractReader - - -class Reader(AbstractReader): - nam = 'SMPS_aim11' - - def _raw_reader(self, _file): - with open(_file, 'r', encoding='utf-8', errors='ignore') as f: - - skiprows = 0 - for _line in f: - - if _line.split(',')[0] == 'Scan Number': - f.seek(0) - break - - skiprows += 1 - # breakpoint() - _df = read_csv(f, skiprows=skiprows) - _tm_idx = to_datetime(_df['DateTime Sample Start'], format='%d/%m/%Y %X', errors='coerce') - - ## index - _df = _df.set_index(_tm_idx).loc[_tm_idx.dropna()] - - ## keys - _key = to_numeric(_df.keys(), errors='coerce') - _df.columns = _key - _df = _df.loc[:, ~_key.isna()] - - return _df.apply(to_numeric, errors='coerce') - - ## QC data - def _QC(self, _df): - import numpy as n - - ## mask out the data size lower than 7 - _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean() - _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill() - _df = _df.mask(_df_size < 7) - - ## remove total conc. lower than 2000 - _df = _df.mask(_df['total'] < 2000) - - ## remove the bin over 400 nm which num. conc. larger than 4000 - _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.] - - _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.) - - return _df[_df.keys()[:-1]] diff --git a/AeroViz/rawDataReader/script/SMPS_genr.py b/AeroViz/rawDataReader/script/SMPS_genr.py deleted file mode 100644 index 14cf08f..0000000 --- a/AeroViz/rawDataReader/script/SMPS_genr.py +++ /dev/null @@ -1,51 +0,0 @@ -from pandas import to_datetime, read_table, to_numeric - -from AeroViz.rawDataReader.core import AbstractReader - - -class Reader(AbstractReader): - nam = 'SMPS_genr' - - def _raw_reader(self, _file): - with open(_file, 'r', encoding='utf-8', errors='ignore') as f: - - skiprows = 0 - for _line in f: - - if _line.split('\t')[0] == 'Sample #': - f.seek(0) - break - - skiprows += 1 - - _df = read_table(f, skiprows=skiprows) - _tm_idx = to_datetime(_df['Date'] + _df['Start Time'], format='%m/%d/%y%X', errors='coerce') - - ## index - _df = _df.set_index(_tm_idx).loc[_tm_idx.dropna()] - - ## keys - _key = to_numeric(_df.keys(), errors='coerce') - _df.columns = _key - _df = _df.loc[:, ~_key.isna()] - - return _df.apply(to_numeric, errors='coerce') - - ## QC data - def _QC(self, _df): - import numpy as n - - ## mask out the data size lower than 7 - _df['total'] = _df.sum(axis=1, min_count=1) * (n.diff(n.log(_df.keys().to_numpy(float)))).mean() - _df_size = _df['total'].dropna().resample('1h').size().resample(_df.index.freq).ffill() - _df = _df.mask(_df_size < 7) - - ## remove total conc. lower than 2000 - _df = _df.mask(_df['total'] < 2000) - - ## remove the bin over 400 nm which num. conc. larger than 4000 - _df_remv_ky = _df.keys()[:-2][_df.keys()[:-2] >= 400.] - - _df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.) 
- - return _df[_df.keys()[:-1]] diff --git a/AeroViz/rawDataReader/script/TEOM.py b/AeroViz/rawDataReader/script/TEOM.py index f622ac8..a996444 100644 --- a/AeroViz/rawDataReader/script/TEOM.py +++ b/AeroViz/rawDataReader/script/TEOM.py @@ -1,46 +1,52 @@ -from pandas import to_datetime, read_csv +from pandas import to_datetime, read_csv, Timedelta, to_numeric from AeroViz.rawDataReader.core import AbstractReader class Reader(AbstractReader): - nam = 'TEOM' + nam = 'TEOM' - def _raw_reader(self, _file): - with open(_file, 'r', encoding='utf-8', errors='ignore') as f: - _df = read_csv(f, skiprows=3, index_col=False) + def _raw_reader(self, file): + with open(file, 'r', encoding='utf-8', errors='ignore') as f: + _df = read_csv(f, skiprows=3, index_col=False) - _df = _df.rename(columns={'Time Stamp': 'time', - 'System status': 'status', - 'PM-2.5 base MC': 'PM_NV', - 'PM-2.5 MC': 'PM_Total', - 'PM-2.5 TEOM noise': 'noise', }) + _df = _df.rename(columns={'Time Stamp': 'time', + 'System status': 'status', + 'PM-2.5 base MC': 'PM_NV', + 'PM-2.5 MC': 'PM_Total', + 'PM-2.5 TEOM noise': 'noise', }) - _time_replace = {'十一月': '11', '十二月': '12', '一月': '01', '二月': '02', '三月': '03', '四月': '04', - '五月': '05', '六月': '06', '七月': '07', '八月': '08', '九月': '09', '十月': '10'} + _time_replace = {'十一月': '11', '十二月': '12', '一月': '01', '二月': '02', '三月': '03', '四月': '04', + '五月': '05', '六月': '06', '七月': '07', '八月': '08', '九月': '09', '十月': '10'} - _tm_idx = _df.time - for _ori, _rpl in _time_replace.items(): - _tm_idx = _tm_idx.str.replace(_ori, _rpl) + _tm_idx = _df.time + for _ori, _rpl in _time_replace.items(): + _tm_idx = _tm_idx.str.replace(_ori, _rpl) - _df = _df.set_index(to_datetime(_tm_idx, errors='coerce', format='%d - %m - %Y %X')) + _df = _df.set_index(to_datetime(_tm_idx, errors='coerce', format='%d - %m - %Y %X')) - _df = _df.where(_df['status'] < 1e-7) + _df = _df.where(_df['status'] < 1) - return _df[['PM_NV', 'PM_Total', 'noise', ]] + _df = _df[['PM_NV', 'PM_Total', 'noise']].apply(to_numeric, errors='coerce') - ## QC data - def _QC(self, _df): + return _df.loc[~_df.index.duplicated() & _df.index.notna()] - _df_idx = _df.index.copy() + # QC data + def _QC(self, _df): + _index = _df.index.copy() - ## remove negative value - _df = _df.where(_df.noise < 0.01)[['PM_NV', 'PM_Total']].mask((_df < 0).copy()) + # remove negative value + _df = _df.where(_df.noise < 0.01)[['PM_NV', 'PM_Total']].mask((_df <= 0)) - ## QC data in 1 hr - ## remove data where size < 8 in 1-hr - for _key in ['PM_Total', 'PM_NV']: - _size = _df[_key].dropna().resample('1h').size().reindex(_df_idx).ffill().copy() - _df[_key] = _df[_key].mask(_size < 8) + # QC data in 1 hr + # use time_aware_IQR_QC + _df = self.time_aware_IQR_QC(_df, time_window='6h') - return _df.reindex(_df_idx) + # remove data where size < 50% in 1-hr + points_per_hour = Timedelta('1h') / Timedelta(self.meta['freq']) + for _key in ['PM_Total', 'PM_NV']: + _size = _df[_key].dropna().resample('1h').size().reindex(_index).ffill() + _df[_key] = _df[_key].mask(_size < points_per_hour * 0.5) + + # make sure all columns have values, otherwise set to nan + return _df.dropna(how='any').reindex(_index) diff --git a/AeroViz/rawDataReader/script/Table.py b/AeroViz/rawDataReader/script/Table.py deleted file mode 100644 index efcf547..0000000 --- a/AeroViz/rawDataReader/script/Table.py +++ /dev/null @@ -1,28 +0,0 @@ -# read meteorological data from google sheet - - -from pandas import read_csv, to_datetime - -from AeroViz.rawDataReader.core import AbstractReader - - -class 
Reader(AbstractReader): - nam = 'Table' - - def _raw_reader(self, _file): - self.meta['freq'] = self._oth_set.get('data_freq') or self.meta['freq'] - - with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f: - _df = read_csv(f, low_memory=False, index_col=0) - - _df.index = to_datetime(_df.index, errors='coerce', format=self._oth_set.get('date_format') or 'mixed') - _df.index.name = 'time' - - _df.columns = _df.keys().str.strip(' ') - - _df = _df.loc[_df.index.dropna()].copy() - - return _df.loc[~_df.index.duplicated()] - - def _QC(self, _df): - return _df diff --git a/AeroViz/rawDataReader/script/VOC.py b/AeroViz/rawDataReader/script/VOC.py new file mode 100644 index 0000000..1a19ac8 --- /dev/null +++ b/AeroViz/rawDataReader/script/VOC.py @@ -0,0 +1,33 @@ +from pandas import read_csv + +from AeroViz.rawDataReader.core import AbstractReader + + +class Reader(AbstractReader): + nam = 'VOC' + + def _raw_reader(self, file): + with file.open('r', encoding='utf-8-sig', errors='ignore') as f: + _df = read_csv(f, parse_dates=True, index_col=0, na_values=('-', 'N.D.')) + + _df.columns = _df.keys().str.strip(' ') + _df.index.name = 'time' + + support_voc = set(self.meta["key"]) + + valid_keys = [key for key in _df.keys() if key in support_voc] + invalid_keys = [key for key in _df.keys() if key not in support_voc] + + if invalid_keys: + self.logger.warning(f'{invalid_keys} are not supported keys.') + print(f'\n\t{invalid_keys} are not supported keys.' + f'\n\tPlease check the\033[91m support_voc.md\033[0m file to use the correct name.') + + if valid_keys: + return _df[valid_keys].loc[~_df.index.duplicated() & _df.index.notna()] + else: + self.logger.warning("No matching keys found. Returning the original DataFrame.") + return _df.loc[~_df.index.duplicated() & _df.index.notna()] + + def _QC(self, _df): + return _df diff --git a/AeroViz/rawDataReader/script/VOC_TH.py b/AeroViz/rawDataReader/script/VOC_TH.py deleted file mode 100644 index e31c53c..0000000 --- a/AeroViz/rawDataReader/script/VOC_TH.py +++ /dev/null @@ -1,30 +0,0 @@ -# read meteorological data from google sheet - - -from pandas import read_csv - -from AeroViz.rawDataReader.core import AbstractReader - - -class Reader(AbstractReader): - nam = 'VOC_TH' - - def _raw_reader(self, _file): - _keys = ['Isopentane', 'Hexane', '2-Methylhexane', '3-Methylhexane', '2-Methylheptane', '3-Methylheptane', - 'Propene', '1.3-Butadiene', 'Isoprene', '1-Octene', - 'Benzene', 'Toluene', 'Ethylbenzene', 'm.p-Xylene', 'o-Xylene', 'Iso-Propylbenzene', 'Styrene', - 'n-Propylbenzene', '3.4-Ethyltoluene', '1.3.5-TMB', '2-Ethyltoluene', '1.2.4-TMB', '1.2.3-TMB', - 'Acetaldehyde', 'Ethanol', 'Acetone', 'IPA', 'Ethyl Acetate', 'Butyl Acetate', - 'VCM', 'TCE', 'PCE', '1.4-DCB', '1.2-DCB'] - - with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f: - _df = read_csv(f, parse_dates=[0], index_col=[0], na_values=['-', 'N.D.']) - - _df.columns = _df.keys().str.strip(' ') - _df.index.name = 'time' - - _df = _df[_keys].loc[_df.index.dropna()] - return _df.loc[~_df.index.duplicated()] - - def _QC(self, _df): - return _df diff --git a/AeroViz/rawDataReader/script/VOC_ZM.py b/AeroViz/rawDataReader/script/VOC_ZM.py deleted file mode 100644 index 6a4975d..0000000 --- a/AeroViz/rawDataReader/script/VOC_ZM.py +++ /dev/null @@ -1,37 +0,0 @@ -# read meteorological data from google sheet - - -from pandas import read_csv - -from AeroViz.rawDataReader.core import AbstractReader - - -class Reader(AbstractReader): - nam = 'VOC_ZM' - - def _raw_reader(self, _file): - _keys = ['Ethane', 'Propane', 
'Isobutane', 'n-Butane', 'Cyclopentane', 'Isopentane', - 'n-Pentane', '2,2-Dimethylbutane', '2,3-Dimethylbutane', '2-Methylpentane', - '3-Methylpentane', 'n-Hexane', 'Methylcyclopentane', '2,4-Dimethylpentane', - 'Cyclohexane', '2-Methylhexane', '2-Methylhexane', '3-Methylheptane', - '2,2,4-Trimethylpentane', 'n-Heptane', 'Methylcyclohexane', - '2,3,4-Trimethylpentane', '2-Methylheptane', '3-Methylhexane', 'n-Octane', - 'n-Nonane', 'n-Decane', 'n-Undecane', 'Ethylene', 'Propylene', 't-2-Butene', - '1-Butene', 'cis-2-Butene', 't-2-Pentene', '1-Pentene', 'cis-2-Pentene', - 'isoprene', 'Acetylene', 'Benzene', 'Toluene', 'Ethylbenzene', 'm,p-Xylene', - 'Styrene', 'o-Xylene', 'Isopropylbenzene', 'n-Propylbenzene', 'm-Ethyltoluene', - 'p-Ethyltoluene', '1,3,5-Trimethylbenzene', 'o-Ethyltoluene', - '1,2,4-Trimethylbenzene', '1,2,3-Trimethylbenzene', 'm-Diethylbenzene', - 'p-Diethylbenzene'] - - with (_file).open('r', encoding='utf-8-sig', errors='ignore') as f: - _df = read_csv(f, parse_dates=[0], index_col=[0], na_values=['-']) - - _df.columns = _df.keys().str.strip(' ') - _df.index.name = 'time' - - _df = _df[_keys].loc[_df.index.dropna()] - return _df.loc[~_df.index.duplicated()] - - def _QC(self, _df): - return _df diff --git a/AeroViz/rawDataReader/script/XRF.py b/AeroViz/rawDataReader/script/XRF.py new file mode 100644 index 0000000..d87beda --- /dev/null +++ b/AeroViz/rawDataReader/script/XRF.py @@ -0,0 +1,11 @@ +from AeroViz.rawDataReader.core import AbstractReader + + +class Reader(AbstractReader): + nam = 'XRF' + + def _raw_reader(self, file): + pass + + def _QC(self, _df): + pass diff --git a/AeroViz/rawDataReader/script/__init__.py b/AeroViz/rawDataReader/script/__init__.py index 049ad2c..ae1869d 100644 --- a/AeroViz/rawDataReader/script/__init__.py +++ b/AeroViz/rawDataReader/script/__init__.py @@ -1,22 +1,18 @@ __all__ = [ - 'NEPH', - 'Aurora', - 'Table', - 'EPA_vertical', - 'APS_3321', - 'SMPS_TH', - 'AE33', - 'AE43', - 'BC1054', - 'MA350', - 'TEOM', - 'OCEC_RES', - 'OCEC_LCRES', - 'IGAC_TH', - 'IGAC_ZM', - 'VOC_TH', - 'VOC_ZM', - 'SMPS_genr', - 'SMPS_aim11', - 'GRIMM' + 'NEPH', + 'Aurora', + 'SMPS', + 'APS_3321', + 'GRIMM', + 'AE33', + 'AE43', + 'BC1054', + 'MA350', + 'TEOM', + 'OCEC', + 'IGAC', + 'XRF', + 'VOC', + 'EPA', + 'Minion' ] diff --git a/AeroViz/rawDataReader/utils/config.py b/AeroViz/rawDataReader/utils/config.py deleted file mode 100644 index 27bd37b..0000000 --- a/AeroViz/rawDataReader/utils/config.py +++ /dev/null @@ -1,169 +0,0 @@ -# Description: Configuration file for rawDataReader - -instrument = [ - "NEPH", - "Aurora", - "AE33", - "AE43", - "Table", - "EPA_vertical", - "SMPS_NTU(SMPS_3080_3788)", - "SMPS_TH(SMPS_3080_3772)", - "APS_3321", - "TEOM", - "OCEC" -] - -meta = { - "NEPH": { - "pattern": "*.DAT", - "freq": "5min", - "deter_key": {"Scatter Coe. (550 nm)": ["G"]}, - }, - - "Aurora": { - "pattern": "*.csv", - "freq": "1min", - "deter_key": {"Scatter Coe. 
(550 nm)": ["G"]}, - }, - - "Table": { - "pattern": "*.csv", - "freq": "1h", - "deter_key": None, - }, - - "EPA_vertical": { - "pattern": "*.csv", - "freq": "1h", - "deter_key": None, - }, - - "SMPS_TH": { - "pattern": "*.txt", - "freq": "6min", - "deter_key": {"Bins": ["all"]}, - }, - - "SMPS_genr": { - "pattern": "*.txt", - "freq": "6min", - "deter_key": {"Bins": ["all"]}, - }, - - "SMPS_aim11": { - "pattern": "*.csv", - "freq": "6min", - "deter_key": {"Bins": ["all"]}, - }, - - "GRIMM": { - "pattern": "*.dat", - "freq": "6min", - "deter_key": {"Bins": ["all"]}, - }, - - "APS_3321": { - "pattern": "*.TXT", - "freq": "6min", - "deter_key": {"Bins": ["all"]}, - }, - - "AE33": { - "pattern": "[!ST|!CT|!FV]*[!log]_AE33*.dat", - "freq": "1min", - "deter_key": {"BC Mass Conc. (880 nm)": ["BC6"]}, - }, - - "AE43": { - "pattern": "[!ST|!CT|!FV]*[!log]_AE43*.dat", - "freq": "1min", - "deter_key": {"BC Mass Conc. (880 nm)": ["BC6"]}, - }, - - "BC1054": { - "pattern": "*.csv", - "freq": "1min", - "deter_key": {"BC Mass Conc. (880 nm)": ["BC6"]}, - }, - - "MA350": { - "pattern": "*.csv", - "freq": "1min", - "deter_key": {"BC Mass Conc. (880 nm)": ["BC5"]}, - }, - - "TEOM": { - "pattern": "*.csv", - "freq": "6min", - "deter_key": { - "PM1.0 Mass Conc.": ["PM_Total"], - "PM1.0 NV Mass Conc.": ["PM_NV"], - }, - }, - - "OCEC_LCRES": { - "pattern": "*LCRes.csv", - "freq": "1h", - "deter_key": { - "Thermal OC/EC": ["Thermal_EC", "Thermal_OC"], - "Thermal OC": ["Thermal_OC"], - "Thermal EC": ["Thermal_EC"], - "Optical OC/EC": ["Optical_EC", "Optical_OC"], - "Optical OC": ["Optical_OC"], - "Optical EC": ["Optical_EC"], - }, - }, - - "OCEC_RES": { - "pattern": "*[!LC|!Blanks]Res.csv", - "freq": "1h", - "deter_key": None, - }, - - "IGAC_TH": { - "pattern": "*.csv", - "freq": "1h", - "deter_key": { - "Na+": ["Na+"], - "NH4+": ["NH4+"], - "K+": ["K+"], - "Mg2+": ["Mg2+"], - "Ca2+": ["Ca2+"], - "Cl-": ["Cl-"], - "NO2-": ["NO2-"], - "NO3-": ["NO3-"], - "SO42-": ["SO42-"], - "Main Salt (NH4+, NO3-, SO42-)": ["NO3-", "SO42-", "NH4+"], - }, - }, - - "IGAC_ZM": { - "pattern": "*.csv", - "freq": "1h", - "deter_key": {"Na+": ["Na+"], - "NH4+": ["NH4+"], - "K+": ["K+"], - "Mg2+": ["Mg2+"], - "Ca2+": ["Ca2+"], - "Cl-": ["Cl-"], - "NO2-": ["NO2-"], - "NO3-": ["NO3-"], - "SO42-": ["SO42-"], - "Main Salt (NH4+, NO3-, SO42-)": ["NO3-", "SO42-", "NH4+"], - }, - }, - - "VOC_TH": { - "pattern": "*.csv", - "freq": "1h", - "deter_key": None, - }, - - "VOC_ZM": { - "pattern": "*.csv", - "freq": "1h", - "deter_key": None, - }, - -} diff --git a/AeroViz/tools/__init__.py b/AeroViz/tools/__init__.py index f917fe6..3b64717 100644 --- a/AeroViz/tools/__init__.py +++ b/AeroViz/tools/__init__.py @@ -1,3 +1,2 @@ from .database import DataBase from .dataclassifier import DataClassifier -from .datareader import DataReader diff --git a/AeroViz/tools/database.py b/AeroViz/tools/database.py index 0b81fcb..c058d7d 100644 --- a/AeroViz/tools/database.py +++ b/AeroViz/tools/database.py @@ -6,92 +6,90 @@ def load_default_chemical_data(): - # The following data is from the chemical composition of real atmospheric particles. - # - # The six main chemical components that comprised PM2.5 are listed in the data. 
- # Here, we test the radar charts to see if we can clearly identify how the - # chemical components vary between the three pollutant scenarios: - # - # 1) Whole sampling period (Total) - # 2) Clean period (Clean) - # 3) Transition period (Transition) - # 4) Event period (Event) - - data = { - 'Sulfate': [0.01, 0.34, 0.02, 0.71], - 'Nitrate': [0.88, 0.13, 0.34, 0.13], - 'OC': [0.07, 0.95, 0.04, 0.05], - 'EC': [0.20, 0.02, 0.85, 0.19], - 'Soil': [0.20, 0.10, 0.07, 0.01], - 'SS': [0.20, 0.10, 0.07, 0.01] - } - - return DataFrame(data, index=['Total', 'Clean', 'Transition', 'Event']) + # The following data is from the chemical composition of real atmospheric particles. + # + # The six main chemical components that comprised PM2.5 are listed in the data. + # Here, we test the radar charts to see if we can clearly identify how the + # chemical components vary between the three pollutant scenarios: + # + # 1) Whole sampling period (Total) + # 2) Clean period (Clean) + # 3) Transition period (Transition) + # 4) Event period (Event) + + data = { + 'Sulfate': [0.01, 0.34, 0.02, 0.71], + 'Nitrate': [0.88, 0.13, 0.34, 0.13], + 'OC': [0.07, 0.95, 0.04, 0.05], + 'EC': [0.20, 0.02, 0.85, 0.19], + 'Soil': [0.20, 0.10, 0.07, 0.01], + 'SS': [0.20, 0.10, 0.07, 0.01] + } + + return DataFrame(data, index=['Total', 'Clean', 'Transition', 'Event']) def load_dataset_by_url(dataset_name: Literal["Tunghai", "Taipei"] = "Tunghai") -> DataFrame: - import requests - dataset_uris = { - "Tunghai": "https://raw.githubusercontent.com/alex870521/DataPlot/main/DataPlot/config/default_data.csv" - } + import requests + dataset_uris = { + "Tunghai": "https://raw.githubusercontent.com/alex870521/DataPlot/main/DataPlot/config/default_data.csv" + } - # Ensure the dataset name is valid - if dataset_name not in dataset_uris: - raise ValueError(f"Dataset {dataset_name} is not supported.") + # Ensure the dataset name is valid + if dataset_name not in dataset_uris: + raise ValueError(f"Dataset {dataset_name} is not supported.") - url = dataset_uris[dataset_name] + url = dataset_uris[dataset_name] - # Make a request to the URL - response = requests.get(url) + # Make a request to the URL + response = requests.get(url) - if response.status_code == 200: - return read_csv(StringIO(response.text), parse_dates=['Time'], index_col='Time') - else: - print(f"Failed to download file: {response.status_code}") - print(response.text) # Print the response text for debugging - return DataFrame() # Return an empty DataFrame in case of failure + if response.status_code == 200: + return read_csv(StringIO(response.text), na_values=('E', 'F', '-', '_', '#', '*'), index_col=0, + parse_dates=True, low_memory=False) + else: + print(f"Failed to download file: {response.status_code}") + print(response.text) # Print the response text for debugging + return DataFrame() # Return an empty DataFrame in case of failure def load_dataset_local(dataset_name: Literal["Tunghai", "Taipei", "PNSD"] = "Tunghai") -> DataFrame: - base_dir = Path(__file__).resolve().parent.parent - config_dir = base_dir / 'config' + base_dir = Path(__file__).resolve().parent.parent + config_dir = base_dir / 'data' - dataset_paths = { - "Tunghai": config_dir / 'DEFAULT_DATA.csv', - "Taipei": config_dir / 'DEFAULT_DATA.csv', - "PNSD": config_dir / 'DEFAULT_PNSD_DATA.csv' - } + dataset_paths = { + "Tunghai": config_dir / 'DEFAULT_DATA.csv', + "Taipei": config_dir / 'DEFAULT_DATA.csv', + "PNSD": config_dir / 'DEFAULT_PNSD_DATA.csv' + } - if dataset_name not in dataset_paths: - raise 
ValueError(f"Dataset {dataset_name} is not supported.") + if dataset_name not in dataset_paths: + raise ValueError(f"Dataset {dataset_name} is not supported.") - file_path = dataset_paths[dataset_name] + file_path = dataset_paths[dataset_name] - if not file_path.exists(): - raise FileNotFoundError(f"The file {file_path} does not exist.") + if not file_path.exists(): + raise FileNotFoundError(f"The file {file_path} does not exist.") - return read_csv(file_path, parse_dates=['Time'], index_col='Time', na_values=('-', 'E', 'F'), low_memory=False) + return read_csv(file_path, na_values=('E', 'F', '-', '_', '#', '*'), index_col=0, parse_dates=True, + low_memory=False) class DataBase: - def __new__(cls, file_path: Path | str = None, load_data: bool = False, load_PSD: bool = False): - print(f'\t\t \033[96m --- Loading Data --- \033[0m') - if file_path is not None: - file_path = Path(file_path) - if file_path.exists(): - return read_csv(file_path, parse_dates=['Time'], index_col='Time', na_values=('-', 'E', 'F'), - low_memory=False) + def __new__(cls, file_path: Path | str = None, load_data: bool = False, load_PSD: bool = False): + print(f'Loading:\033[96m Default Data\033[0m') + if file_path is not None: + file_path = Path(file_path) + if file_path.exists(): + return read_csv(file_path, na_values=('E', 'F', '-', '_', '#', '*'), index_col=0, parse_dates=True, + low_memory=False) - if load_data ^ load_PSD: - if load_data: - return load_dataset_local("Tunghai") + if load_data ^ load_PSD: + return load_dataset_local("Tunghai") if load_data else load_dataset_local("PNSD") - elif load_PSD: - return load_dataset_local("PNSD") - - else: - raise ValueError("Exactly one of 'load_data' or 'load_PSD' must be True.") + else: + raise ValueError("Exactly one of 'load_data' or 'load_PSD' must be True.") if __name__ == '__main__': - df = DataBase("Tunghai") + df = DataBase("Tunghai") diff --git a/AeroViz/tools/dataclassifier.py b/AeroViz/tools/dataclassifier.py index 1646b41..2e8cd34 100644 --- a/AeroViz/tools/dataclassifier.py +++ b/AeroViz/tools/dataclassifier.py @@ -6,112 +6,112 @@ class Classifier: - Seasons = {'2020-Summer': (datetime(2020, 9, 4), datetime(2020, 9, 21, 23)), - '2020-Autumn': (datetime(2020, 9, 22), datetime(2020, 12, 29, 23)), - '2020-Winter': (datetime(2020, 12, 30), datetime(2021, 3, 25, 23)), - '2021-Spring': (datetime(2021, 3, 26), datetime(2021, 5, 6, 23))} - - # '2021-Summer': (datetime(2021, 5, 7), datetime(2021, 10, 16, 23)) - # '2021-Autumn': (datetime(2021, 10, 17), datetime(2021, 12, 31, 23)) - - @classmethod - def classify(cls, df) -> DataFrame: - df = cls.classify_by_diurnal(df) - df = cls.classify_by_state(df) - df = cls.classify_by_season(df) - df = cls.classify_by_season_state(df) - - return df - - @classmethod - def classify_by_diurnal(cls, df): - df['Hour'] = df.index.hour - df['Diurnal'] = df['Hour'].apply(cls.map_diurnal) - return df - - @classmethod - def classify_by_state(cls, df): - df['State'] = df.apply(cls.map_state, axis=1, clean_bound=df.Extinction.quantile(0.2), - event_bound=df.Extinction.quantile(0.8)) - return df - - @classmethod - def classify_by_season(cls, df): - for season, (season_start, season_end) in cls.Seasons.items(): - df.loc[season_start:season_end, 'Season'] = season - return df - - @classmethod - def classify_by_season_state(cls, df): - for _grp, _df in df.groupby('Season'): - df['Season_State'] = df.apply(cls.map_state, axis=1, clean_bound=_df.Extinction.quantile(0.2), - event_bound=_df.Extinction.quantile(0.8)) - return df - - @staticmethod - 
def map_diurnal(hour): - return 'Day' if 7 <= hour <= 18 else 'Night' - - @staticmethod - def map_state(row, clean_bound, event_bound): - return 'Event' if row['Extinction'] >= event_bound else 'Clean' if row[ - 'Extinction'] < clean_bound else 'Transition' + Seasons = {'2020-Summer': (datetime(2020, 9, 4), datetime(2020, 9, 21, 23)), + '2020-Autumn': (datetime(2020, 9, 22), datetime(2020, 12, 29, 23)), + '2020-Winter': (datetime(2020, 12, 30), datetime(2021, 3, 25, 23)), + '2021-Spring': (datetime(2021, 3, 26), datetime(2021, 5, 6, 23))} + + # '2021-Summer': (datetime(2021, 5, 7), datetime(2021, 10, 16, 23)) + # '2021-Autumn': (datetime(2021, 10, 17), datetime(2021, 12, 31, 23)) + + @classmethod + def classify(cls, df) -> DataFrame: + df = cls.classify_by_diurnal(df) + df = cls.classify_by_state(df) + df = cls.classify_by_season(df) + df = cls.classify_by_season_state(df) + + return df + + @classmethod + def classify_by_diurnal(cls, df): + df['Hour'] = df.index.hour + df['Diurnal'] = df['Hour'].apply(cls.map_diurnal) + return df + + @classmethod + def classify_by_state(cls, df): + df['State'] = df.apply(cls.map_state, axis=1, clean_bound=df.Extinction.quantile(0.2), + event_bound=df.Extinction.quantile(0.8)) + return df + + @classmethod + def classify_by_season(cls, df): + for season, (season_start, season_end) in cls.Seasons.items(): + df.loc[season_start:season_end, 'Season'] = season + return df + + @classmethod + def classify_by_season_state(cls, df): + for _grp, _df in df.groupby('Season'): + df['Season_State'] = df.apply(cls.map_state, axis=1, clean_bound=_df.Extinction.quantile(0.2), + event_bound=_df.Extinction.quantile(0.8)) + return df + + @staticmethod + def map_diurnal(hour): + return 'Day' if 7 <= hour <= 18 else 'Night' + + @staticmethod + def map_state(row, clean_bound, event_bound): + return 'Event' if row['Extinction'] >= event_bound else 'Clean' if row[ + 'Extinction'] < clean_bound else 'Transition' class DataClassifier(Classifier): - """ - Notes - ----- - First, create group then return the selected statistic method. - If the 'by' does not exist in DataFrame, import the default DataFrame to help to sign the different group. - - """ - - def __new__(cls, - df: DataFrame, - by: Literal["Hour", "State", "Season", "Season_state"] | str, - df_support: DataFrame | Series = None, - cut_bins: Sequence = None, - qcut: int = None, - labels: list[str] = None - ) -> tuple[DataFrame, DataFrame]: - group = cls._group_data(df, by, df_support, cut_bins, qcut, labels) - return cls._compute_statistics(df, group) - - @staticmethod - def _group_data(df, by, df_support, cut_bins, qcut, labels): - if by not in df.columns: - if df_support is None: - raise KeyError(f"Column '{by}' does not exist in DataFrame." 
- f"Please provide a support DataFrame or Series to help classify.") - else: - df = concat([df, Classifier.classify(df_support.copy())[by]], axis=1) - - if cut_bins is not None: - df[f'{by}_cut'] = pd.cut(df.loc[:, f'{by}'], cut_bins, - labels=labels or (cut_bins + (cut_bins[1] - cut_bins[0]) / 2)[:-1]) - return df.groupby(f'{by}_cut', observed=False) - - elif qcut is not None: - df[f'{by}_qcut'] = pd.qcut(df.loc[:, f'{by}'], q=qcut, labels=labels) - return df.groupby(f'{by}_qcut', observed=False) - - else: - if by == 'State': - return df.groupby(by) - - elif by == 'Season': - return df.groupby(pd.Categorical(df['Season'], categories=['2020-Summer', '2020-Autumn', '2020-Winter', - '2021-Spring']), observed=False) - else: - return df.groupby(by, observed=False) - - @staticmethod - def _compute_statistics(df, group): - mean_df = group.mean(numeric_only=True) - mean_df.loc['Total'] = df.mean(numeric_only=True) - - std_df = group.std(numeric_only=True) - std_df.loc['Total'] = df.std(numeric_only=True) - - return mean_df, std_df + """ + Notes + ----- + First, create group then return the selected statistic method. + If the 'by' does not exist in DataFrame, import the default DataFrame to help to sign the different group. + + """ + + def __new__(cls, + df: DataFrame, + by: Literal["Hour", "State", "Season", "Season_state"] | str, + df_support: DataFrame | Series = None, + cut_bins: Sequence = None, + qcut: int = None, + labels: list[str] = None + ) -> tuple[DataFrame, DataFrame]: + group = cls._group_data(df, by, df_support, cut_bins, qcut, labels) + return cls._compute_statistics(df, group) + + @staticmethod + def _group_data(df, by, df_support, cut_bins, qcut, labels): + if by not in df.columns: + if df_support is None: + raise KeyError(f"Column '{by}' does not exist in DataFrame." + f"Please provide a support DataFrame or Series to help classify.") + else: + df = concat([df, Classifier.classify(df_support.copy())[by]], axis=1) + + if cut_bins is not None: + df[f'{by}_cut'] = pd.cut(df.loc[:, f'{by}'], cut_bins, + labels=labels or (cut_bins + (cut_bins[1] - cut_bins[0]) / 2)[:-1]) + return df.groupby(f'{by}_cut', observed=False) + + elif qcut is not None: + df[f'{by}_qcut'] = pd.qcut(df.loc[:, f'{by}'], q=qcut, labels=labels) + return df.groupby(f'{by}_qcut', observed=False) + + else: + if by == 'State': + return df.groupby(by) + + elif by == 'Season': + return df.groupby(pd.Categorical(df['Season'], categories=['2020-Summer', '2020-Autumn', '2020-Winter', + '2021-Spring']), observed=False) + else: + return df.groupby(by, observed=False) + + @staticmethod + def _compute_statistics(df, group): + mean_df = group.mean(numeric_only=True) + mean_df.loc['Total'] = df.mean(numeric_only=True) + + std_df = group.std(numeric_only=True) + std_df.loc['Total'] = df.std(numeric_only=True) + + return mean_df, std_df diff --git a/AeroViz/tools/dataprinter.py b/AeroViz/tools/dataprinter.py index 30a4758..f3b3470 100644 --- a/AeroViz/tools/dataprinter.py +++ b/AeroViz/tools/dataprinter.py @@ -5,54 +5,54 @@ def data_table(df: DataFrame, - items: list[str] | str = None, - times: list[datetime | Timestamp | str] = None, - ): - """ - This function cuts the DataFrame based on the given time periods and calculates the mean and standard deviation - of the specified items for each period. - - Parameters - ---------- - df : pd.DataFrame - The DataFrame to be processed. It should have a DateTime index. - items : list[str] | str, optional - The columns of the DataFrame to be processed. 
It can be a list of column names or a single column name. - By default, it is ['NO', 'NO2', 'NOx']. - times : list[str] | str, optional - The time periods to cut the DataFrame. It can be a list of time strings or a single time string. - Each time string should be in the format of 'YYYY-MM-DD'. By default, it is ['2024-03-21', '2024-04-30']. - - Returns - ------- - None - This function doesn't return any value. It prints out a table showing the mean and standard deviation - of the specified items for each time period. - """ - items = [items] if isinstance(items, str) else items - times = [times] if isinstance(times, str) else times - times = list(map(Timestamp, times)) - - times.sort() - - results = [] - periods = [] - for i in range(len(times) + 1): - if i == 0: - df_period = df.loc[df.index <= times[i], items] - period_label = f'Before {times[i].date()}' - elif i == len(times): - df_period = df.loc[df.index > times[i - 1], items] - period_label = f'After {times[i - 1].date()}' - else: - df_period = df.loc[(df.index > times[i - 1]) & (df.index <= times[i]), items] - period_label = f'{times[i - 1].date()} to {times[i].date()}' - - mean, std = df_period.mean().round(2).to_numpy(), df_period.std().round(2).to_numpy() - - results.append([f'{m} ± {s}' for m, s in zip(mean, std)]) - periods.append(period_label) - - result = DataFrame(results, columns=items, index=periods) - - print(tabulate(result, headers='keys', tablefmt='fancy_grid')) + items: list[str] | str = None, + times: list[datetime | Timestamp | str] = None, + ): + """ + This function cuts the DataFrame based on the given time periods and calculates the mean and standard deviation + of the specified items for each period. + + Parameters + ---------- + df : pd.DataFrame + The DataFrame to be processed. It should have a DateTime index. + items : list[str] | str, optional + The columns of the DataFrame to be processed. It can be a list of column names or a single column name. + By default, it is ['NO', 'NO2', 'NOx']. + times : list[str] | str, optional + The time periods to cut the DataFrame. It can be a list of time strings or a single time string. + Each time string should be in the format of 'YYYY-MM-DD'. By default, it is ['2024-03-21', '2024-04-30']. + + Returns + ------- + None + This function doesn't return any value. It prints out a table showing the mean and standard deviation + of the specified items for each time period. 
+ """ + items = [items] if isinstance(items, str) else items + times = [times] if isinstance(times, str) else times + times = list(map(Timestamp, times)) + + times.sort() + + results = [] + periods = [] + for i in range(len(times) + 1): + if i == 0: + df_period = df.loc[df.index <= times[i], items] + period_label = f'Before {times[i].date()}' + elif i == len(times): + df_period = df.loc[df.index > times[i - 1], items] + period_label = f'After {times[i - 1].date()}' + else: + df_period = df.loc[(df.index > times[i - 1]) & (df.index <= times[i]), items] + period_label = f'{times[i - 1].date()} to {times[i].date()}' + + mean, std = df_period.mean().round(2).to_numpy(), df_period.std().round(2).to_numpy() + + results.append([f'{m} ± {s}' for m, s in zip(mean, std)]) + periods.append(period_label) + + result = DataFrame(results, columns=items, index=periods) + + print(tabulate(result, headers='keys', tablefmt='fancy_grid')) diff --git a/AeroViz/tools/datareader.py b/AeroViz/tools/datareader.py deleted file mode 100644 index 5cfbf68..0000000 --- a/AeroViz/tools/datareader.py +++ /dev/null @@ -1,66 +0,0 @@ -from abc import ABC, abstractmethod -from pathlib import Path - -from pandas import read_csv, read_json, read_excel, DataFrame - - -class FileHandler(ABC): - """ An abstract base class for reading data files with different extensions (.csv, .json, .xls, .xlsx). """ - - @abstractmethod - def read_data(self, file_path: Path) -> DataFrame: - pass - - -class CsvFileHandler(FileHandler): - def read_data(self, file_path: Path) -> DataFrame: - return read_csv(file_path, na_values=('-', 'E', 'F', '#', '*'), - parse_dates=['Time'], index_col='Time', low_memory=False) - - -class JsonFileHandler(FileHandler): - def read_data(self, file_path: Path) -> DataFrame: - return read_json(file_path) - - -class ExcelFileHandler(FileHandler): - def read_data(self, file_path: Path) -> DataFrame: - return read_excel(file_path, parse_dates=['Time']) - - -class DataReaderFactory: - _handler_mapping = { - '.csv': CsvFileHandler(), - '.json': JsonFileHandler(), - '.xls': ExcelFileHandler(), - '.xlsx': ExcelFileHandler(), - } - - @staticmethod - def create_handler(file_extension: str) -> FileHandler: - reader_class = DataReaderFactory._handler_mapping.get(file_extension) - if reader_class is None: - raise ValueError(f"Unsupported file format: {file_extension}") - return reader_class - - -class DataReader: - """ - A class for reading data files with different extensions (.csv, .json, .xls, .xlsx). - - Parameters - ---------- - filename (Path | str): The name of the file to be read or the Path of the file. 
- - Returns - ------- - pandas.DataFrame: data - - Examples - -------- - >>> psd = DataReader(Path(...)) - """ - - def __new__(cls, file_path: Path | str) -> DataFrame: - file_path = Path(file_path) - return DataReaderFactory.create_handler(file_path.suffix.lower()).read_data(file_path) diff --git a/README.md b/README.md index 7ecd8ad..2486ae5 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ ## <div align="center">AeroViz for Aerosol Science Visualization</div> -<p align="center"> +<div align="center"> - <img alt="Static Badge" src="https://img.shields.io/badge/python-3.12-blue?logo=python"> - <img alt="Static Badge" src="https://img.shields.io/badge/License-MIT-yellow"> - <img alt="Static Badge" src="https://img.shields.io/badge/github-updating-red?logo=github"> - <img src="https://img.shields.io/badge/testing-green?logo=Pytest&logoColor=blue"> + + + + -</p> +</div> <div align="center"> @@ -16,86 +16,78 @@ <a href="https://www.linkedin.com/in/Alex870521/"><img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-social-linkedin.png?raw=true" width="3%" alt="Alex870521 LinkedIn"></a> <img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-transparent.png?raw=true" width="3%"> <a href="https://medium.com/@alex870521"><img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-social-medium.png?raw=true" width="3%" alt="Alex870521 Medium"></a> - - </div> ## <div align="center">Installation</div> - ```bash -pip install AeroViz # ensure the python version is >= 3.12 +pip install AeroViz ``` -## <div align="center">Usage</div> +## <div align="center">Key Features</div> -```python -import AeroViz -``` +### 📊 Data Reading ▶ RawDataReader -## <div align="center">RawDataReader Supported Instruments</div> +Built-in `RawDataReader` supporting multiple aerosol instruments: +- **Particle Sizers**: SMPS, APS, GRIMM, OPC +- **Mass & Optical**: TEOM, NEPH, Aurora, AE33/43, BC1054 +- **Chemical Analysis**: OCEC, IGAC, XRF, VOC -> [!NOTE]\ -> We are continuously working to support more instruments. Please check back for updates or contribute to our project on -> GitHub. +> Features include quality control, data filtering, flexible resampling, and CSV export. For detailed instrument support +> and usage, check our [RawDataReader Guide](docs/guide/RawDataReader.md). 
-The AeroViz project currently supports data from the following instruments: +### 🔬 Data Processing ▶ DataProcess -- **SMPS (Scanning Mobility Particle Sizer)** -- **APS (Aerodynamic Particle Sizer)** -- **GRIMM (GRIMM Aerosol Technik)** -- **TEOM (Continuous Ambient Particulate Monitor)** -- **NEPH (Nephelometer)** -- **Aurora (Nephelometer)** -- **AE33 (Aethalometer Model 33)** -- **AE43 (Aethalometer Model 43)** -- **BC1054 (Black Carbon Monitor 1054)** -- **MA350 (MicroAeth MA350)** -- **OCEC (Organic Carbon Elemental Carbon Analyzer)** -- **IGAC (In-situ Gas and Aerosol Compositions monitor)** -- **VOC (Volatile Organic Compounds Monitor)** +Built-in `DataProcess` provides advanced aerosol analysis: +- **Size Distribution**: Mode Fitting, Log-Normal Analysis +- **Optical Properties**: Mie Theory, SOAP Calculation +- **Chemical**: Mass Closure, Source Apportionment +- **VOC**: OFP, SOAP -## <div align="center">DataProcess Supported Method</div> +### 📈 Data Visualization ▶ plot -The AeroViz project currently supports the following processing methods: +Comprehensive visualization tools `plot`: +- **Time Analysis**: Trends, Diurnal Patterns +- **Statistical**: Distributions, Correlations +- **Specialized**: Size Contours, Wind Rose, Polar Plots, Hysplit, CBPF -- **Chemistry** -- **Optical** -- **SizeDistr** -- **VOC** +> **Note:** We are continuously adding support for more instruments and features. Contributions are welcome! -## <div align="center">Documentation</div> +## <div align="center">Quick Start</div> -For detailed documentation, please refer to the `docs` folder, which includes: +```python +from datetime import datetime +from pathlib import Path +from AeroViz import RawDataReader, DataProcess, plot -<div align="center"> +# Read data from a supported instrument +data = RawDataReader('NEPH', Path('/path/to/data'), start=datetime(2024, 2, 1), end=datetime(2024, 4, 30)) -| Documentation | Description | -|--------------------------------------------|----------------------------| -| [User Guide](docs/user_guide.md) | Basic usage instructions | -| [Developer Guide](docs/developer_guide.md) | Developer guidelines | -| [API Reference](docs/api_reference.md) | API documentation | -| [FAQ](docs/faq.md) | Frequently Asked Questions | -| [Changelog](docs/changelog.md) | List of changes | +# Create a visualization +plot.timeseries(data, y='scattering_coefficient') +``` -</div> +For more detailed usage instructions, please refer to our [User Guide](docs/guide). -## <div align="center">Related Dependencies</div> -* #### [PyMieScatt](https://github.com/bsumlin/PyMieScatt.git) -* #### [py-smps](https://github.com/quant-aq/py-smps.git) -* #### [ContainerHandle](https://github.com/yrr-Su/ContainerHandle.git) +## <div align="center">Documentation</div> +For detailed documentation, please refer to the `docs` folder, which includes: -## <div align="center">Contact</div> +<div align="center"> +| Documentation | Description | +|--------------------------------|--------------------------| +| [User Guide](docs/guide) | Basic usage instructions | +| [Changelog](docs/CHANGELOG.md) | List of changes | +</div> + +## <div align="center">Contact</div> For bug reports and feature requests please visit [GitHub Issues](https://github.com/Alex870521/DataPlot/issues). 
<div align="center"> -<a href="https://github.com/Alex870521"><img src="https://github.com/Alex870521/assets_repo/blob/main/assets/media/logo-social-github.png?raw=true" width="3%" alt="Alex870521 GitHub"></a> -<img src="https://github.com/Alex870521/assets_repo/blob/main/assets/media/logo-transparent.png?raw=true" width="3%"> -<a href="https://www.linkedin.com/in/Alex870521/"><img src="https://github.com/Alex870521/assets_repo/blob/main/assets/media/logo-social-linkedin.png?raw=true" width="3%" alt="Alex870521 LinkedIn"></a> -<img src="https://github.com/Alex870521/assets_repo/blob/main/assets/media/logo-transparent.png?raw=true" width="3%"> -<a href="https://medium.com/@alex870521"><img src="https://github.com/Alex870521/assets_repo/blob/main/assets/media/logo-social-medium.png?raw=true" width="3%" alt="Alex870521 Medium"></a> - - +<a href="https://github.com/Alex870521"><img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-social-github.png?raw=true" width="3%" alt="Alex870521 GitHub"></a> +<img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-transparent.png?raw=true" width="3%"> +<a href="https://www.linkedin.com/in/Alex870521/"><img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-social-linkedin.png?raw=true" width="3%" alt="Alex870521 LinkedIn"></a> +<img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-transparent.png?raw=true" width="3%"> +<a href="https://medium.com/@alex870521"><img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-social-medium.png?raw=true" width="3%" alt="Alex870521 Medium"></a> </div> \ No newline at end of file diff --git a/asserts/diagram.py b/asserts/diagram.py deleted file mode 100644 index becfd99..0000000 --- a/asserts/diagram.py +++ /dev/null @@ -1,36 +0,0 @@ -from diagrams import Cluster, Diagram -from diagrams.onprem.aggregator import Fluentd -from diagrams.onprem.analytics import Spark -from diagrams.onprem.compute import Server -from diagrams.onprem.database import PostgreSQL -from diagrams.onprem.inmemory import Redis -from diagrams.onprem.monitoring import Grafana, Prometheus -from diagrams.onprem.network import Nginx -from diagrams.onprem.queue import Kafka - -with Diagram("DataPlot flowchart", show=True, filename="assets/figure/diagram"): - ingress = Nginx("ABC") - - metrics = Prometheus("metric") - metrics << Grafana("monitoring") - - with Cluster("Service Cluster"): - grpcsvc = [ - Server("grpc1"), - Server("grpc2"), - Server("grpc3")] - - with Cluster("Sessions HA"): - primary = Redis("session") - primary - Redis("replica") << metrics - grpcsvc >> primary - - with Cluster("Database HA"): - primary = PostgreSQL("users") - primary - PostgreSQL("replica") << metrics - grpcsvc >> primary - - aggregator = Fluentd("logging") - aggregator >> Kafka("stream") >> Spark("analytics") - - ingress >> grpcsvc >> aggregator diff --git a/asserts/figure/IMPROVE_MLR.png b/assets/figure/IMPROVE_MLR.png similarity index 100% rename from asserts/figure/IMPROVE_MLR.png rename to assets/figure/IMPROVE_MLR.png diff --git a/asserts/figure/IMPROVE_donut.png b/assets/figure/IMPROVE_donut.png similarity index 100% rename from asserts/figure/IMPROVE_donut.png rename to assets/figure/IMPROVE_donut.png diff --git a/asserts/figure/IMPROVE_donuts.png b/assets/figure/IMPROVE_donuts.png similarity index 100% rename from asserts/figure/IMPROVE_donuts.png rename to 
assets/figure/IMPROVE_donuts.png diff --git a/asserts/figure/IMPROVE_pie.png b/assets/figure/IMPROVE_pie.png similarity index 100% rename from asserts/figure/IMPROVE_pie.png rename to assets/figure/IMPROVE_pie.png diff --git a/asserts/figure/Mie_MEE.png b/assets/figure/Mie_MEE.png similarity index 100% rename from asserts/figure/Mie_MEE.png rename to assets/figure/Mie_MEE.png diff --git a/asserts/figure/Mie_Q.png b/assets/figure/Mie_Q.png similarity index 100% rename from asserts/figure/Mie_Q.png rename to assets/figure/Mie_Q.png diff --git a/asserts/figure/OverPSD.png b/assets/figure/OverPSD.png similarity index 100% rename from asserts/figure/OverPSD.png rename to assets/figure/OverPSD.png diff --git a/asserts/figure/corr_matrix.png b/assets/figure/corr_matrix.png similarity index 100% rename from asserts/figure/corr_matrix.png rename to assets/figure/corr_matrix.png diff --git a/asserts/figure/psd_3D.png b/assets/figure/psd_3D.png similarity index 100% rename from asserts/figure/psd_3D.png rename to assets/figure/psd_3D.png diff --git a/asserts/figure/scatter.png b/assets/figure/scatter.png similarity index 100% rename from asserts/figure/scatter.png rename to assets/figure/scatter.png diff --git a/asserts/figure/windrose_CBPF.png b/assets/figure/windrose_CBPF.png similarity index 100% rename from asserts/figure/windrose_CBPF.png rename to assets/figure/windrose_CBPF.png diff --git a/asserts/media/logo-social-discord.png b/assets/media/logo-social-discord.png similarity index 100% rename from asserts/media/logo-social-discord.png rename to assets/media/logo-social-discord.png diff --git a/asserts/media/logo-social-github.png b/assets/media/logo-social-github.png similarity index 100% rename from asserts/media/logo-social-github.png rename to assets/media/logo-social-github.png diff --git a/asserts/media/logo-social-instagram.png b/assets/media/logo-social-instagram.png similarity index 100% rename from asserts/media/logo-social-instagram.png rename to assets/media/logo-social-instagram.png diff --git a/asserts/media/logo-social-linkedin.png b/assets/media/logo-social-linkedin.png similarity index 100% rename from asserts/media/logo-social-linkedin.png rename to assets/media/logo-social-linkedin.png diff --git a/asserts/media/logo-social-medium.png b/assets/media/logo-social-medium.png similarity index 100% rename from asserts/media/logo-social-medium.png rename to assets/media/logo-social-medium.png diff --git a/asserts/media/logo-social-threads.png b/assets/media/logo-social-threads.png similarity index 100% rename from asserts/media/logo-social-threads.png rename to assets/media/logo-social-threads.png diff --git a/asserts/media/logo-social-tiktok.png b/assets/media/logo-social-tiktok.png similarity index 100% rename from asserts/media/logo-social-tiktok.png rename to assets/media/logo-social-tiktok.png diff --git a/asserts/media/logo-social-twitter.png b/assets/media/logo-social-twitter.png similarity index 100% rename from asserts/media/logo-social-twitter.png rename to assets/media/logo-social-twitter.png diff --git a/asserts/media/logo-social-youtube.png b/assets/media/logo-social-youtube.png similarity index 100% rename from asserts/media/logo-social-youtube.png rename to assets/media/logo-social-youtube.png diff --git a/asserts/media/logo-transparent.png b/assets/media/logo-transparent.png similarity index 100% rename from asserts/media/logo-transparent.png rename to assets/media/logo-transparent.png diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md new file mode 100644 index 
0000000..a225887 --- /dev/null +++ b/docs/CHANGELOG.md @@ -0,0 +1,12 @@ +## v0.1.9.6 (2024-11-07) + +### Fix + +- set SMPS default size range (11.8, 593.5) + +### Refactor + +- **logger**: enhance progress bar visualization and formatting +- minor syntax improvements + +## v0.1.9.5 (2024-10-24) diff --git a/docs/api/RawDataReader.md b/docs/api/RawDataReader.md new file mode 100644 index 0000000..01b058d --- /dev/null +++ b/docs/api/RawDataReader.md @@ -0,0 +1,203 @@ +# RawDataReader API Reference + +## RawDataReader Function + +### Overview + +`RawDataReader` is a factory function that instantiates the appropriate reader module for a given instrument and returns +the processed data over a specified time range. + +### Function Signature + +```python +def RawDataReader( + instrument_name: str, + path: Path | str, + reset: bool = False, + qc: bool | str = True, + qc_freq: str | None = None, + rate: bool = True, + append_data: bool = False, + start: datetime = None, + end: datetime = None, + mean_freq: str = '1h', + csv_out: bool = True, +) -> DataFrame: +``` + +### Parameters + +- `instrument_name` (str): The name of the instrument for which to read data. Must be a valid key in the `meta` + dictionary. +- `path` (Path | str): The directory where raw data files for the instrument are stored. +- `reset` (bool, optional): If True, reset the state and reprocess the data from scratch. Default is False. +- `qc` (bool | str, optional): If True, apply quality control (QC) to the raw data. Default is True. +- `qc_freq` (str | None, optional): Frequency at which to perform QC. Must be one of 'W', 'M', 'Q', 'Y' for weekly, + monthly, quarterly, or yearly. Default is None. +- `rate` (bool, optional): If True, calculate rates from the data. Default is True. +- `append_data` (bool, optional): If True, append new data to the existing dataset instead of overwriting it. Default is + False. +- `start` (datetime, optional): Start time for filtering the data. If None, no start time filtering will be applied. +- `end` (datetime, optional): End time for filtering the data. If None, no end time filtering will be applied. +- `mean_freq` (str, optional): Resampling frequency for averaging the data. Example: '1h' for hourly mean. Default is ' + 1h'. +- `csv_out` (bool, optional): If True, output the processed data as a CSV file. Default is True. + +### Returns + +- DataFrame: An instance of the reader module corresponding to the specified instrument, which processes the data and + returns it in a usable format. + +### Raises + +- ValueError: If the `instrument_name` provided is not a valid key in the `meta` dictionary. +- ValueError: If the specified path does not exist or is not a directory. +- ValueError: If the QC frequency is invalid. +- ValueError: If start and end times are not both provided or are invalid. +- ValueError: If the mean_freq is not a valid frequency string. + +### Example + +```python +from pathlib import Path +from datetime import datetime + +data = RawDataReader( + instrument_name='BC1054', + path=Path('/path/to/data'), + start=datetime(2024, 2, 1), + end=datetime(2024, 7, 31, 23) +) +``` + +## AbstractReader Class + +### Overview + +`AbstractReader` is an abstract base class for reading raw data from different instruments. Each instrument should have +a separate class that inherits from this class and implements the abstract methods. 
+ +### Class Definition + +```python +class AbstractReader(ABC): +``` + +### Constructor + +```python +def __init__(self, + path: Path | str, + reset: bool = False, + qc: bool = True, + qc_freq: Optional[str] = None, + rate: bool = True, + append_data: bool = False): +``` + +#### Parameters + +- `path` (Path | str): The directory path where raw data files are stored. +- `reset` (bool, optional): If True, reprocess the data from scratch. Default is False. +- `qc` (bool, optional): If True, apply quality control to the data. Default is True. +- `qc_freq` (str, optional): Frequency at which to perform QC. Default is None. +- `rate` (bool, optional): If True, calculate rates from the data. Default is True. +- `append_data` (bool, optional): If True, append new data to existing dataset. Default is False. + +### Abstract Methods + +#### _raw_reader + +```python +@abstractmethod +def _raw_reader(self, file): + pass +``` + +This method should be implemented in child classes to read raw data files. + +#### _QC + +```python +@abstractmethod +def _QC(self, df: DataFrame) -> DataFrame: + return df +``` + +This method should be implemented in child classes to perform quality control on the data. + +### Key Methods + +#### __call__ + +```python +def __call__(self, + start: datetime, + end: datetime, + mean_freq: str = '1h', + csv_out: bool = True, + ) -> DataFrame: +``` + +This method processes the data for the specified time range and returns the result. + +#### _timeIndex_process + +```python +def _timeIndex_process(self, _df, user_start=None, user_end=None, append_df=None): +``` + +Processes time index, resamples data, extracts specified time range, and optionally appends new data. + +#### _outlier_process + +```python +def _outlier_process(self, _df): +``` + +Processes outliers based on a JSON file containing outlier information. + +#### _save_data + +```python +def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None: +``` + +Saves raw and quality-controlled data to pickle and CSV files. + +#### _read_raw_files + +```python +def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]: +``` + +Reads raw data files and performs initial processing and quality control. + +### Static Methods + +#### reorder_dataframe_columns + +```python +@staticmethod +def reorder_dataframe_columns(df, order_lists, others_col=False): +``` + +Reorders DataFrame columns based on specified order lists. + +#### n_sigma_QC + +```python +@staticmethod +def n_sigma_QC(df: DataFrame, std_range: int = 5) -> DataFrame: +``` + +Performs n-sigma quality control on the data. + +#### IQR_QC + +```python +@staticmethod +def IQR_QC(df: DataFrame, log_dist=False) -> tuple[DataFrame, DataFrame]: +``` + +Performs Inter-quartile Range (IQR) quality control on the data. 
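+
+### Example: Implementing a New Reader
+
+The sketch below is illustrative only: it shows how a concrete reader is expected to fill in the two abstract methods.
+The import path of `AbstractReader`, the CSV layout, and the QC rule are assumptions made for this example and do not
+refer to an actual AeroViz instrument module; the existing readers under `AeroViz/rawDataReader/script/` are the
+authoritative reference.
+
+```python
+from pandas import DataFrame, read_csv
+
+# Assumed import path for the abstract base class documented above
+from AeroViz.rawDataReader.core import AbstractReader
+
+
+class Reader(AbstractReader):
+    """Hypothetical reader for a CSV-based instrument with a 'Time' column."""
+
+    def _raw_reader(self, file) -> DataFrame:
+        # Parse one raw file into a time-indexed DataFrame
+        return read_csv(file, parse_dates=['Time'], index_col='Time')
+
+    def _QC(self, df: DataFrame) -> DataFrame:
+        # Simple screening: mask non-physical (negative) values; stricter rules
+        # could reuse the IQR_QC or n_sigma_QC helpers documented above
+        return df.where(df > 0)
+```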
\ No newline at end of file diff --git a/docs/example/scatter_examples.py b/docs/example/scatter_examples.py new file mode 100644 index 0000000..19d0bdf --- /dev/null +++ b/docs/example/scatter_examples.py @@ -0,0 +1,49 @@ +import numpy as np +import pandas as pd + +from AeroViz.plot import scatter + +# Set random seed to ensure reproducibility +np.random.seed(0) + +# Example 1: Basic Scatter Plot +print("Example 1: Basic Scatter Plot") +df = pd.DataFrame({ + 'x': np.random.rand(50), + 'y': np.random.rand(50) +}) + +fig, ax = scatter(df, x='x', y='y', title='Basic Scatter Plot') +fig.savefig('basic_scatter_plot.png') +print("Basic scatter plot saved as 'basic_scatter_plot.png'") + +# Example 2: Scatter Plot with Color and Size Encoding +print("\nExample 2: Scatter Plot with Color and Size Encoding") +df['color'] = np.random.rand(50) +df['size'] = np.random.randint(10, 100, 50) + +fig, ax = scatter(df, x='x', y='y', c='color', s='size', fig_kws={'figsize': (5, 4)}, + title='Scatter Plot with Color and Size Encoding') +fig.savefig('color_size_scatter_plot.png') +print("Scatter plot with color and size encoding saved as 'color_size_scatter_plot.png'") + +# Example 3: Scatter Plot with Regression Line and Diagonal +print("\nExample 3: Scatter Plot with Regression Line and Diagonal") +df = pd.DataFrame({ + 'x': np.arange(0, 10, 0.2), + 'y': np.arange(0, 10, 0.2) + np.random.normal(0, 1, 50) +}) + +fig, ax = scatter(df, x='x', y='y', regression=True, diagonal=True, + title='Scatter Plot with Regression and Diagonal Lines') +fig.savefig('regression_diagonal_scatter_plot.png') +print("Scatter plot with regression and diagonal lines saved as 'regression_diagonal_scatter_plot.png'") + +print("\nAll example plots have been generated. Please check the PNG files in the current directory.") + +# Additional usage instructions +print("\nUsage Instructions:") +print("1. Ensure that the AeroViz library is installed.") +print("2. Run this script to generate all example plots.") +print("3. View the generated PNG files to see different types of scatter plots.") +print("4. You can modify the parameters in this script to customize your own scatter plots.") diff --git a/docs/guide/RawDataReader.md b/docs/guide/RawDataReader.md new file mode 100644 index 0000000..ec5d393 --- /dev/null +++ b/docs/guide/RawDataReader.md @@ -0,0 +1,170 @@ +# RawDataReader Usage Guide + +This guide demonstrates various usage scenarios for the `RawDataReader` function from the AeroViz package. Each scenario +shows different configurations and explains the expected outputs. + +## Installation + +Before using `RawDataReader`, ensure you have the AeroViz package installed: + +```bash +pip install AeroViz +``` + +## Basic Usage + +Here are several scenarios showcasing different ways to use `RawDataReader`: + +```python +from pathlib import Path +from datetime import datetime +from AeroViz.rawDataReader import RawDataReader + +# Common parameters +data_path = Path('/path/to/your/data') +start_time = datetime(2024, 1, 1) +end_time = datetime(2024, 12, 31, 23, 59, 59) +``` + +### Scenario 1: Basic Usage with NEPH Instrument + +```python +neph_data = RawDataReader( + instrument_name='NEPH', + path=data_path / 'NEPH', + start=start_time, + end=end_time, + mean_freq='1h' +) +``` + +**Expected Output:** + +- Hourly averaged NEPH data for the entire year. +- Will include scattering coefficients and other NEPH-related metrics. 
+ +### Scenario 2: AE33 with Quality Control and Rate Calculation + +```python +ae33_data = RawDataReader( + instrument_name='AE33', + path=data_path / 'AE33', + reset=True, + qc=True, + qc_freq='1M', + rate=True, + start=start_time, + end=end_time, + mean_freq='1h', + csv_out=True +) +``` + +**Expected Output:** + +- Hourly AE33 data with quality control applied monthly. +- Includes black carbon concentrations and absorption coefficients. +- Will generate a CSV file with the processed data. + +### Scenario 3: SMPS with Specific Time Range + +```python +smps_data = RawDataReader( + instrument_name='SMPS', + path=data_path / 'SMPS', + start=datetime(2024, 6, 1), + end=datetime(2024, 8, 31, 23, 59, 59), + mean_freq='30min', + csv_out=False +) +``` + +**Expected Output:** + +- SMPS data for the summer months (June to August). +- 30-minute averaged data points. +- Includes particle size distribution information. +- No CSV file will be generated. + +### Scenario 4: Minion with Appending New Data + +```python +minion_data = RawDataReader( + instrument_name='Minion', + path=data_path / 'Minion', + reset=False, + append_data=True, + start=start_time, + end=end_time, + mean_freq='1d' +) +``` + +**Expected Output:** + +- Daily averaged Minion data for the entire year. +- New data will be appended to the existing dataset. + +## Console Output + +For each scenario, you may see console output similar to this: + +``` +Reading [Instrument] RAW DATA from [Start Date] to [End Date] +Reading [Instrument] files ━━━━━━━━━━━━━━━━━━ 100% [Time] + Acquisition rate : [Percentage]% + Yield rate : [Percentage]% +``` + +## Data Sample + +To view a sample of the processed data, you can use: + +```python +print(data.head()) +``` + +This will display the first few rows of the processed data, including timestamps and instrument-specific measurements. + +## Parameter Explanation + +- `instrument_name`: Name of the instrument (e.g., 'NEPH', 'AE33', 'SMPS', 'Minion') +- `path`: Directory path where raw data files are stored +- `reset`: If True, reprocess data from scratch +- `qc`: If True, apply quality control +- `qc_freq`: Frequency of quality control ('1M' for monthly, '1W' for weekly, etc.) +- `rate`: If True, calculate rates from the data +- `append_data`: If True, append new data to existing dataset +- `start` and `end`: Date range for data processing +- `mean_freq`: Frequency for data averaging ('1h' for hourly, '30min' for half-hourly, etc.) 
+- `csv_out`: If True, output processed data as CSV + +# Supported Instruments: Default Time Resolutions and File Types +### The AeroViz project currently supports data from the following instruments: + +| Instrument | Time Resolution | File Type | Display Columns | QAQC method | +|:------------------------------------------------------:|:---------------:|:------------|-------------------------------------------------------|:-----------:| +| NEPH (Nephelometer) | 5min | .dat | G | default | +| Aurora (Nephelometer) | 1min | .csv | G | default | +| SMPS (Scanning Mobility Particle Sizer) | 6min | .txt, .csv | all | default | +| GRIMM (GRIMM Aerosol Technik) | 6min | .dat | all | default | +| APS_3321 (Aerodynamic Particle Sizer) | 6min | .txt | all | default | +| AE33 (Aethalometer Model 33) | 1min | .dat | BC6 | default | +| AE43 (Aethalometer Model 43) | 1min | .dat | BC6 | default | +| BC1054 (Black Carbon Monitor 1054) | 1min | .csv | BC9 | default | +| MA350 (MicroAeth MA350) | 1min | .csv | BC5 | default | +| TEOM (Continuous Ambient Particulate Monitor) | 6min | .csv | PM_Total, PM_NV | default | +| OCEC (Sunset Organic Carbon Elemental Carbon Analyzer) | 1h | *LCRes.csv | Thermal_OC, Thermal_EC, Optical_OC, Optical_EC | default | +| IGAC (In-situ Gas and Aerosol Compositions monitor) | 1h | .csv | Na+, NH4+, K+, Mg2+, Ca2+, Cl-, NO2-, NO3-, SO42- | default | +| XRF (X-ray Fluorescence Spectrometer) | 1h | .csv | Al, Si, P, S, Cl, K, Ca, Ti, V, Cr, Mn, Fe, Ni, Cu... | default | +| VOC (Volatile Organic Compounds Monitor) | 1h | .csv | voc | default | +| EPA | 1h | .csv | all | default | +| Minion | 1h | .csv, .xlsx | Na+, NH4+, Cl-, NO3-, SO42-, Al, Ti, V, Cr, Mn, Fe | default | + +```{note} +Notes: +1. For VOC, due to the numerous display columns, we've simply noted "voc" in the table. In reality, it includes many specific VOC compound names. +2. For instruments marked with "all", it means all available columns or intervals are displayed. +3. The display columns for XRF include a large number of element names, all of which are listed. +4. The file types for AE33 and AE43 actually have more specific patterns, but are simplified to ".dat" in this table. 
+``` diff --git a/docs/user_guide.md b/docs/guide/plot.md similarity index 80% rename from docs/user_guide.md rename to docs/guide/plot.md index d9fe67a..4cc2c64 100644 --- a/docs/user_guide.md +++ b/docs/guide/plot.md @@ -1,27 +1,3 @@ -## <div align="center">RawDataReader Usage</div> - -```python -from datetime import datetime as dtm -from pathlib import Path -from AeroViz.rawDataReader import RawDataReader - -# 設定資料的起始和結束時間 -start, end = dtm(2024, 2, 1), dtm(2024, 7, 31) - -# 設定資料路徑 -path_raw = Path('/path/to/data') - -# 讀取 AE33 資料 -dt_ae33 = RawDataReader('AE33', path_raw / 'AE33', reset=False, start=start, end=end) - -dt_neph = RawDataReader('NEPH', path_raw / 'NEPH', reset=False, start=start, end=end) -``` - -## <div align="center">DataProcess Usage</div> - -```python -``` - ## <div align="center">AeroViz.plot Usage</div> ### <div align="center">WindRose and Conditional Bivariate Probability Function (CBPF)</div> @@ -35,10 +11,10 @@ df = DataBase() # build default data, uers can use their own data # wind rose plot.meteorology.wind_rose(df, 'WS', 'WD', typ='bar') -plot.meteorology.wind_rose(df, 'WS', 'WD', 'PM25', typ='scatter') +plot.meteorology.wind_rose(df, 'WS', 'WD', 'PM2.5', typ='scatter') -plot.meteorology.CBPF(df, 'WS', 'WD', 'PM25') -plot.meteorology.CBPF(df, 'WS', 'WD', 'PM25', percentile=[75, 100]) +plot.meteorology.CBPF(df, 'WS', 'WD', 'PM2.5') +plot.meteorology.CBPF(df, 'WS', 'WD', 'PM2.5', percentile=[75, 100]) ``` ### <div align="center">Linear Regression</div> @@ -61,17 +37,18 @@ plot.multiple_linear_regression(df, x=['NO', 'NO2', 'CO', 'PM1'], y=['PM25']) ```python from AeroViz import plot, DataBase -df = DataBase() # build default data, uers can use their own data + +df = DataBase() # build default data, uers can use their own data # timeseries plot.timeseries.timeseries(df, y=['Extinction', 'Scattering'], - c=[None, None], + color=[None, None], style=['line', 'line'], times=('2021-02-01', '2021-03-31'), ylim=[0, None], ylim2=[0, None], rolling=50, inset_kws2=dict(bbox_to_anchor=(1.12, 0, 1.2, 1))) -plot.timeseries.timeseries(df, y='WS', c='WD', style='scatter', times=('2020-10-01', '2020-11-30'), +plot.timeseries.timeseries(df, y='WS', color='WD', style='scatter', times=('2020-10-01', '2020-11-30'), scatter_kws=dict(cmap='hsv'), cbar_kws=dict(ticks=[0, 90, 180, 270, 360]), ylim=[0, None]) @@ -90,11 +67,11 @@ plot.timeseries.timeseries_template(df.loc['2021-02-01', '2021-03-31']) ```python from pathlib import Path from AeroViz import plot -from AeroViz.tools import DataBase, DataReader +from AeroViz.tools import DataBase df = DataBase() # build default data, uers can use their own data -PNSD = DataReader(Path(__file__)/'AeroViz'/'config'/'DEFAULT_PNSD_DATA.csv') +PNSD = DataBase('DEFAULT_PNSD_DATA.csv') plot.distribution.distribution.heatmap(PNSD, unit='Number') plot.distribution.distribution.heatmap_tms(PNSD, unit='Number', freq='60d') diff --git a/docs/guide/support_voc.md b/docs/guide/support_voc.md new file mode 100644 index 0000000..d875b7b --- /dev/null +++ b/docs/guide/support_voc.md @@ -0,0 +1,125 @@ +# VOC Species Support and Usage Guide + +## Introduction + +This document provides information on the Volatile Organic Compound (VOC) species supported by our analysis package, +along with basic usage instructions. Our package is designed to assist researchers and environmental scientists in +effectively analyzing and processing VOC-related data. 
+ +## Supported VOC Species + +### Our package currently supports the following VOC species: + +| class | Species | MIR | MW | SOAP | KOH | +|:--------:|:----------------------:|:-----:|:------:|:-----:|:-----:| +| aromatic | Benzene | 0.72 | 78.11 | 92.9 | 1.22 | +| | Toluene | 4.0 | 92.14 | 100.0 | 5.63 | +| | Ethylbenzene | 3.04 | 106.17 | 111.6 | 7.0 | +| | m/p-Xylene | 7.8 | 106.2 | 75.8 | 18.95 | +| | o-Xylene | 7.64 | 106.16 | 95.5 | 13.6 | +| alkane | Ethane | 0.28 | 30.07 | 0.1 | 0.248 | +| | Propane | 0.49 | 44.1 | 0.0 | 1.09 | +| | Isobutane | 1.23 | 58.12 | 0.0 | 2.12 | +| | n-Butane | 1.15 | 58.12 | 0.3 | 2.36 | +| | Isopentane | 1.45 | 72.15 | 0.2 | 3.6 | +| | n-Pentane | 1.31 | 72.15 | 0.3 | 3.8 | +| | n-Hexane | 1.24 | 86.18 | 0.1 | 5.2 | +| | n-Heptane | 1.07 | 100.21 | 0.1 | 6.76 | +| | n-Octane | 0.9 | 114.23 | 0.8 | 8.11 | +| | n-Nonane | 0.78 | 128.2 | 1.9 | 9.7 | +| | n-Decane | 0.68 | 142.29 | 7.0 | 11.0 | +| | n-Undecane | 0.61 | 156.31 | 16.2 | 12.3 | +| | n-Dodecane | 0.55 | null | null | null | +| alkene | Ethylene | 9.0 | 28.05 | 1.3 | 8.52 | +| | Propylene (Propene) | 11.66 | 42.08 | 1.6 | 26.3 | +| | 1-Butene | 9.73 | 56.1 | 1.2 | 31.4 | +| | t-2-Butene | 15.16 | 56.1 | 3.1 | 56.4 | +| | cis-2-Butene | 14.24 | 56.1 | 3.6 | 64.0 | +| | 1-Pentene | 7.21 | 70.13 | 0.0 | 31.4 | +| | t-2-Pentene | 10.56 | 70.13 | 4.0 | 67.0 | +| | cis-2-Pentene | 10.38 | 70.13 | 3.6 | 65.0 | +| | 1-Hexene | 5.49 | null | null | null | +| | Isoprene | 10.61 | 68.1 | 1.9 | 100.0 | +| alkyne | Acetylene | 0.95 | 26.04 | 0.1 | 0.85 | +| alkane | Cyclopentane | 2.39 | 70.1 | 0.0 | 4.97 | +| | Methylcyclopentane | 2.19 | 84.16 | 0.0 | 5.2 | +| | Cyclohexane | 1.25 | 84.16 | 0.0 | 6.97 | +| | Methylcyclohexane | 1.7 | 98.19 | 0.0 | 4.97 | +| | 2,2-Dimethylbutane | 1.17 | 86.17 | 0.0 | 2.23 | +| | 2,3-Dimethylbutane | 0.97 | 86.18 | 0.0 | 5.78 | +| | 2-Methylpentane | 1.5 | 86.18 | 0.0 | 5.4 | +| | 3-Methylpentane | 1.8 | 86.18 | 0.2 | 5.2 | +| | 2,3-Dimethylpentane | 1.34 | 100.2 | 0.4 | 1.5 | +| | 2,4-Dimethylpentane | 1.55 | 100.2 | 0.0 | 4.77 | +| | 2-Methylhexane | 1.19 | 100.2 | 0.0 | 5.65 | +| | 3-Methylheptane | 1.24 | 114.23 | 0.0 | 5.6 | +| | 2,2,4-Trimethylpentane | 1.26 | 114.23 | 0.0 | 3.34 | +| | 2,3,4-Trimethylpentane | 1.03 | 114.23 | 0.0 | 6.6 | +| | 2-Methylheptane | 1.07 | 114.23 | 0.0 | 7.0 | +| | 3-Methylhexane | 1.61 | 100.2 | 0.0 | 7.0 | +| aromatic | Styrene | 1.73 | 104.15 | 212.3 | 58.0 | +| | Isopropylbenzene | 2.52 | 120.19 | 95.5 | 6.3 | +| | n-Propylbenzene | 2.03 | null | null | null | +| | m-Ethyltoluene | 7.39 | 120.19 | 100.6 | 11.8 | +| | p-Ethyltoluene | 4.44 | 120.19 | 69.7 | 18.6 | +| | o-Ethyltoluene | 5.59 | 120.19 | 94.8 | 11.9 | +| | m-Diethylbenzene | 7.1 | 134.22 | 0.0 | 32.5 | +| | p-Diethylbenzene | 4.43 | 134.22 | 0.0 | 32.7 | +| | 1,3,5-Trimethylbenzene | 11.76 | 120.19 | 13.5 | 56.7 | +| | 1,2,4-Trimethylbenzene | 8.87 | 120.19 | 20.6 | 32.5 | +| | 1,2,3-Trimethylbenzene | 11.97 | 120.19 | 43.9 | 32.7 | +| | 1,3-Butadiene | 12.61 | 54.1 | 1.8 | 66.6 | +| | 1-Octene | 3.25 | 112.2 | null | 30.0 | +| | 2-Ethyltoluene | 5.59 | 120.2 | 94.8 | 11.9 | +| | 3,4-Ethyltoluene | 5.92 | 120.2 | 85.2 | 15.2 | +| OVOC | Acetaldehyde | 6.54 | 44.1 | 0.6 | 15.0 | +| OVOC | Acetone | 0.36 | 58.1 | 0.3 | 0.17 | +| OVOC | Butyl Acetate | 0.83 | 116.2 | 0.0 | null | +| OVOC | Ethanol | 1.53 | 46.1 | 0.6 | 3.2 | +| OVOC | Ethyl Acetate | 0.63 | 88.1 | 0.1 | null | +| OVOC | IPA | 0.61 | 60.1 | 0.4 | 5.1 | +| ClVOC | 1,2-DCB | 0.18 | 147.0 | null | null | +| 
ClVOC | 1,4-DCB | 0.18 | 147.0 | null | null | +| ClVOC | PCE | 0.03 | 165.8 | null | 0.16 | +| ClVOC | TCE | 0.64 | 131.4 | null | 1.9 | +| ClVOC | VCM | 2.83 | 62.5 | null | null | + +### Notes: + +1. MIR: Maximum Incremental Reactivity +2. MW: Molecular Weight +3. SOAP: Secondary Organic Aerosol Potential +4. KOH: Rate constant for the reaction with OH radicals +5. Some data appears as "null", indicating that the value was not provided in the original data + +## Usage Instructions + +### Example Code + +```python +from datetime import datetime as dtm +from pathlib import Path + +from AeroViz.dataProcess import * +from AeroViz.rawDataReader import * + +start, end = dtm(2024, 2, 1), dtm(2024, 7, 31, 23) + +path_raw = Path('data') +path_prcs = Path('prcs') + +# read data +dt_VOC = RawDataReader('VOC', path_raw / 'VOC', reset=False, start=start, end=end) +dt_VOC.rename(columns={'isoprene': 'Isoprene', 'm,p-Xylene': 'm/p-Xylene'}, inplace=True) + +voc_prcs = DataProcess('VOC', path_out=path_prcs, excel=False, csv=True) + +df = voc_prcs.VOC_basic(dt_VOC) +``` + +## Important Notes + +1. Ensure your data file is in the correct format, typically CSV. +2. Species names in your data file should match those in the supported species list above. +3. The package will ignore or warn about species not in the supported list. +4. Analysis results include concentration, MIR value, SOAP value, and KOH reaction rate for each VOC. diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..0da4bf9 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,133 @@ +## <div align="center">AeroViz for Aerosol Science Visualization</div> + +<p align="center"> + + <img alt="Static Badge" src="https://img.shields.io/badge/python-3.12-blue?logo=python"> + <img alt="Static Badge" src="https://img.shields.io/badge/License-MIT-yellow"> + <img alt="Static Badge" src="https://img.shields.io/badge/github-updating-red?logo=github"> + <img alt="Static Badge" src="https://img.shields.io/badge/testing-green?logo=Pytest&logoColor=blue"> + +</p> + +<div align="center"> + +<a href="https://github.com/Alex870521"><img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-social-github.png?raw=true" width="3%" alt="Alex870521 GitHub"></a> +<img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-transparent.png?raw=true" width="3%"> +<a href="https://www.linkedin.com/in/Alex870521/"><img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-social-linkedin.png?raw=true" width="3%" alt="Alex870521 LinkedIn"></a> +<img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-transparent.png?raw=true" width="3%"> +<a href="https://medium.com/@alex870521"><img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-social-medium.png?raw=true" width="3%" alt="Alex870521 Medium"></a> + +</div> + +## <div align="center">Key Features</div> + +* Data Reading: Supports reading multiple aerosol data formats. +* Data Visualization: Offers various charts and graphs, including time series plots, distribution plots, and correlation + matrices. +* Data Processing: Includes multiple data processing tools, such as linear regression and Mie theory calculations. 
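+
+As a taste of the processing layer, the minimal sketch below mirrors the VOC example in the
+[VOC guide](guide/support_voc.md); the data paths are placeholders:
+
+```python
+from datetime import datetime
+from pathlib import Path
+
+from AeroViz import DataProcess, RawDataReader
+
+start, end = datetime(2024, 2, 1), datetime(2024, 7, 31, 23)
+
+# Paths below are placeholders; point them at your own raw data and output folders
+voc_data = RawDataReader('VOC', Path('data/VOC'), start=start, end=end)
+
+voc_proc = DataProcess('VOC', path_out=Path('prcs'), excel=False, csv=True)
+result = voc_proc.VOC_basic(voc_data)
+```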
+
+## <div align="center">Installation</div>
+
+```bash
+pip install AeroViz
+```
+
+For Windows users: Run `scripts/install_windows.bat`
+
+For Linux and Mac users: Run `scripts/install_unix.sh`
+
+## <div align="center">Quick Start</div>
+
+```python
+from datetime import datetime
+from pathlib import Path
+from AeroViz import RawDataReader, DataProcess, plot
+
+# Read data from a supported instrument
+data = RawDataReader('NEPH', Path('/path/to/data'), start=datetime(2024, 2, 1), end=datetime(2024, 4, 30))
+
+# Create a visualization
+plot.timeseries(data, y='scattering_coefficient')
+```
+
+For more detailed usage instructions, please refer to our [User Guide](guide).
+
+## <div align="center">RawDataReader</div>
+
+RawDataReader supports a wide range of aerosol instruments, including NEPH, SMPS, AE33, and many more. It handles
+various file types and time resolutions, making data processing efficient and standardized.
+
+For a detailed list of supported instruments, file types, and data columns, please refer to
+our [RawDataReader Usage Guide](guide/RawDataReader.md) in the `docs` folder.
+
+### Key Features:
+
+- Supports multiple aerosol instruments
+- Applies customizable quality control measures
+- Offers flexible data filtering and resampling options
+- Enables easy data export to CSV format
+
+### Supported Instruments
+
+The AeroViz project currently supports data from the following instruments:
+
+- SMPS (Scanning Mobility Particle Sizer)
+- APS (Aerodynamic Particle Sizer)
+- GRIMM (GRIMM Aerosol Technik)
+- TEOM (Continuous Ambient Particulate Monitor)
+- NEPH (Nephelometer)
+- Aurora (Nephelometer)
+- AE33 (Aethalometer Model 33)
+- AE43 (Aethalometer Model 43)
+- BC1054 (Black Carbon Monitor 1054)
+- MA350 (MicroAeth MA350)
+- OCEC (Organic Carbon Elemental Carbon Analyzer)
+- IGAC (In-situ Gas and Aerosol Compositions monitor)
+- XRF (X-ray Fluorescence Spectrometer)
+- VOC (Volatile Organic Compounds Monitor)
+
+> **Note:** We are continuously working to support more instruments. Please check back for updates or contribute to our
+> project on GitHub.
+
+## <div align="center">DataProcess</div>
+
+The AeroViz project currently supports the following processing methods:
+
+- **Chemistry**
+- **Optical**
+- **SizeDistr**
+- **VOC**
+
+## <div align="center">Documentation</div>
+
+For detailed documentation, please refer to the `docs` folder, which includes:
+
+<div align="center">
+
+| Documentation | Description |
+|--------------------------------|--------------------------|
+| [User Guide](guide) | Basic usage instructions |
+| [Changelog](CHANGELOG.md) | List of changes |
+
+</div>
+
+## <div align="center">Related Source</div>
+
+* #### [PyMieScatt](https://github.com/bsumlin/PyMieScatt.git)
+* #### [py-smps](https://github.com/quant-aq/py-smps.git)
+* #### [ContainerHandle](https://github.com/yrr-Su/ContainerHandle.git)
+
+## <div align="center">Contact</div>
+
+For bug reports and feature requests please visit [GitHub Issues](https://github.com/Alex870521/DataPlot/issues).
+ +<div align="center"> + +<a href="https://github.com/Alex870521"><img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-social-github.png?raw=true" width="3%" alt="Alex870521 GitHub"></a> +<img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-transparent.png?raw=true" width="3%"> +<a href="https://www.linkedin.com/in/Alex870521/"><img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-social-linkedin.png?raw=true" width="3%" alt="Alex870521 LinkedIn"></a> +<img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-transparent.png?raw=true" width="3%"> +<a href="https://medium.com/@alex870521"><img src="https://github.com/Alex870521/AeroViz/blob/main/assets/media/logo-social-medium.png?raw=true" width="3%" alt="Alex870521 Medium"></a> + + +</div> \ No newline at end of file diff --git a/install_mac_linux.sh b/install_mac_linux.sh deleted file mode 100755 index 967f85b..0000000 --- a/install_mac_linux.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash - -# 获取脚本所在的目录 -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" - -echo "Script directory: $SCRIPT_DIR" -# 检查是否有虚拟环境 -if [ -d "$SCRIPT_DIR/.venvv" ]; then - echo "Virtual environment found. Checking Python version..." - - # 虚拟环境中的 Python 路径 - PYTHON_PATH="$SCRIPT_DIR/.venv/bin/python" - PYTHON_VERSION=$($PYTHON_PATH -c "import sys; print(sys.version_info[:2])") - - echo "Python path in virtual environment: $PYTHON_PATH" - echo "version: $PYTHON_VERSION" - - if [ "$PYTHON_VERSION" \< "(3, 12)" ]; then - echo "Python version in the virtual environment is less than 3.12." - echo "Please upgrade the Python version in the virtual environment." - deactivate - exit 1 - fi - - echo "Activating virtual environment..." - source "$SCRIPT_DIR/.venv/bin/activate" - -else - echo "Virtual environment not found. Using system's Python." - - # 尝试找到系统中的 Python 3.12 或更高版本 - PYTHON_PATH=$(which python3.12 || which python3) - PYTHON_VERSION=$($PYTHON_PATH -c "import sys; print(sys.version_info[:2])") - - echo "Python path in virtual environment: $PYTHON_PATH" - echo "version: $PYTHON_VERSION" - - if [ "$PYTHON_VERSION" \< "(3, 12)" ]; then - echo "Python version in the virtual environment is less than 3.12." - echo "Please upgrade the Python version in the virtual environment." - deactivate - exit 1 - fi - -fi - -# 安装包和依赖 -pip install --upgrade pip -pip install . -pip install -r requirements.txt - -echo "Installation complete. You can now use the package." \ No newline at end of file diff --git a/install_windows.bat b/install_windows.bat deleted file mode 100644 index 8ade04a..0000000 --- a/install_windows.bat +++ /dev/null @@ -1,71 +0,0 @@ -@echo off -setlocal enabledelayedexpansion - -REM 將目錄切換到腳本所在目錄 -cd /d "%~dp0" - -REM 檢查是否存在虛擬環境 -if exist "venv\Scripts\activate.bat" ( - echo Activating virtual environment... - call venv\Scripts\activate.bat - echo Virtual environment activated. - - REM 檢查虛擬環境中的 python 版本 - echo Checking Python version in the virtual environment... - - REM 使用更簡單的 Python 命令捕獲版本信息 - for /f "delims=" %%V in ('python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')"') do ( - set "PYTHON_VERSION=%%V" - ) - echo Python version is: !PYTHON_VERSION! - - REM 提取主要和次要版本号 - for /f "tokens=1,2 delims=." %%a in ("!PYTHON_VERSION!") do ( - set "major=%%a" echo %%a - set "minor=%%b" echo %%b - ) - - REM 比较版本号是否小于 3.12 - if !major! LSS 3 ( - echo Python version is too low. Please use Python 3.12 or higher. 
- pause - exit /b - ) else if !major! EQU 3 if !minor! LSS 12 ( - echo Python version is too low. Please use Python 3.12 or higher. - pause - exit /b - ) - -) else ( - echo No virtual environment found. - set /p CREATE_VENV="Do you want to create a virtual environment? (y/n): " - if /i "!CREATE_VENV!"=="y" ( - py -m venv venv - echo Virtual environment created. - call venv\Scripts\activate.bat - echo Virtual environment activated. - - ) else ( - echo Searching for Python 3.12 or higher... - REM 如果沒有找到虛擬環境,則查 python 預設路徑 - for %%P in ("C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python312\python.exe") do ( - %%P -c "import sys; exit(0) if sys.version_info >= (3, 12) else exit(1)" - if not errorlevel 1 ( - echo Found Python 3.12+ at %%P - - ) else ( - echo No suitable Python 3.12+ found. Please install Python 3.12 or higher. - pause - exit /b - ) - ) - ) -) -echo Installing package and dependencies... -py -m pip install --upgrade pip -py -m pip install . -py -m pip install -r requirements.txt -echo Installation complete. You can now use the package. - -pause -endlocal diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a241345 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,82 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "AeroViz" +version = "0.1.11" +description = "Aerosol science" +authors = [{ name = "alex", email = "alex870521@gmail.com" }] +license = { text = "MIT" } +readme = "README.md" +requires-python = ">=3.11" +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", +] +dependencies = [ + "pandas>=2.2.0", + "numpy>=1.26.4", + "matplotlib==3.8.4", + "scipy==1.14.0", + "seaborn==0.13.2", + "scikit-learn==1.5.1", + "windrose==1.9.2", + "cartopy==0.24.1", + "tabulate==0.9.0", + "rich~=13.7.1", +] + +[project.optional-dependencies] +test = [ + "pytest>=7.0.0", + "pytest-cov>=4.1.0", + "pytest-mock>=3.10.0", +] +dev = [ + "black>=23.0", + "isort>=5.12.0", + "flake8>=6.0.0", + "mypy>=1.5.0", + "build", + "twine", +] +docs = [ + "mkdocs>=1.4.0", + "mkdocs-material>=8.0", + "mkdocstrings[python]>=0.18.0", +] + +[tool.pytest.ini_options] +pythonpath = "." 
+markers = [ + "requires_data: marks tests that require actual data files", +] + + +[tool.commitizen] +name = "cz_conventional_commits" +tag_format = "v$version" +changelog_file = "docs/CHANGELOG.md" +version_scheme = "pep440" +version_provider = "pep621" +update_changelog_on_bump = true +major_version_zero = true + +[project.urls] +Homepage = "https://github.com/Alex870521/AeroViz" +Repository = "https://github.com/Alex870521/AeroViz" +Issues = "https://github.com/Alex870521/AeroViz/issues" + +# 只保留一個 setuptools 配置部分 +[tool.setuptools.packages.find] +where = ["."] +include = ["AeroViz*"] +exclude = ["tests*"] +namespaces = false + +[tool.setuptools.package-data] +AeroViz = ["*", "**/*"] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 9845777..0000000 --- a/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -pandas==2.2.2 -numpy==1.26.4 -matplotlib==3.8.4 -scipy==1.14.0 -seaborn==0.13.2 -scikit-learn==1.5.1 -windrose==1.9.2 -tabulate==0.9.0 \ No newline at end of file diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt new file mode 100644 index 0000000..fd7eb83 --- /dev/null +++ b/requirements/requirements-dev.txt @@ -0,0 +1 @@ +-e .[dev,test] \ No newline at end of file diff --git a/requirements/requirements-docs.txt b/requirements/requirements-docs.txt new file mode 100644 index 0000000..1fa4112 --- /dev/null +++ b/requirements/requirements-docs.txt @@ -0,0 +1 @@ +-e .[docs] \ No newline at end of file diff --git a/requirements/requirements.txt b/requirements/requirements.txt new file mode 100644 index 0000000..ecf975e --- /dev/null +++ b/requirements/requirements.txt @@ -0,0 +1 @@ +-e . \ No newline at end of file diff --git a/scripts/install_unix.sh b/scripts/install_unix.sh new file mode 100755 index 0000000..826f644 --- /dev/null +++ b/scripts/install_unix.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# 获取脚本所在的目录 +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +echo "Script directory: $SCRIPT_DIR" + +# 检查 Python 版本的函数 +check_python_version() { + local python_path=$1 + if [ ! -x "$python_path" ]; then + echo "Python executable not found at $python_path" + return 1 + fi + local python_version=$($python_path -c "import sys; print('{}.{}'.format(*sys.version_info[:2]))") + echo "Python version: $python_version" + if [ "$(echo "$python_version 3.12" | awk '{print ($1 < $2)}')" -eq 1 ]; then + echo "Python version is less than 3.12." + return 1 + fi + return 0 +} + +# 检查是否有虚拟环境 +if [ -d "$SCRIPT_DIR/.venv" ]; then + echo "Virtual environment found." + PYTHON_PATH="$SCRIPT_DIR/.venv/bin/python" + if check_python_version "$PYTHON_PATH"; then + echo "Activating virtual environment..." + source "$SCRIPT_DIR/.venv/bin/activate" + else + echo "Please upgrade the Python version in the virtual environment." + exit 1 + fi +else + echo "Virtual environment not found. Using system's Python." + PYTHON_PATH=$(command -v python3.12 || command -v python3) + if ! check_python_version "$PYTHON_PATH"; then + echo "Please install Python 3.12 or higher." + exit 1 + fi + + echo "Creating a new virtual environment..." + python3 -m venv "$SCRIPT_DIR/.venv" + source "$SCRIPT_DIR/.venv/bin/activate" +fi + +# 安装包和依赖 +pip install --upgrade pip +pip install -r requirements.txt +pip install . + +echo "Installation complete. You can now use the package." 
\ No newline at end of file diff --git a/scripts/install_windows.bat b/scripts/install_windows.bat new file mode 100644 index 0000000..0e83c8a --- /dev/null +++ b/scripts/install_windows.bat @@ -0,0 +1,73 @@ +@echo off +setlocal enabledelayedexpansion + +REM 将目录切换到脚本所在目录 +cd /d "%~dp0" + +REM 检查是否存在虚拟环境 +if exist "venv\Scripts\activate.bat" ( + echo Activating virtual environment... + call venv\Scripts\activate.bat + echo Virtual environment activated. + + REM 检查虚拟环境中的 Python 版本 + echo Checking Python version in the virtual environment... + for /f "delims=" %%V in ('python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')"') do ( + set "PYTHON_VERSION=%%V" + ) + echo Python version is: !PYTHON_VERSION! + + for /f "tokens=1,2 delims=." %%a in ("!PYTHON_VERSION!") do ( + set "major=%%a" + set "minor=%%b" + ) + + if !major! LSS 3 ( + goto :version_error + ) else if !major! EQU 3 if !minor! LSS 12 ( + goto :version_error + ) +) else ( + echo No virtual environment found. + set /p CREATE_VENV="Do you want to create a virtual environment? (y/n): " + if /i "!CREATE_VENV!"=="y" ( + py -3.12 -m venv venv || ( + echo Failed to create virtual environment. Make sure Python 3.12+ is installed. + goto :error + ) + echo Virtual environment created. + call venv\Scripts\activate.bat + echo Virtual environment activated. + ) else ( + echo Searching for Python 3.12 or higher... + where py >nul 2>&1 || ( + echo Python launcher (py) not found. Please install Python 3.12 or higher. + goto :error + ) + py -3.12 -c "import sys; exit(0)" >nul 2>&1 || ( + echo No suitable Python 3.12+ found. Please install Python 3.12 or higher. + goto :error + ) + echo Found Python 3.12+ + ) +) + +echo Installing package and dependencies... +python -m pip install --upgrade pip +python -m pip install -r requirements.txt +python -m pip install . +echo Installation complete. You can now use the package. +goto :end + +:version_error +echo Python version is too low. Please use Python 3.12 or higher. 
+goto :error + +:error +pause +exit /b 1 + +:end +pause +endlocal +exit /b 0 \ No newline at end of file diff --git a/setup.py b/setup.py index 90ecf16..6068493 100644 --- a/setup.py +++ b/setup.py @@ -1,34 +1,3 @@ -from setuptools import setup, find_packages +from setuptools import setup -setup( - name="AeroViz", - version="0.1.0", - author="alex", - author_email="alex870521@gmail.com", - description="Aerosol science", - long_description=open('README.md').read(), - long_description_content_type="text/markdown", - - url="https://github.com/Alex870521/AeroViz", - python_requires=">=3.12", - - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - ], - - # Specify your project's dependencies - install_requires=[ - "pandas", - "numpy", - "matplotlib", - "seaborn", - "scipy", - "scikit-learn", - "windrose", - "tabulate" - # Add any other dependencies here - ], - packages=find_packages() -) +setup() diff --git a/tests/test_RawDataReader.py b/tests/test_RawDataReader.py new file mode 100644 index 0000000..36d4d23 --- /dev/null +++ b/tests/test_RawDataReader.py @@ -0,0 +1,121 @@ +import unittest +from datetime import datetime +from pathlib import Path + +import pytest + +from AeroViz import RawDataReader + + +@pytest.mark.requires_data +class TestRawDataReader(unittest.TestCase): + + @classmethod + def setUpClass(cls): + # 設置基礎路徑 + cls.base_path = Path('/Users/chanchihyu/NTU/2024_高雄能見度計畫') + cls.start = datetime(2024, 2, 1) + cls.end = datetime(2024, 9, 30, 23, 59, 59) + + def test_nz_aurora(self): + path_raw = self.base_path / 'NZ' / 'data' / 'Aurora' + reader = RawDataReader('Aurora', path_raw, reset=True, qc_freq='1MS', start=self.start, end=self.end) + self.assertIsNotNone(reader) + self.validate_data(reader) + + def test_nz_bc1054(self): + path_raw = self.base_path / 'NZ' / 'data' / 'BC1054' + reader = RawDataReader('BC1054', path_raw, reset=True, qc_freq='1MS', start=self.start, end=self.end) + self.assertIsNotNone(reader) + self.validate_data(reader) + + def test_fs_neph(self): + path_raw = self.base_path / 'FS' / 'data' / 'Neph' + reader = RawDataReader('NEPH', path_raw, reset=True, qc_freq='1MS', start=self.start, end=self.end) + self.assertIsNotNone(reader) + self.validate_data(reader) + + def test_fs_ae33(self): + path_raw = self.base_path / 'FS' / 'data' / 'AE33' + reader = RawDataReader('AE33', path_raw, reset=True, qc_freq='1MS', start=self.start, end=self.end) + self.assertIsNotNone(reader) + self.validate_data(reader) + + def test_nz_teom(self): + path_raw = self.base_path / 'NZ' / 'data' / 'Teom' + reader = RawDataReader('TEOM', path_raw, reset=True, qc_freq='1MS', start=self.start, end=self.end) + self.assertIsNotNone(reader) + self.validate_data(reader) + + def test_nz_ocec(self): + path_raw = self.base_path / 'NZ' / 'data' / 'OCEC_Rawdata' + reader = RawDataReader('OCEC', path_raw, reset=True, qc_freq='1MS', start=self.start, end=self.end) + self.assertIsNotNone(reader) + self.validate_data(reader) + + # def test_fs_ocec(self): + # path_raw = self.base_path / 'FS' / 'data' / 'OCEC' + # reader = RawDataReader('OCEC', path_raw, reset=True, qc_freq='1MS', start=self.start, end=self.end) + # self.assertIsNotNone(reader) + # self.validate_data(reader) + + def test_smps(self): + path_raw = self.base_path / 'NZ' / 'data' / 'SMPS' + reader = RawDataReader('SMPS', path_raw, reset=True, qc_freq='1MS', start=self.start, end=self.end) + self.assertIsNotNone(reader) + self.validate_data(reader) + 
+
+    # def test_aps(self):
+    #     path_raw = self.base_path / 'NZ' / 'data' / 'APS'
+    #     reader = RawDataReader('APS', path_raw, reset=True, qc_freq='1MS', start=self.start, end=self.end)
+    #     self.assertIsNotNone(reader)
+    #     self.validate_data(reader)
+
+    def test_nz_minion(self):
+        path_raw = self.base_path / 'NZ' / 'data' / 'Minion'
+        reader = RawDataReader('Minion', path_raw, reset=True, qc_freq='1MS', start=self.start, end=self.end)
+        self.assertIsNotNone(reader)
+        self.validate_data(reader)
+
+    def test_fs_minion(self):
+        path_raw = self.base_path / 'FS' / 'data' / 'Minion'
+        reader = RawDataReader('Minion', path_raw, reset=True, qc_freq='1MS', start=self.start, end=self.end)
+        self.assertIsNotNone(reader)
+        self.validate_data(reader)
+
+    def validate_data(self, reader):
+        # Check that the index stays within the requested date range
+        self.assertTrue((reader.index >= self.start).all() and (reader.index <= self.end).all())
+
+        # Check that the reader returned data
+        self.assertFalse(reader.empty)
+
+        # Check that specific columns exist (adjust to the data structure)
+        # if isinstance(reader, pd.DataFrame):
+        #     expected_columns = ['PM2.5', 'NO2', 'AT']  # adjust to the actual data
+        #     for col in expected_columns:
+        #         self.assertIn(col, reader.columns)
+
+        # Check data types
+        # self.assertTrue(pd.api.types.is_numeric_dtype(reader['PM2.5']))
+
+        # Check that special placeholder symbols have been handled
+        # self.assertFalse((reader == '_').any().any())
+        # self.assertFalse((reader == '*').any().any())
+
+    # def test_reset_functionality(self):
+    #     path_raw = self.base_path / 'NZ' / 'data' / 'Minion'
+    #     reader1 = RawDataReader('Minion', path_raw, reset=True, start=self.start, end=self.end)
+    #     reader2 = RawDataReader('Minion', path_raw, reset=False, start=self.start, end=self.end)
+    #     # Compare the two results; the comparison logic depends on the implementation
+    #     self.assertEqual(reader1.shape, reader2.shape)
+
+    # def test_error_handling(self):
+    #     with self.assertRaises(ValueError):
+    #         RawDataReader('InvalidInstrument', self.base_path)
+    #     with self.assertRaises(FileNotFoundError):
+    #         RawDataReader('Minion', Path("non_existent_path"))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_import.py b/tests/test_import.py
new file mode 100644
index 0000000..40df69f
--- /dev/null
+++ b/tests/test_import.py
@@ -0,0 +1,19 @@
+import unittest
+
+
+class TestAeroVizImports(unittest.TestCase):
+    def test_imports(self):
+        try:
+            import AeroViz
+            from AeroViz import plot
+            from AeroViz.dataProcess import DataProcess
+            from AeroViz.rawDataReader import RawDataReader
+            from AeroViz.tools import DataBase, DataClassifier
+
+            self.assertTrue(True)
+        except ImportError as e:
+            self.fail(f"ImportError: {str(e)}")
+
+
+if __name__ == '__main__':
+    unittest.main()
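The data-dependent tests above carry the `requires_data` marker registered in `pyproject.toml`, so they can be deselected on machines that do not have the raw instrument files. A typical invocation, assuming the optional `test` extra is installed, might look like this:

```bash
# Install test dependencies, then run only the tests that need no local raw data
pip install -e .[test]
pytest -m "not requires_data"

# Run the full suite, including the instrument-data tests
pytest
```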