diff --git a/.github/bump_version.py b/.github/bump_version.py
new file mode 100644
index 0000000..f946d95
--- /dev/null
+++ b/.github/bump_version.py
@@ -0,0 +1,19 @@
+# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import re
+
+with open("pyproject.toml", "r") as file:
+    version_content = file.read()
+# Extract the current semantic version from pyproject.toml
+old_semantic_version = re.findall(r'version = "(\d+\.\d+\.[a-zA-Z0-9]+)"', version_content)
+major_version, minor_version, patch_version = old_semantic_version[0].split(".")
+patch_version = int(re.findall(r"\d+", patch_version)[0])
+new_semantic_version = f"{major_version}.{minor_version}.{patch_version + 1}"
+regex_bumped_patch_version = rf"\g<1>{new_semantic_version}"
+# Substitute the bumped version back into the file contents
+bumped_version_content = re.sub(r'(version = ")\d+\.\d+\.[a-zA-Z0-9]+', regex_bumped_patch_version, version_content)
+with open("pyproject.toml", "w") as file:
+    file.write(bumped_version_content)
+print(new_semantic_version)  # Print is required for release in GitHub action
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 0000000..eb03330
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,43 @@
+
+
+# Description
+
+Please include a summary of the changes and the related issue. Please also include relevant motivation and context. List any dependencies that are required for this change.
+
+Fixes # (issue)
+
+## Type of change
+
+Please delete options that are not relevant.
+
+- [ ] Bug fix (non-breaking change which fixes an issue)
+- [ ] New feature (non-breaking change which adds functionality)
+- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
+- [ ] This change requires a documentation update
+
+# Jupyter Notebooks
+
+If your changes involve Jupyter notebooks please explicitly state here what the change consists of, e.g. only output cells have changes or specific input changes. This is to make sure we capture these changes correctly in the review process.
+
+# How Has This Been Tested?
+
+Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration.
+
+- [ ] Test A
+- [ ] Test B
+
+
+# Checklist:
+
+- [ ] My code follows the style guidelines of this project
+- [ ] I have performed a self-review of my code
+- [ ] I have made corresponding changes to the documentation
+- [ ] My changes generate no new warnings
+- [ ] I have added tests that prove my fix is effective or that my feature works
+- [ ] New and existing unit tests pass locally with my changes
+- [ ] Any dependent changes have been merged and published in downstream modules
+
diff --git a/.github/workflows/build_package.yml b/.github/workflows/build_package.yml
new file mode 100644
index 0000000..23956d9
--- /dev/null
+++ b/.github/workflows/build_package.yml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved.
+# +# SPDX-License-Identifier: Apache-2.0 + +name: Building the package +on: + push: + branches: + - 'main' +jobs: + Build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ "3.11" ] + steps: + - name: Checkout Repo + uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install --upgrade build + - name: Build the package + run: | + python -m build + - name: Upload build files + uses: actions/upload-artifact@v3 + with: + name: pyelq_whl + path: ./dist/*.whl diff --git a/.github/workflows/code_formatting.yml b/.github/workflows/code_formatting.yml new file mode 100644 index 0000000..2d5f965 --- /dev/null +++ b/.github/workflows/code_formatting.yml @@ -0,0 +1,42 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +name: Code formatting +on: + - push +jobs: + Black: + runs-on: ubuntu-latest + strategy: + matrix: + # Specify all python versions you might want to perform the actions on + python-version: [ "3.11" ] + steps: + - name: Checkout Repo + uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black + pip install isort + - name: Run isort, docformatter and black checks + run: | + isort . --check + black . --check + - name: Run isort and black when required and commit back + if: failure() + env: + GITHUB_ACCESS_TOKEN: ${{ secrets.PYELQ_TOKEN }} + run: | + isort . + black . + git config --global user.name 'code_reformat' + git config --global user.email '' + git remote set-url origin "https://$GITHUB_ACCESS_TOKEN@github.com/$GITHUB_REPOSITORY" + git commit --signoff -am "Automatic reformat of code" + git push diff --git a/.github/workflows/publish_docs.yml b/.github/workflows/publish_docs.yml new file mode 100644 index 0000000..7c2b12d --- /dev/null +++ b/.github/workflows/publish_docs.yml @@ -0,0 +1,26 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +name: publish documentation +on: + push: + branches: + - main +permissions: + contents: write +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: 3.x + - uses: actions/cache@v2 + with: + key: ${{ github.ref }} + path: .cache + - run: pip install mkdocs-material + - run: pip install mkdocstrings-python + - run: mkdocs gh-deploy --force diff --git a/.github/workflows/pydocstyle_check.yml b/.github/workflows/pydocstyle_check.yml new file mode 100644 index 0000000..336a187 --- /dev/null +++ b/.github/workflows/pydocstyle_check.yml @@ -0,0 +1,28 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +name: pydocstyle +on: + - push +jobs: + pydocstyle: + runs-on: ubuntu-latest + strategy: + matrix: + # Specify all python versions you might want to perform the actions on + python-version: [ "3.11" ] + steps: + - name: Checkout Repo + uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pydocstyle + - name: Run PydocStyle check + run: | + pydocstyle . \ No newline at end of file diff --git a/.github/workflows/pylint_check.yml b/.github/workflows/pylint_check.yml new file mode 100644 index 0000000..845db8c --- /dev/null +++ b/.github/workflows/pylint_check.yml @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +on: + - push + +name: Pylint Check +jobs: + Pylint: + # Specify the operating system GitHub has to use to perform the checks (ubuntu seems to be default) + runs-on: ubuntu-latest + strategy: + matrix: + # Specify all python versions you might want to perform the actions on + python-version: ["3.11"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pylint + pip install . + - name: Analysing the code with pylint + # Run through the src/pyelq/ directory and check all .py files with pylint + run: | + python -m pylint `find -regextype egrep -regex '(.*src/pyelq/.*.py)$'` --output-format=parseable:pylint_report.out + - name: Upload pylint results + uses: actions/upload-artifact@v3 + with: + name: pylint_report + path: pylint_report.out + if: ${{ always() }} \ No newline at end of file diff --git a/.github/workflows/release_tagging.yml b/.github/workflows/release_tagging.yml new file mode 100644 index 0000000..aaa0358 --- /dev/null +++ b/.github/workflows/release_tagging.yml @@ -0,0 +1,52 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. 
+#
+# SPDX-License-Identifier: Apache-2.0
+
+name: ReleaseTag
+
+# Trigger when a python file is changed on main branch either from pull request or push
+# but not when only pyproject.toml is changed due to version bump
+on:
+  push:
+    branches:
+      - 'main'
+    paths:
+      - '**.py'
+      - '!pyproject.toml'
+      - 'requirements.txt'
+
+jobs:
+  # Releases new Python version when Pull Requests are merged into "main"
+  Release:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # Specify all python versions you might want to perform the actions on
+        python-version: [ "3.11" ]
+    steps:
+      # Checkout
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Bump version and commit bumped version back to branch
+        env:
+          GITHUB_ACCESS_TOKEN: ${{ secrets.PYELQ_TOKEN }}
+        id: version
+        run: |
+          version=$(python .github/bump_version.py)
+          git config --global user.name 'bump_version'
+          git config --global user.email 'action@github.com'
+          git remote set-url origin "https://$GITHUB_ACCESS_TOKEN@github.com/$GITHUB_REPOSITORY"
+          git commit --signoff -am "Bumped minor version"
+          git push
+          echo "BUMPED_VERSION=$(echo v$version)" >> $GITHUB_ENV
+          echo "New version: $version"
+      - name: Create Release
+        run: gh release create ${{ env.BUMPED_VERSION }} --generate-notes
+        env:
+          GITHUB_TOKEN: ${{ secrets.PYELQ_TOKEN }}
diff --git a/.github/workflows/reuse_compliance.yml b/.github/workflows/reuse_compliance.yml
new file mode 100644
index 0000000..dc08574
--- /dev/null
+++ b/.github/workflows/reuse_compliance.yml
@@ -0,0 +1,17 @@
+# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+name: REUSE Compliance Check
+
+on:
+  - push
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout
+        uses: actions/checkout@v4
+      - name: REUSE Compliance Check
+        uses: fsfe/reuse-action@v2
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d341553
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,287 @@
+# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Data files
+*.h5
+*.nc
+
+# Shapefiles
+*.CPG
+*.dbf
+*.prj
+*.sbn
+*.sbx
+*.shp
+*.shp.xml
+*.shx
+
+# environment
+*.env
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+*.egg-info/
+*.dist-info/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Dependency files +poetry.lock + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +docs/build/ +output/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ +/.mapbox_token + + +## Core latex/pdflatex auxiliary files: +*.aux +*.lof +*.log +*.lot +*.fls +*.out +*.toc +*.fmt +*.fot +*.cb +*.cb2 + +## Intermediate documents: +*.dvi +*-converted-to.* +# these rules might exclude image files for figures etc. +# *.ps +# *.eps +# *.pdf + +## Generated if empty string is given at "Please type another file name for output:" +.pdf + +## Bibliography auxiliary files (bibtex/biblatex/biber): +*.bbl +*.bcf +*.blg +*-blx.aux +*-blx.bib +*.run.xml + +## Build tool auxiliary files: +*.fdb_latexmk +*.synctex +*.synctex(busy) +*.synctex.gz +*.synctex.gz(busy) +*.pdfsync + +## Auxiliary and intermediate files from other packages: +# algorithms +*.alg +*.loa + +# achemso +acs-*.bib + +# amsthm +*.thm + +# beamer +*.nav +*.pre +*.snm +*.vrb + +# changes +*.soc + +# cprotect +*.cpt + +# elsarticle (documentclass of Elsevier journals) +*.spl + +# endnotes +*.ent + +*.lox + +# feynmf/feynmp +*.mf +*.mp +*.t[1-9] +*.t[1-9][0-9] +*.tfm + +#(r)(e)ledmac/(r)(e)ledpar +*.end +*.?end +*.[1-9] +*.[1-9][0-9] +*.[1-9][0-9][0-9] +*.[1-9]R +*.[1-9][0-9]R +*.[1-9][0-9][0-9]R +*.eledsec[1-9] +*.eledsec[1-9]R +*.eledsec[1-9][0-9] +*.eledsec[1-9][0-9]R +*.eledsec[1-9][0-9][0-9] +*.eledsec[1-9][0-9][0-9]R + +# glossaries +*.acn +*.acr +*.glg +*.glo +*.gls +*.glsdefs + +# gnuplottex +*-gnuplottex-* + +# gregoriotex +*.gaux +*.gtex + +# hyperref +*.brf + +*.pdf + +*.png +*.jpg +*.jpeg + +# Sonarqube files +.scannerwork/ diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..3f20c26 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,576 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +[MASTER] + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold to be exceeded before program exits with error. +fail-under=9.0 + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS + +# Add files or directories matching the regex patterns to the ignore-list. The +# regex matches against paths and can be in Posix or Windows format. +ignore-paths= + +# Files or directories matching the regex patterns are skipped. The regex +# matches against base names, not paths. +ignore-patterns= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.9 + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. +confidence= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead + +# Enable the message, report, category or checker with the given id(s). 
You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'error', 'warning', 'refactor', and 'convention' +# which contain the number of messages in each category, as well as 'statement' +# which is the total number of statements analyzed. This score is used by the +# global evaluation report (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +#msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt.,,,,,, +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. 
+good-names=i, + j, + k, + ex, + Run, + _, + x, + y, + z, + up + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. +variable-rgx=^[a-zA-Z][a-z0-9]*((_[a-z0-9]+)*)?$ + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=120 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +#notes-rgx= + + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=no + +# Signatures are removed from the similarity computation +ignore-signatures=no + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. 
To make it work, +# install the 'python-enchant' package. +spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear and the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether missing members accessed in mixin class should be ignored. A +# class is considered mixin if its name matches the mixin-class-rgx option. +ignore-mixin-members=yes + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins ignore-mixin- +# members is set to 'yes' +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. 
+signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=cls + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Deprecated modules which should not be used, separated by a comma. 
+deprecated-modules= + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "BaseException, Exception". +overgeneral-exceptions=builtins.BaseException, + builtins.Exception diff --git a/CODEOWNERS.md b/CODEOWNERS.md new file mode 100644 index 0000000..78d4b2f --- /dev/null +++ b/CODEOWNERS.md @@ -0,0 +1,12 @@ + + + +| Name | GitHub ID | +|--------------------| ----------------:| +| Bas van de Kerkhof | bvandekerkhof | +| Matthew Jones | mattj89 | +| David Randell | davidrandell84 | \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..1232365 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,48 @@ + + +# Getting started with contributing +We're happy for everyone to contribute to the package by proposing new features, implementing them in a new branch and +creating a pull request. In order to keep the codebase consistent we use some common standards and tools for formatting +of the code. We are using poetry to keep our development environment up to date. Please follow the instructions here +https://python-poetry.org/docs/ to install poetry. Next, pull the repo to your local machine, open a terminal window +and navigate to the top directory of this package. Run the commands `poetry install --all-extras` and +`poetry install --with contributor` to install all required tools and dependencies for contributing to this package. + +We list the various tools below: +- pylint: Tool to help with the formatting of the code, can be used as a linter in most IDEs, all relevant settings are +contained in the .pylintrc file and additionally controlled through the pyproject.toml file. +- isort: Sorts the inputs, can be used from the command line `isort .`, use the `--check` flag if you do not want to +reformat the import statements in place but just want to check if imports need to be reformatted. +- black: Formats the code based on PEP standards, can be used from the command line: `black .`, use the `--check` flag +if you do not want to reformat the code in place but just check if files need to be reformatted. +- pydocstyle: Checks if the docstrings for all files and functions are present and follow the same style as specified +in the pyproject.toml file. Used in order to get consistent documentation, can be used as a check from the command line +but will not be able to replace any text, `pydocstyle .` + +In case you're unfamiliar with the tools, don't worry we have set up GitHub actions accordingly to format the code to +standard automatically on each push. + +When you implement a new feature you also need to write additional (unit) tests to show the feature you've implemented +is also working as it should. 
Do so by creating a file in the appropriate test folder and call that file +test_.py. Use pytest to see if your test is passing and use pytest-cov to check the coverage of your +test. The settings in the pyproject.toml file are such that we automatically test for coverage. You can run all tests +through the command line `pytest .`, use the `--cov-report term-missing` flag to show which lines are missing in the +coverage. All test are required to pass before merging into main. + +Whenever we merge new code into main, the release version gets automatically incremented as a micro version update. +Minor and major version releases need to be labeled manually. Version release convention used is major.minor.micro. + +# Notice + +The [codeowners](https://github.com/sede-open/pyELQ//blob/main/CODEOWNERS.md) reserve the right to deny applications +for ‘maintainer’ status or contributions if +the prospective maintainer or contributor is a national of and/or located in a ‘Restricted Jurisdiction’. +(A Restricted Jurisdiction is defined as a country, state, territory or region which is subject to comprehensive +trade sanctions or embargoes namely: Iran, Cuba, North Korea, Syria, the Crimea region of Ukraine (including +Sevastopol) and non-Government controlled areas of Donetsk and Luhansk). For anyone to be promoted to 'maintainer' +status, the prospective maintainer will be required to provide information on their nationality, location, and +affiliated organizations \ No newline at end of file diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/LICENSES/Apache-2.0.txt b/LICENSES/Apache-2.0.txt new file mode 100644 index 0000000..137069b --- /dev/null +++ b/LICENSES/Apache-2.0.txt @@ -0,0 +1,73 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + + (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. + + You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + +To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..e113b44 --- /dev/null +++ b/README.md @@ -0,0 +1,61 @@ + + + +# pyELQ +This repository contains the Python Emission Localization and Quantification software we call pyELQ. It is code used +for gas dispersion modelling, in particular methane emissions detection, localization and quantification. + +The main code is structured in the src folder in an object-oriented way. +*** +# Background +The **py**thon **E**mission **L**ocalization and **Q**uantification (pyELQ) code aims to maximize effective use of +existing measurement data, especially from continuous monitoring solutions. The code has been developed to detect, +localize, and quantify methane emissions from concentration and wind measurements. 
It can be used in combination with +point or beam sensors that are placed strategically on an area of interest. + +The algorithms in the pyELQ code are based on a Bayesian statistics framework. pyELQ can ingest long-term concentration +and wind data, and it performs an inversion to predict the likely strengths and locations of persistent methane sources. +The goal is to arrive at a plausible estimate of methane emissions from an area of interest that matches the measured +data. The predictions from pyELQ come with uncertainty ranges that are representative of probability density functions +sampled by a Markov Chain Monte Carlo method. Time series of varying length can be processed by pyELQ: in general, +the Bayesian inversion leads to a more constrained solution if more high-precision measurement data is available. +We have tested our code under controlled conditions as well as in operating oil and gas facilities. + +The information on the strength and the approximate location of methane emission sources provided by pyELQ can help +operators with more efficient identification and quantification of (unexpected) methane sources, in order to start +appropriate mitigating actions. The pyELQ code is being made available in an open-source environment, +to support various assets in their quest to reduce methane emissions. + +Use cases where the pyELQ code has been applied are described in the following papers: + +* IJzermans, R., Jones, M., Weidmann, D. et al. "Long-term continuous monitoring of methane emissions at an oil and gas facility using a multi-open-path laser dispersion spectrometer." Sci Rep 14, 623 (2024). (https://doi.org/10.1038/s41598-023-50081-9) + +* Weidmann, D., Hirst, B. et al. "Locating and Quantifying Methane Emissions by Inverse Analysis of Path-Integrated Concentration Data Using a Markov-Chain Monte Carlo Approach." ACS Earth and Space Chemistry 2022 6 (9), 2190-2198 (https://doi.org/10.1021/acsearthspacechem.2c00093) +*** +# Installing pyELQ as a package +Suppose you want to use pyELQ in a different project. You can install it just like a Python package. +After activating the environment you want to install pyELQ in, open a terminal, move to the main pyELQ folder +where pyproject.toml is located and run `pip install .`; optionally, you can pass the `-e` flag for an editable install. +All the main options, info and settings for the package are found in the pyproject.toml file, which sits in this repo +as well. + +*** + +# Examples +For examples of how to use this package, please check out the [Examples](https://github.com/sede-open/pyELQ/blob/main/examples). A condensed, illustrative sketch of the overall workflow is also included at the end of this README. + +*** + +# Contribution +This project welcomes contributions and suggestions. If you have a suggestion that would make this better, you can simply open an issue with a relevant title. Don't forget to give the project a star! Thanks again! + +For more details on contributing to this repository, see the [Contributing guide](https://github.com/sede-open/pyELQ/blob/main/CONTRIBUTING.md). + +*** +# Licensing + +Distributed under the Apache License Version 2.0. See the [license file](https://github.com/sede-open/pyELQ/blob/main/LICENSE.txt) for more information.
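+
+***
+# Workflow sketch (illustrative)
+To give a feel for how the pieces fit together, the snippet below condenses the example notebook in this repository
+into a minimal, illustrative sketch. All classes and method calls appear in that notebook; the specific values (a
+single point sensor at the local origin, constant meteorology, a small number of iterations) are placeholders chosen
+for brevity rather than recommended settings, and default settings are assumed for the background, offset and error
+model components, which the notebook configures explicitly.
+
+```python
+import numpy as np
+import pandas as pd
+
+from pyelq.component.background import SpatioTemporalBackground
+from pyelq.component.error_model import BySensor
+from pyelq.component.offset import PerSensor
+from pyelq.component.source_model import Normal
+from pyelq.coordinate_system import ENU, LLA
+from pyelq.dispersion_model.gaussian_plume import GaussianPlume
+from pyelq.gas_species import CH4
+from pyelq.meteorology import Meteorology
+from pyelq.model import ELQModel
+from pyelq.sensor.sensor import Sensor, SensorGroup
+from pyelq.source_map import SourceMap
+
+# Common time axis for the (placeholder) concentration and wind observations.
+time_axis = pd.arrays.DatetimeArray(pd.date_range("2024-01-01 08:00", "2024-01-01 12:00", freq="120s"))
+nof_observations = time_axis.size
+
+# A single point sensor at the local origin; in a real analysis the concentration attribute holds measured data.
+sensor = Sensor()
+sensor.label = "Point sensor 0"
+sensor.location = LLA(latitude=np.array([0.0]), longitude=np.array([0.0]), altitude=np.array([1.0]))
+sensor.time = time_axis
+sensor.concentration = 2.0 + 0.01 * np.random.standard_normal(nof_observations)
+sensor_group = SensorGroup()
+sensor_group.add_sensor(sensor)
+
+# Simple meteorology: wind speed/direction plus temperature, pressure and turbulence parameters.
+met_object = Meteorology()
+met_object.time = time_axis
+met_object.wind_direction = np.linspace(0.0, 90.0, nof_observations)
+met_object.wind_speed = 4.0 * np.ones(nof_observations)
+met_object.calculate_uv_from_wind_speed_direction()
+met_object.temperature = 288.15 * np.ones(nof_observations)
+met_object.pressure = 101.325 * np.ones(nof_observations)
+met_object.wind_turbulence_horizontal = 5.0 * np.ones(nof_observations)
+met_object.wind_turbulence_vertical = 5.0 * np.ones(nof_observations)
+
+# Candidate source locations on a local ENU grid, and the Gaussian plume dispersion model built on top of them.
+site_limits = np.array([[0, 30], [0, 30], [0, 3]])
+location_object = ENU(ref_latitude=0.0, ref_longitude=0.0, ref_altitude=0.0)
+source_map = SourceMap()
+source_map.generate_sources(coordinate_object=location_object, sourcemap_limits=site_limits,
+                            sourcemap_type="hypercube", nof_sources=2)
+
+# Source model with reversible-jump MCMC over the number and location of sources.
+source_model = Normal()
+source_model.dispersion_model = GaussianPlume(source_map=source_map)
+source_model.emission_rate_mean = np.array([0], ndmin=1)
+source_model.initial_precision = np.array([1 / (2.5**2)], ndmin=1)
+source_model.reversible_jump = True
+source_model.site_limits = site_limits
+source_model.coverage_detection = 0.1  # ppm
+source_model.coverage_test_source = 3.0  # kg/hr
+
+# Background, per-sensor offset and per-sensor measurement error components (default settings).
+background = SpatioTemporalBackground()
+offset_model = PerSensor()
+error_model = BySensor()
+
+# Assemble the model, run the MCMC and map the results back onto the components.
+elq_model = ELQModel(sensor_object=sensor_group, meteorology=met_object, gas_species=CH4(),
+                     background=background, source_model=source_model,
+                     error_model=error_model, offset_model=offset_model)
+elq_model.n_iter = 1000
+elq_model.initialise()
+elq_model.to_mcmc()
+elq_model.run_mcmc()
+elq_model.from_mcmc()
+```
+
+For a complete, runnable walkthrough, including simulated sources, preprocessing and plotting of the results, see the
+example notebook in the [Examples](https://github.com/sede-open/pyELQ/blob/main/examples) folder.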
diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..5a934a9 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,7 @@ + + +--8<-- "README.md" \ No newline at end of file diff --git a/docs/pyelq/component/background.md b/docs/pyelq/component/background.md new file mode 100644 index 0000000..4f665a1 --- /dev/null +++ b/docs/pyelq/component/background.md @@ -0,0 +1,9 @@ + + +# Background + +::: pyelq.component.background diff --git a/docs/pyelq/component/component.md b/docs/pyelq/component/component.md new file mode 100644 index 0000000..5079929 --- /dev/null +++ b/docs/pyelq/component/component.md @@ -0,0 +1,21 @@ + + +# Component classes + +An overview of the component classes: + +- [Background Model](./background.md) + +- [Error Model](./error_model.md) + +- [Offset](./offset.md) + +- [Source Model](./source_model.md) + +## Component superclass + +::: pyelq.component.component diff --git a/docs/pyelq/component/error_model.md b/docs/pyelq/component/error_model.md new file mode 100644 index 0000000..237cbad --- /dev/null +++ b/docs/pyelq/component/error_model.md @@ -0,0 +1,9 @@ + + +# Error Model + +::: pyelq.component.error_model diff --git a/docs/pyelq/component/offset.md b/docs/pyelq/component/offset.md new file mode 100644 index 0000000..3a227ca --- /dev/null +++ b/docs/pyelq/component/offset.md @@ -0,0 +1,9 @@ + + +# Offset + +::: pyelq.component.offset diff --git a/docs/pyelq/component/source_model.md b/docs/pyelq/component/source_model.md new file mode 100644 index 0000000..a3b0bd7 --- /dev/null +++ b/docs/pyelq/component/source_model.md @@ -0,0 +1,9 @@ + + +# Source Model + +::: pyelq.component.source_model diff --git a/docs/pyelq/coordinate_system.md b/docs/pyelq/coordinate_system.md new file mode 100644 index 0000000..6ba95f0 --- /dev/null +++ b/docs/pyelq/coordinate_system.md @@ -0,0 +1,9 @@ + + +# Coordinate System + +::: pyelq.coordinate_system diff --git a/docs/pyelq/data_access/data_access.md b/docs/pyelq/data_access/data_access.md new file mode 100644 index 0000000..629f84b --- /dev/null +++ b/docs/pyelq/data_access/data_access.md @@ -0,0 +1,9 @@ + + +# Data access + +::: pyelq.data_access.data_access diff --git a/docs/pyelq/dispersion_model/gaussian_plume.md b/docs/pyelq/dispersion_model/gaussian_plume.md new file mode 100644 index 0000000..85ef6a7 --- /dev/null +++ b/docs/pyelq/dispersion_model/gaussian_plume.md @@ -0,0 +1,9 @@ + + +# Gaussian Plume + +::: pyelq.dispersion_model.gaussian_plume \ No newline at end of file diff --git a/docs/pyelq/dlm.md b/docs/pyelq/dlm.md new file mode 100644 index 0000000..affd65c --- /dev/null +++ b/docs/pyelq/dlm.md @@ -0,0 +1,9 @@ + + +# DLM + +::: pyelq.dlm \ No newline at end of file diff --git a/docs/pyelq/gas_species.md b/docs/pyelq/gas_species.md new file mode 100644 index 0000000..732ddf5 --- /dev/null +++ b/docs/pyelq/gas_species.md @@ -0,0 +1,9 @@ + + +# Gas Species + +::: pyelq.gas_species \ No newline at end of file diff --git a/docs/pyelq/meteorology.md b/docs/pyelq/meteorology.md new file mode 100644 index 0000000..90dc3fb --- /dev/null +++ b/docs/pyelq/meteorology.md @@ -0,0 +1,9 @@ + + +# Meteorology + +::: pyelq.meteorology diff --git a/docs/pyelq/model.md b/docs/pyelq/model.md new file mode 100644 index 0000000..6da1e87 --- /dev/null +++ b/docs/pyelq/model.md @@ -0,0 +1,9 @@ + + +# Model + +::: pyelq.model \ No newline at end of file diff --git a/docs/pyelq/plotting/plot.md b/docs/pyelq/plotting/plot.md new file mode 100644 index 0000000..69b2704 --- /dev/null +++ b/docs/pyelq/plotting/plot.md @@ -0,0 
+1,9 @@ + + +# Plot + +::: pyelq.plotting.plot \ No newline at end of file diff --git a/docs/pyelq/preprocessing.md b/docs/pyelq/preprocessing.md new file mode 100644 index 0000000..f065fc6 --- /dev/null +++ b/docs/pyelq/preprocessing.md @@ -0,0 +1,9 @@ + + +# Pre-processing + +::: pyelq.preprocessing \ No newline at end of file diff --git a/docs/pyelq/sensor/beam.md b/docs/pyelq/sensor/beam.md new file mode 100644 index 0000000..b2928b3 --- /dev/null +++ b/docs/pyelq/sensor/beam.md @@ -0,0 +1,9 @@ + + +# Beam + +::: pyelq.sensor.beam \ No newline at end of file diff --git a/docs/pyelq/sensor/satellite.md b/docs/pyelq/sensor/satellite.md new file mode 100644 index 0000000..fa78418 --- /dev/null +++ b/docs/pyelq/sensor/satellite.md @@ -0,0 +1,9 @@ + + +# Satellite + +::: pyelq.sensor.satellite \ No newline at end of file diff --git a/docs/pyelq/sensor/sensor.md b/docs/pyelq/sensor/sensor.md new file mode 100644 index 0000000..fcd5c60 --- /dev/null +++ b/docs/pyelq/sensor/sensor.md @@ -0,0 +1,17 @@ + + +# Sensor classes + +An overview of the sensor classes: + +- [Beam](./beam.md) + +- [Satellite](./satellite.md) + +## Sensor superclass + +::: pyelq.sensor.sensor diff --git a/docs/pyelq/source_map.md b/docs/pyelq/source_map.md new file mode 100644 index 0000000..c5e9751 --- /dev/null +++ b/docs/pyelq/source_map.md @@ -0,0 +1,9 @@ + + +# Source Map + +::: pyelq.source_map \ No newline at end of file diff --git a/docs/pyelq/support_functions/spatio_temporal_interpolation.md b/docs/pyelq/support_functions/spatio_temporal_interpolation.md new file mode 100644 index 0000000..d931b5d --- /dev/null +++ b/docs/pyelq/support_functions/spatio_temporal_interpolation.md @@ -0,0 +1,9 @@ + + +# Spatio-Temporal Interpolation + +::: pyelq.support_functions.spatio_temporal_interpolation diff --git a/examples/example.ipynb b/examples/example.ipynb new file mode 100644 index 0000000..bbad558 --- /dev/null +++ b/examples/example.ipynb @@ -0,0 +1,630 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6e618dc8-b431-4850-80ce-7f53246ae382", + "metadata": {}, + "source": [ + "# How to use pyELQ\n", + "This example is intended to provide a basic overview on how to work with the pyELQ repo. We will set up a basic example where we generate some concentration data and try to estimate the source location and emisson rate of these synthetic sources.\n", + "\n", + "First we import all the required packages." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b315a63d-29d4-4163-9d73-50ed9622eec5", + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "from copy import deepcopy\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import plotly.graph_objects as go\n", + "from pyelq.component.background import SpatioTemporalBackground\n", + "from pyelq.component.error_model import BySensor\n", + "from pyelq.component.offset import PerSensor\n", + "from pyelq.component.source_model import Normal\n", + "from pyelq.coordinate_system import ENU, LLA\n", + "from pyelq.dispersion_model.gaussian_plume import GaussianPlume\n", + "from pyelq.gas_species import CH4\n", + "from pyelq.model import ELQModel\n", + "from pyelq.meteorology import Meteorology\n", + "from pyelq.plotting.plot import Plot\n", + "from pyelq.preprocessing import Preprocessor\n", + "from pyelq.sensor.beam import Beam\n", + "from pyelq.sensor.sensor import Sensor, SensorGroup\n", + "from pyelq.source_map import SourceMap" + ] + }, + { + "cell_type": "markdown", + "id": "d1df63d6-8fc1-4c02-991a-703dbf3fd17f", + "metadata": {}, + "source": [ + "Next we set up a time axis as well as a reference point which we use in our local coordinate systems." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da0c9d2b-eb55-4bc2-b03f-557a3eff352a", + "metadata": {}, + "outputs": [], + "source": [ + "time_axis = pd.arrays.DatetimeArray(pd.date_range(start=\"2024-01-01 08:00:00\", end=\"2024-01-01 12:00:00\", freq=\"120s\"))\n", + "nof_observations = time_axis.size\n", + "reference_latitude = 0\n", + "reference_longitude = 0\n", + "reference_altitude = 0" + ] + }, + { + "cell_type": "markdown", + "id": "8de54af3-325d-498f-aca5-0ed57b202db8", + "metadata": {}, + "source": [ + "We define a couple of regularly spaced beam sensors by creating a local ENU frame, defining the beam end points in that coordinate frame and then transforming the ENU coordinates to LLA coordinates." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a813265-29eb-4737-b8a4-76e1f2733747", + "metadata": {}, + "outputs": [], + "source": [ + "radius = 30\n", + "angles = np.linspace(0, 90, 5)\n", + "sensor_x = radius * np.cos(angles*np.pi/180)\n", + "sensor_y = radius * np.sin(angles*np.pi/180)\n", + "sensor_z = np.ones_like(sensor_x) * 5.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4df901ae-fd64-4836-8120-cb02533282fd", + "metadata": {}, + "outputs": [], + "source": [ + "ENU_object = ENU(ref_latitude=reference_latitude, ref_longitude=reference_longitude, ref_altitude=reference_altitude)\n", + "ENU_object.from_array(np.vstack([sensor_x, sensor_y, sensor_z]).T)\n", + "LLA_object = ENU_object.to_lla()\n", + "LLA_array = LLA_object.to_array()\n", + "print(LLA_array)" + ] + }, + { + "cell_type": "markdown", + "id": "bd5c7329-c005-49ed-bc8b-c812c2b4f02a", + "metadata": {}, + "source": [ + "We create a SensorGroup which contains all 5 beams we have set up. We set the sensor position (beam start points) to be at the reference latitude and longitude, at an altitude of 5 meters, similar to the beam end points. The beam layout can be seen in the plot. We initialize the concentration and the time attributes of the sensor so we can use it later to calculate the simulated concentration observations."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3338ee8-3c47-40eb-8721-b014f1c74f88", + "metadata": {}, + "outputs": [], + "source": [ + "nof_sensors = LLA_array.shape[0]\n", + "sensor_group = SensorGroup()\n", + "for sensor in range(nof_sensors):\n", + " new_sensor = Beam()\n", + " new_sensor.label = f\"Beam sensor {sensor}\"\n", + " new_sensor.location = LLA(\n", + " latitude=np.array([reference_latitude, LLA_object.latitude[sensor]]),\n", + " longitude=np.array([reference_longitude, LLA_object.longitude[sensor]]),\n", + " altitude=np.array([5.0, LLA_object.altitude[sensor]])\n", + " )\n", + " \n", + " new_sensor.time = time_axis\n", + " new_sensor.concentration = np.zeros(nof_observations)\n", + " sensor_group.add_sensor(new_sensor)" + ] + }, + { + "cell_type": "markdown", + "id": "717fba57-ff5f-4977-9fcb-d0969182bd0e", + "metadata": {}, + "source": [ + "Let's also add some point sensors to our SensorGroup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7452c52-be86-467c-a2dd-54e57892b637", + "metadata": {}, + "outputs": [], + "source": [ + "sensor_x = np.array([5, 20])\n", + "sensor_y = np.array([22, 5])\n", + "sensor_z = np.ones_like(sensor_x) * 1.0\n", + "ENU_object = ENU(ref_latitude=reference_latitude, ref_longitude=reference_longitude, ref_altitude=reference_altitude)\n", + "ENU_object.from_array(np.vstack([sensor_x, sensor_y, sensor_z]).T)\n", + "LLA_object = ENU_object.to_lla()\n", + "LLA_array = LLA_object.to_array()\n", + "\n", + "nof_sensors = LLA_array.shape[0]\n", + "for sensor in range(nof_sensors):\n", + " new_sensor = Sensor()\n", + " new_sensor.label = f\"Point sensor {sensor}\"\n", + " new_sensor.location = LLA(\n", + " latitude=np.array([LLA_object.latitude[sensor]]),\n", + " longitude=np.array([LLA_object.longitude[sensor]]),\n", + " altitude=np.array([LLA_object.altitude[sensor]])\n", + " )\n", + " \n", + " new_sensor.time = time_axis\n", + " new_sensor.concentration = np.zeros(nof_observations)\n", + " sensor_group.add_sensor(new_sensor)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d23ea54d-cbf9-40a4-9fc0-bd54037991fb", + "metadata": {}, + "outputs": [], + "source": [ + "fig=go.Figure()\n", + "fig = sensor_group.plot_sensor_location(fig=fig)\n", + "fig.update_layout(mapbox_style=\"open-street-map\", mapbox_center=dict(lat=reference_latitude, lon=reference_longitude),\n", + " mapbox_zoom=18, height=800, margin={\"r\":0,\"l\":0,\"b\":0})\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "b4048ff4-f85d-498b-a468-3eb32af208a0", + "metadata": {}, + "source": [ + "We use the meteorology object to store the simulated meteorology observations like wind speed and direction and show these in a wind rose plot." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "703e57c6-246f-429b-ac5d-050539bb81fa", + "metadata": {}, + "outputs": [], + "source": [ + "met_object = Meteorology()\n", + "\n", + "met_object.time = time_axis\n", + "met_object.wind_direction = np.linspace(0.0, 90.0, nof_observations) + np.random.normal(loc=0.0, scale=0.1, size=nof_observations)\n", + "met_object.wind_speed = 4.0 * np.ones_like(met_object.wind_direction) + np.random.normal(loc=0.0, scale=0.1, size=nof_observations)\n", + "\n", + "met_object.calculate_uv_from_wind_speed_direction()\n", + "\n", + "met_object.temperature = (273.1 + 15.0) * np.ones_like(met_object.wind_direction)\n", + "met_object.pressure = 101.325 * np.ones_like(met_object.wind_direction)\n", + "\n", + "met_object.wind_turbulence_horizontal = 5.0 * np.ones_like(met_object.wind_direction)\n", + "met_object.wind_turbulence_vertical = 5.0 * np.ones_like(met_object.wind_direction)\n", + "\n", + "fig = met_object.plot_polar_hist()\n", + "fig.update_layout(height=400, margin={\"r\":0,\"l\":0})\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e5e6644-03a3-4357-904d-88f3ede974b2", + "metadata": {}, + "outputs": [], + "source": [ + "fig = go.Figure()\n", + "fig.add_trace(go.Scatter(x=time_axis, y=met_object.wind_direction, mode='markers', name='Wind direction'))\n", + "fig.update_layout(height=400, margin={\"r\":0,\"l\":0}, title='Wind Direction [deg]')\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "a0e6ef8f-ad67-40b5-a762-59280b34ee82", + "metadata": {}, + "source": [ + "We set up a source map which contains the location information of the simulated sources. We define them at specific locations, but we could also let this object generate sources using, for example, a Latin hypercube design within the specified site limits." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "379f3820-ea9c-4cc0-ae94-1558e2eb21a4", + "metadata": {}, + "outputs": [], + "source": [ + "source_map = SourceMap()\n", + "site_limits = np.array([[0, 30],\n", + " [0, 30],\n", + " [0, 3]])\n", + "location_object = ENU(ref_latitude=reference_latitude, ref_longitude=reference_longitude, ref_altitude=reference_altitude)\n", + "\n", + "source_map.generate_sources(coordinate_object=location_object, sourcemap_limits=site_limits, sourcemap_type=\"hypercube\", nof_sources=2)\n", + "\n", + "source_map.location.up = np.array([2.0, 3.0])\n", + "source_map.location.east = np.array([10.0, 20.0])\n", + "source_map.location.north = np.array([20.0, 15.0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38504528-3537-4bd5-8f51-2bedca548fa1", + "metadata": {}, + "outputs": [], + "source": [ + "fig=go.Figure()\n", + "fig = sensor_group.plot_sensor_location(fig=fig)\n", + "fig.update_layout(mapbox_style=\"open-street-map\", mapbox_center=dict(lat=reference_latitude, lon=reference_longitude),\n", + " mapbox_zoom=18, height=800, margin={\"r\":0,\"l\":0,\"b\":0})\n", + "fig.add_trace(go.Scattermapbox(mode=\"markers\",\n", + " lon=source_map.location.to_lla().longitude,\n", + " lat=source_map.location.to_lla().latitude,\n", + " name=\"True locations\",\n", + " marker=go.scattermapbox.Marker(color=\"green\", size=10))\n", + " )\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "bce2eafe-acbf-40c9-8e3d-94f348bc660c", + "metadata": {}, + "source": [ + "After defining the gas species we are interested in, we set the true emission rates and generate the simulated observations.
We calculate the coupling from each source to each sensor using a Gaussian plume model and multiply this coupling with the emission rates of the respective sources. We add this source contribution to the background (in this case 2.0 ppm) and also add some random (measurement) noise. These simulated observations are populating the concentration attribute of each sensor in the sensorgroup. The resulting data is shown in the plot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5c3972b-0ad4-4c83-b13c-3345350fe501", + "metadata": {}, + "outputs": [], + "source": [ + "gas_object = CH4()\n", + "dispersion_model = GaussianPlume(source_map=deepcopy(source_map))\n", + "true_emission_rates = np.array([[15], [10]])\n", + "for current_sensor in sensor_group.values():\n", + " coupling_matrix = dispersion_model.compute_coupling(sensor_object=current_sensor, meteorology_object=met_object,\n", + " gas_object=gas_object, output_stacked=False, run_interpolation=False)\n", + " source_contribution = coupling_matrix @ true_emission_rates\n", + " observation = source_contribution.flatten() + 2.0 + np.random.normal(loc=0.0, scale=0.01, size=current_sensor.nof_observations)\n", + " current_sensor.concentration = observation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "734d39c9-63e1-4818-bcba-5c4f4e6eeede", + "metadata": {}, + "outputs": [], + "source": [ + "fig=go.Figure()\n", + "fig = sensor_group.plot_timeseries(fig=fig)\n", + "fig.update_layout(height=800, margin={\"r\":0,\"t\":10,\"l\":0,\"b\":0})\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e753113", + "metadata": {}, + "outputs": [], + "source": [ + "fig = go.Figure()\n", + "fig = met_object.plot_polar_scatter(fig=fig, sensor_object=sensor_group)\n", + "fig.update_layout(height=400, margin={\"r\":0,\"l\":0})\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "c3f5c90e-e753-4799-9af7-6d4ac835d69e", + "metadata": {}, + "source": [ + "Normally these raw datasets need some preprocessing like smoothing the data and making sure all the time axes align. Therefore we created the preprocessor class. We show the functionality to apply an aggregate function on the data within the user specified time bins and also show how to apply a wind filter, even though the wind speeds we simulated are most likely all larger than the lower limit." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "625bf435-57c5-4688-a858-335d7a551acb", + "metadata": {}, + "outputs": [], + "source": [ + "analysis_time_range = [datetime.datetime(2024, 1, 1, 8, 0, 0), datetime.datetime(2024, 1, 1, 12, 0, 0)]\n", + "\n", + "smoothing_period = 10 * 60\n", + "\n", + "time_bin_edges = pd.arrays.DatetimeArray(pd.date_range(analysis_time_range[0], analysis_time_range[1], freq=f'{smoothing_period}s'))\n", + "\n", + "prepocessor_object = Preprocessor(time_bin_edges=time_bin_edges, sensor_object=sensor_group, met_object=met_object,\n", + " aggregate_function=\"median\")\n", + "\n", + "min_wind_speed = 0.05\n", + "prepocessor_object.filter_on_met(filter_variable=[\"wind_speed\"], lower_limit=[min_wind_speed], upper_limit=[np.infty])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0cd96e2d-2f35-464a-b088-1b8f3f704316", + "metadata": {}, + "outputs": [], + "source": [ + "fig=go.Figure()\n", + "fig = prepocessor_object.sensor_object.plot_timeseries(fig=fig)\n", + "fig.update_layout(height=800, margin={\"r\":0,\"t\":0,\"l\":0,\"b\":0})\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "287631c6-a3c2-4e9b-a531-310f20337f09", + "metadata": {}, + "source": [ + "We set up the different parameters for our MCMC model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e1d54a0-f3da-4cd4-a171-f12941ba6e3a", + "metadata": {}, + "outputs": [], + "source": [ + "source_model = Normal()\n", + "source_model.emission_rate_mean = np.array([0], ndmin=1)\n", + "source_model.initial_precision = np.array([1 / (2.5 ** 2)], ndmin=1)\n", + "source_model.reversible_jump = True\n", + "source_model.rate_num_sources = 1.0\n", + "source_model.dispersion_model = dispersion_model\n", + "source_model.update_precision = False\n", + "source_model.site_limits = site_limits\n", + "source_model.coverage_detection = 0.1 # ppm\n", + "source_model.coverage_test_source = 3.0 # kg/hr\n", + "\n", + "background = SpatioTemporalBackground()\n", + "background.n_time = None\n", + "background.mean_bg = 2.0\n", + "background.spatial_dependence = True\n", + "background.initial_precision = 1 / np.power(3e-4, 2)\n", + "background.precision_time_0 = 1 / np.power(0.1, 2)\n", + "background.spatial_correlation_param = 25.0\n", + "background.update_precision = False\n", + "\n", + "offset_model = PerSensor()\n", + "offset_model.update_precision = False\n", + "offset_model.initial_precision = 1 / (0.001)**2\n", + "\n", + "error_model = BySensor()\n", + "error_model.initial_precision = 1 / (0.1)**2\n", + "error_model.prior_precision_shape = 1e-2\n", + "error_model.prior_precision_rate = 1e-2\n" + ] + }, + { + "cell_type": "markdown", + "id": "f4dad5da-e4f2-42db-baac-c5b7b28577d4", + "metadata": {}, + "source": [ + "We create an instance of the ELQModel class which forms the interface with the MCMC repo and run the MCMC algorithm. 
Finally we plot the results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "241d1ea1-9935-417b-b1d2-296983aca7b8", + "metadata": {}, + "outputs": [], + "source": [ + "elq_model = ELQModel(sensor_object=prepocessor_object.sensor_object, meteorology=prepocessor_object.met_object,\n", + " gas_species=gas_object, background=background, source_model=source_model,\n", + " error_model=error_model, offset_model=offset_model)\n", + "elq_model.n_iter = 5000\n", + "\n", + "elq_model.initialise()\n", + "\n", + "elq_model.to_mcmc()\n", + "elq_model.run_mcmc()\n", + "elq_model.from_mcmc()" + ] + }, + { + "cell_type": "markdown", + "id": "c2aeb999-c423-4e97-a0eb-781826d1d308", + "metadata": {}, + "source": [ + "Finally plotting the results in a separate cell. Note that you can plot all plots in 1 go as well using `plotter.show_all()`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "259ea88c-086b-4b4a-bbc3-d49761c22fdb", + "metadata": {}, + "outputs": [], + "source": [ + "burn_in = elq_model.n_iter-1000\n", + "\n", + "plotter = Plot()\n", + "\n", + "plotter.plot_quantification_results_on_map(model_object=elq_model, bin_size_x=1, bin_size_y=1, normalized_count_limit=0.1, burn_in=burn_in)\n", + "\n", + "plotter.plot_fitted_values_per_sensor(mcmc_object=elq_model.mcmc, sensor_object=elq_model.sensor_object, burn_in=burn_in)\n", + "\n", + "true_source_location_trace = go.Scattermapbox(mode=\"markers\",lon=source_map.location.to_lla().longitude,\n", + " lat=source_map.location.to_lla().latitude,name=\"True locations\",\n", + " marker=go.scattermapbox.Marker(color=\"green\", size=10))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82c1288e-4610-4b4c-946c-0a420d4358ee", + "metadata": {}, + "outputs": [], + "source": [ + "plotter.figure_dict[\"fitted_values\"].update_layout(height=800, margin={\"r\":0,\"t\":50,\"l\":0,\"b\":0}).show()" + ] + }, + { + "cell_type": "markdown", + "id": "b5341479-f129-4217-af2e-d703803cba53", + "metadata": {}, + "source": [ + "Note we could have also used this call to plot the fitted values figure:\n", + "`plotter = elq_model.plot_fitted_values(plot=plotter)`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46563585-d29e-4133-85be-810750da7d33", + "metadata": {}, + "outputs": [], + "source": [ + "plotter = elq_model.plot_fitted_values(plot=plotter)\n", + "plotter.figure_dict[\"fitted_values\"].update_layout(height=800, margin={\"r\":0,\"t\":50,\"l\":0,\"b\":0}).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6bd52d03-4af7-4726-b3be-ec140bd0b2db", + "metadata": {}, + "outputs": [], + "source": [ + "plotter.figure_dict[\"count_map\"].add_trace(true_source_location_trace).update_traces(showlegend=True)\n", + "plotter.figure_dict[\"count_map\"].update_layout(height=800, margin={\"r\":0,\"t\":50,\"l\":0,\"b\":0}, mapbox_zoom=19)\n", + "plotter.figure_dict[\"count_map\"].show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0577aa7-6560-4bff-bb8a-25146eca36ff", + "metadata": {}, + "outputs": [], + "source": [ + "plotter.figure_dict[\"iqr_map\"].add_trace(true_source_location_trace).update_traces(showlegend=True)\n", + "plotter.figure_dict[\"iqr_map\"].update_layout(height=800, margin={\"r\":0,\"t\":50,\"l\":0,\"b\":0}, mapbox_zoom=19)\n", + "plotter.figure_dict[\"iqr_map\"].show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "458e0125-e64f-45a4-ba6d-c1a07f9d43ea", + "metadata": {}, + "outputs": [], + 
"source": [ + "plotter.figure_dict[\"median_map\"].add_trace(true_source_location_trace).update_traces(showlegend=True)\n", + "plotter.figure_dict[\"median_map\"].update_layout(height=800, margin={\"r\":0,\"t\":50,\"l\":0,\"b\":0}, mapbox_zoom=19)\n", + "plotter.figure_dict[\"median_map\"].show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b3ddc5e-d0f1-4570-a37d-5399249b85d0", + "metadata": {}, + "outputs": [], + "source": [ + "plotter = elq_model.plot_log_posterior(burn_in_value=burn_in, plot=plotter)\n", + "plotter.figure_dict[\"log_posterior_plot\"].show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "291e018b-afae-46d1-affb-71c99d3c3d31", + "metadata": {}, + "outputs": [], + "source": [ + "plotter = elq_model.components[\"source\"].plot_iterations(plot=plotter, burn_in_value=burn_in, y_axis_type=\"linear\")\n", + "plotter = elq_model.components[\"source\"].plot_iterations(plot=plotter, burn_in_value=burn_in, y_axis_type=\"log\")\n", + "\n", + "plotter.figure_dict[\"estimated_values_plot\"].show()\n", + "plotter.figure_dict[\"log_estimated_values_plot\"].show()\n", + "plotter.figure_dict[\"number_of_sources_plot\"].show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c2058b5-4655-46ea-ab41-5551840b69d6", + "metadata": {}, + "outputs": [], + "source": [ + "plotter = elq_model.components[\"offset\"].plot_iterations(plot=plotter, sensor_object=elq_model.sensor_object,burn_in_value=burn_in)\n", + "plotter = elq_model.components[\"offset\"].plot_distributions(plot=plotter, sensor_object=elq_model.sensor_object,burn_in_value=burn_in)\n", + "plotter.figure_dict[\"offset_iterations\"].show()\n", + "plotter.figure_dict[\"offset_distributions\"].show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adf43b37-17fc-4fe0-85cb-8894b209edbd", + "metadata": {}, + "outputs": [], + "source": [ + "plotter = elq_model.components[\"error_model\"].plot_iterations(plot=plotter, sensor_object=elq_model.sensor_object,burn_in_value=burn_in)\n", + "plotter = elq_model.components[\"error_model\"].plot_distributions(plot=plotter, sensor_object=elq_model.sensor_object,burn_in_value=burn_in)\n", + "plotter.figure_dict[\"error_model_iterations\"].show()\n", + "plotter.figure_dict[\"error_model_distributions\"].show()" + ] + }, + { + "cell_type": "markdown", + "id": "740cba99-8865-41a2-8661-b4e083fc8f4c", + "metadata": {}, + "source": [ + "Finally we show all keys of figures present. As said before we could have just created all figures and perform one call to `plotter.show_all()` which shows all figures at once." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e288724-d16d-4cd7-8bd4-c529f5b02772", + "metadata": {}, + "outputs": [], + "source": [ + "list(plotter.figure_dict.keys())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/example.ipynb.license b/examples/example.ipynb.license new file mode 100644 index 0000000..e25c5d4 --- /dev/null +++ b/examples/example.ipynb.license @@ -0,0 +1,3 @@ +SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. 
All Rights Reserved. + +SPDX-License-Identifier: Apache-2.0 diff --git a/examples/example.pdf.license b/examples/example.pdf.license new file mode 100644 index 0000000..e25c5d4 --- /dev/null +++ b/examples/example.pdf.license @@ -0,0 +1,3 @@ +SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. + +SPDX-License-Identifier: Apache-2.0 diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..356aaee --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,124 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +--- +# Project information +site_name: pyELQ Python Emission Localization and Quantification +site_author: pyELQ +site_description: >- + This repository contains the Python Emission Localization and Quantification software we call pyELQ. It is code used + for gas dispersion modelling, in particular methane emissions detection, localization and quantification. +# Repository +repo_name: pyELQ +repo_url: https://github.com/sede-open/pyELQ +edit_uri: "" + +docs_dir: docs + +# Configuration +theme: + name: material + # Default values, taken from mkdocs_theme.yml + language: en + features: + - content.code.annotate + - content.code.copy + - content.code.select + - content.tabs.link + - content.tooltips + #- navigation.expand + - navigation.indexes + - navigation.instant +# - navigation.sections + - navigation.tabs + # - navigation.tabs.sticky + - navigation.top + # - navigation.tracking + - search.highlight + - search.share + - search.suggest + - toc.follow + palette: + - scheme: default + primary: custom + accent: custom + toggle: + icon: material/brightness-7 + name: Switch to dark mode + - scheme: slate + primary: custom + accent: custom + toggle: + icon: material/brightness-4 + name: Switch to light mode + font: + text: Roboto + code: Roboto Mono + icon: + tag: + pipelines: fontawesome/solid/timeline + +extra: + generator: false + tags: + Pipelines: pipelines + +plugins: + - search + - autorefs + - mkdocstrings: + handlers: + python: + paths: [src] + options: + members_order: source + docstring_style: "google" + - tags + +watch: + - src/pyelq + +markdown_extensions: + - attr_list + - md_in_html + - meta + - admonition + - pymdownx.details + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + - pymdownx.tabbed: + alternate_style: true + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:materialx.emoji.to_svg # Page tree + - pymdownx.snippets: + url_download: true + +nav: + - Home: index.md + - pyELQ User Guide: + - Components: + - Overview: pyelq/component/component.md + - Background: pyelq/component/background.md + - Error Model: pyelq/component/error_model.md + - Offset: pyelq/component/offset.md + - Source Model: pyelq/component/source_model.md + - Coordinate System: pyelq/coordinate_system.md + - Data Access: pyelq/data_access/data_access.md + - Dispersion Model: pyelq/dispersion_model/gaussian_plume.md + - DLM: pyelq/dlm.md + - Gas Species: pyelq/gas_species.md + - Meteorology: pyelq/meteorology.md + - Model: pyelq/model.md + - Plotting: pyelq/plotting/plot.md + - Pre-Processing: pyelq/preprocessing.md + - Sensor: + - Overview: pyelq/sensor/sensor.md + - Beam: pyelq/sensor/beam.md + - Satellite: pyelq/sensor/satellite.md + - Source Map: pyelq/source_map.md + - Support Functions: 
pyelq/support_functions/spatio_temporal_interpolation.md diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..62140b9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,81 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.poetry] +name = "pyelq" +version = "1.0.0" +description = "Package for detection, localization and quantification code." +authors = ["Bas van de Kerkhof", "Matthew Jones", "David Randell"] +homepage = "https://github.com/sede-open/pyELQ" +repository = "https://github.com/sede-open/pyELQ" +documentation = "https://github.com/sede-open/pyELQ" +readme = "README.md" +license = "Apache-2.0" +keywords = ["gas dispersion", "emission", "detection", "localization", "quantification"] + +[tool.poetry.dependencies] +python = "~3.11" +pandas = ">=2.1.4" +numpy = ">=1.26.2" +azure-storage-blob = ">=12.19.0" +h5py = ">=3.10.0" +plotly = ">=5.18.0" +scipy = ">=1.11.4" +pymap3d = ">=3.0.1" +python-dotenv = ">=1.0.0" +geojson = ">=3.1.0" +geopandas = ">=0.14.1" +shapely = ">=2.0.2" +Fiona = ">=1.9.5" +scikit-learn = ">=1.3.2" +openmcmc = ">=1.0.0" +pyarrow = {version = ">=14.0.2", optional = true } + +[tool.poetry.extras] +parquet = ["pyarrow"] + +[tool.poetry.group.contributor] +optional = true + +[tool.poetry.group.contributor.dependencies] +black = ">=23.12.1" +isort = ">=5.13.2" +pydocstyle = ">=6.3.0" +pylint = ">=3.0.3" +pytest = ">=7.4.4" +pytest-cov = ">=4.1.0" +mkdocs-material = ">=9.5.7" +mkdocstrings-python = ">=1.8.0" + +[tool.pytest.ini_options] +addopts = "--cov=pyelq --cov-fail-under=90 --ignore-glob=*plot*" +testpaths = ["tests"] + +[tool.coverage.report] +omit = ["*plot*", "*/data_access/*", "*/plotting/*"] +exclude_lines = [".*def.*plot.*", "from pyelq.plotting.plot import Plot"] + +[tool.coverage.run] +relative_files = true +source = ["src/"] + +[tool.pylint] +fail-under=9.0 +max-line-length=120 +py-version=3.11 + +[tool.black] +line-length = 120 +target-version = ['py311'] + +[tool.pydocstyle] +convention = "google" +add-ignore = ["D105", "D107"] + +[tool.isort] +profile = "black" diff --git a/src/pyelq/__init__.py b/src/pyelq/__init__.py new file mode 100644 index 0000000..8c80adf --- /dev/null +++ b/src/pyelq/__init__.py @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 +"""Main pyELQ module.""" +__all__ = [ + "component", + "data_access", + "dispersion_model", + "plotting", + "sensor", + "support_functions", + "coordinate_system", + "dlm", + "gas_species", + "meteorology", + "model", + "preprocessing", + "source_map", +] diff --git a/src/pyelq/component/__init__.py b/src/pyelq/component/__init__.py new file mode 100644 index 0000000..0905c39 --- /dev/null +++ b/src/pyelq/component/__init__.py @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Components Module.""" +__all__ = ["background", "component", "error_model", "offset", "source_model"] diff --git a/src/pyelq/component/background.py b/src/pyelq/component/background.py new file mode 100644 index 0000000..0f6c2e3 --- /dev/null +++ b/src/pyelq/component/background.py @@ -0,0 +1,390 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. 
All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Model components for background modelling.""" + +from abc import abstractmethod +from copy import deepcopy +from dataclasses import dataclass, field +from typing import Union + +import numpy as np +import pandas as pd +from openmcmc import gmrf, parameter +from openmcmc.distribution.distribution import Gamma +from openmcmc.distribution.location_scale import Normal +from openmcmc.model import Model +from openmcmc.sampler.sampler import NormalGamma, NormalNormal +from scipy import sparse +from sklearn.neighbors import NearestNeighbors + +from pyelq.component.component import Component +from pyelq.coordinate_system import Coordinate +from pyelq.gas_species import GasSpecies +from pyelq.meteorology import MeteorologyGroup +from pyelq.sensor.beam import Beam +from pyelq.sensor.sensor import SensorGroup + + +@dataclass +class Background(Component): + """Superclass for background models. + + Attributes: + n_obs (int): total number of observations in the background model (across all sensors). + n_parameter (int): number of parameters in the background model + bg (np.ndarray): array of sampled background values, populated in self.from_mcmc() after the MCMC run is + completed. + precision_scalar (np.ndarray): array of sampled background precision values, populated in self.from_mcmc() after + the MCMC run is completed. Only populated if update_precision is True. + precision_matrix (Union[np.ndarray, sparse.csr_array]): un-scaled precision matrix for the background parameter + vector. + mean_bg (float): global mean background value. Should be populated from the value specified in the GasSpecies + object. + update_precision (bool): logical determining whether the background (scalar) precision parameter should be + updated as part of the MCMC. Defaults to False. + prior_precision_shape (float): shape parameter for the prior gamma distribution for the scalar precision + parameter(s). + prior_precision_rate (float): rate parameter for the prior gamma distribution for the scalar precision + parameter(s). + initial_precision (float): initial value for the scalar precision parameter. + basis_matrix (sparse.csr_array): [n_obs x n_time] matrix mapping the background model parameters on to the + observations. + + """ + + n_obs: int = field(init=False) + n_parameter: int = field(init=False) + bg: np.ndarray = field(init=False) + precision_scalar: np.ndarray = field(init=False) + precision_matrix: Union[np.ndarray, sparse.csc_matrix] = field(init=False) + mean_bg: Union[float, None] = None + update_precision: bool = False + prior_precision_shape: float = 1e-3 + prior_precision_rate: float = 1e-3 + initial_precision: float = 1.0 + basis_matrix: sparse.csr_array = field(init=False) + + @abstractmethod + def initialise(self, sensor_object: SensorGroup, meteorology: MeteorologyGroup, gas_species: GasSpecies): + """Take data inputs and extract relevant properties. + + Args: + sensor_object (SensorGroup): sensor data + meteorology (MeteorologyGroup): meteorology data + gas_species (GasSpecies): gas species information + + """ + + def make_model(self, model: list = None) -> list: + """Take model list and append new elements from current model component. + + Args: + model (list, optional): Current list of model elements. Defaults to None. + + Returns: + list: model output list. 
+ + """ + bg_precision_predictor = parameter.ScaledMatrix(matrix="P_bg", scalar="lambda_bg") + model.append(Normal("bg", mean="mu_bg", precision=bg_precision_predictor)) + if self.update_precision: + model.append(Gamma("lambda_bg", shape="a_lam_bg", rate="b_lam_bg")) + return model + + def make_sampler(self, model: Model, sampler_list: list = None) -> list: + """Take sampler list and append new elements from current model component. + + Args: + model (Model): Full model list of distributions. + sampler_list (list, optional): Current list of samplers. Defaults to None. + + Returns: + list: sampler output list. + + """ + if sampler_list is None: + sampler_list = [] + sampler_list.append(NormalNormal("bg", model)) + if self.update_precision: + sampler_list.append(NormalGamma("lambda_bg", model)) + return sampler_list + + def make_state(self, state: dict = None) -> dict: + """Take state dictionary and append initial values from model component. + + Args: + state (dict, optional): current state vector. Defaults to None. + + Returns: + dict: current state vector with components added. + + """ + state["mu_bg"] = np.ones((self.n_parameter, 1)) * self.mean_bg + state["B_bg"] = self.basis_matrix + state["bg"] = np.ones((self.n_parameter, 1)) * self.mean_bg + state["P_bg"] = self.precision_matrix + state["lambda_bg"] = self.initial_precision + if self.update_precision: + state["a_lam_bg"] = self.prior_precision_shape + state["b_lam_bg"] = self.prior_precision_rate + return state + + def from_mcmc(self, store: dict): + """Extract results of mcmc from mcmc.store and attach to components. + + Args: + store (dict): mcmc result dictionary. + + """ + self.bg = store["bg"] + if self.update_precision: + self.precision_scalar = store["lambda_bg"] + + +@dataclass +class TemporalBackground(Background): + """Model which imposes only temporal correlation on the background parameters. + + Assumes that the prior mean concentration of the background at every location/time point is the global average + background concentration as defined in the input GasSpecies object. + + Generates the (un-scaled) prior background precision matrix using the function gmrf.precision_temporal: this + precision matrix imposes first-oder Markov structure for the temporal dependence. + + By default, the times used for the model definition are the set of unique times in the observation set. + + This background model only requires the initialise function, and does not require the implementation of any further + methods. + + Attributes: + time (Union[np.ndarray, pd.arrays.DatetimeArray]): vector of times used in defining the model. + + """ + + time: Union[np.ndarray, pd.arrays.DatetimeArray] = field(init=False) + + def initialise(self, sensor_object: SensorGroup, meteorology: MeteorologyGroup, gas_species: GasSpecies): + """Create temporal background model from sensor, meteorology and gas species inputs. + + Args: + sensor_object (SensorGroup): sensor data object. + meteorology (MeteorologyGroup): meteorology data object. + gas_species (GasSpecies): gas species data object. 
+ + """ + self.n_obs = sensor_object.nof_observations + self.time, unique_inverse = np.unique(sensor_object.time, return_inverse=True) + self.time = pd.arrays.DatetimeArray(self.time) + self.n_parameter = len(self.time) + self.basis_matrix = sparse.csr_array((np.ones(self.n_obs), (np.array(range(self.n_obs)), unique_inverse))) + self.precision_matrix = gmrf.precision_temporal(time=self.time) + if self.mean_bg is None: + self.mean_bg = gas_species.global_background + + +@dataclass +class SpatioTemporalBackground(Background): + """Model which imposes both spatial and temporal correlation on the background parameters. + + Defines a grid in time, and assumes a correlated time-series per sensor using the defined time grid. + + The background parameter is an [n_location * n_time x 1] (if self.spatial_dependence is True) or an [n_time x 1] + vector (if self.spatial_dependence is False). In the spatio-temporal case, the background vector is assumed to + unwrap over space and time as follows: + bg = [b_1(t_1), b_2(t_1),..., b_nlct(t_1),...,b_1(t_k),..., b_nlct(t_k),...].T + where nlct is the number of sensor locations. + This unwrapping mechanism is chosen as it greatly speeds up the sparse matrix operations in the solver (vs. the + alternative). + + self.basis_matrix is set up to map the elements of the full background vector onto the observations, on the basis + of spatial location and nearest time knot. + + The temporal background correlation is computed using gmrf.precision_temporal, and the spatial correlation is + computed using a squared exponential correlation function, parametrized by self.spatial_correlation_param (spatial + correlation, measured in metres). The full precision matrix is simply a Kronecker product between the two + component precision matrices. + + Attributes: + n_time (int): number of time knots for which the model is defined. Note that this does not need to be the same + as the number of concentration observations in the analysis. + n_location (int): number of spatial knots in the model. + time (pd.arrays.DatetimeArray): vector of times used in defining the model. + spatial_dependence (bool): flag indicating whether the background parameters should be spatially correlated. If + True, the model assumes a separate background time-series per sensor location, and assumes these + time-series to be spatially correlated. If False (default), the background parameters are assumed to be + common between sensors (only temporally correlated). + spatial_correlation_param (float): correlation length parameter, determining the degree of spatial correlation + imposed on the background time-series. Units are metres. Assumes equal correlation in all spatial + directions. Defaults to 1.0. + location (np.ndarray): [n_location x 3] array of sensor locations, used for calculating the spatial correlation + between the sensor background values. If self.spatial_dependence is False, this attribute is simply set to + be the location of the first sensor in the sensor object. + temporal_precision_matrix (Union[np.ndarray, sparse.csc_matrix]): temporal component of the precision matrix. + The full model precision matrix is the Kronecker product of this matrix with self.spatial_precision_matrix. + spatial_precision_matrix (np.ndarray): spatial component of the precision matrix. The full model precision + matrix is the Kronecker product of this matrix with the self.temporal_precision_matrix. Simply set to 1 if + self.spatial_dependence is False. 
+ precision_time_0 (float): precision relating to the first time stamp in the model. Defaults to 0.01. + + """ + + n_time: Union[int, None] = None + n_location: int = field(init=False) + time: pd.arrays.DatetimeArray = field(init=False) + spatial_dependence: bool = False + spatial_correlation_param: float = field(init=False, default=1.0) + location: Coordinate = field(init=False) + temporal_precision_matrix: Union[np.ndarray, sparse.csc_matrix] = field(init=False) + spatial_precision_matrix: np.ndarray = field(init=False) + precision_time_0: float = field(init=False, default=0.01) + + def initialise(self, sensor_object: SensorGroup, meteorology: MeteorologyGroup, gas_species: GasSpecies): + """Take data inputs and extract relevant properties. + + Args: + sensor_object (SensorGroup): sensor data + meteorology (MeteorologyGroup): meteorology data wind data + gas_species (GasSpecies): gas species information + + """ + self.make_temporal_knots(sensor_object) + self.make_spatial_knots(sensor_object) + self.n_parameter = self.n_time * self.n_location + self.n_obs = sensor_object.nof_observations + + self.make_precision_matrix() + self.make_parameter_mapping(sensor_object) + + if self.mean_bg is None: + self.mean_bg = gas_species.global_background + + def make_parameter_mapping(self, sensor_object: SensorGroup): + """Create the mapping of parameters onto observations, through creation of the associated basis matrix. + + The background vector unwraps first over the spatial (sensor) location dimension, then over the temporal + dimension. For more detail, see the main class docstring. + + The data vector in the solver state is assumed to consist of the individual sensor data vectors stacked + consecutively. + + Args: + sensor_object (SensorGroup): group of sensor objects. + + """ + nn_object = NearestNeighbors(n_neighbors=1, algorithm="kd_tree").fit(self.time.to_numpy().reshape(-1, 1)) + for k, sensor in enumerate(sensor_object.values()): + _, time_index = nn_object.kneighbors(sensor.time.to_numpy().reshape(-1, 1)) + basis_matrix = sparse.csr_array( + (np.ones(sensor.nof_observations), (np.array(range(sensor.nof_observations)), time_index.flatten())), + shape=(sensor.nof_observations, self.n_time), + ) + if self.spatial_dependence: + basis_matrix = sparse.kron(basis_matrix, np.eye(N=self.n_location, M=1, k=-k).T) + + if k == 0: + self.basis_matrix = basis_matrix + else: + self.basis_matrix = sparse.vstack([self.basis_matrix, basis_matrix]) + + def make_temporal_knots(self, sensor_object: SensorGroup): + """Create the temporal grid for the model. + + If self.n_time is not specified, then the model will use the unique set of times from the sensor data. + + If self.n_time is specified, then the model will define a time grid with self.n_time elements. + + Args: + sensor_object (SensorGroup): group of sensor objects. + + """ + if self.n_time is None: + self.time = pd.arrays.DatetimeArray(np.unique(sensor_object.time)) + self.n_time = len(self.time) + else: + self.time = pd.arrays.DatetimeArray( + pd.date_range(start=np.min(sensor_object.time), end=np.max(sensor_object.time), periods=self.n_time) + ) + + def make_spatial_knots(self, sensor_object: SensorGroup): + """Create the spatial grid for the model. + + If self.spatial_dependence is False, the code assumes that only a single (arbitrary) location is used, thereby + eliminating any spatial dependence. + + If self.spatial_dependence is True, a separate but correlated time-series of background parameters is assumed + for each sensor location. 
+ + Args: + sensor_object (SensorGroup): group of sensor objects. + + """ + if self.spatial_dependence: + self.n_location = sensor_object.nof_sensors + self.get_locations_from_sensors(sensor_object) + else: + self.n_location = 1 + self.location = sensor_object[list(sensor_object.keys())[0]].location + + def make_precision_matrix(self): + """Create the full precision matrix for the background parameters. + + Defined as the Kronecker product of the temporal precision matrix and the spatial precision matrix. + + """ + self.temporal_precision_matrix = gmrf.precision_temporal(time=self.time) + lam = self.temporal_precision_matrix[0, 0] + self.temporal_precision_matrix[0, 0] = lam * (2.0 - lam / (self.precision_time_0 + lam)) + + if self.spatial_dependence: + self.make_spatial_precision_matrix() + self.precision_matrix = sparse.kron(self.temporal_precision_matrix, self.spatial_precision_matrix) + else: + self.precision_matrix = self.temporal_precision_matrix + if (self.n_parameter == 1) and sparse.issparse(self.precision_matrix): + self.precision_matrix = self.precision_matrix.toarray() + + def make_spatial_precision_matrix(self): + """Create the spatial precision matrix for the model. + + The spatial precision matrix is simply calculated as the inverse of a squared exponential covariance matrix + calculated using the sensor locations. + + """ + location_array = self.location.to_array() + spatial_covariance_matrix = np.exp( + -(1 / (2 * np.power(self.spatial_correlation_param, 2))) + * ( + np.power(location_array[:, [0]] - location_array[:, [0]].T, 2) + + np.power(location_array[:, [1]] - location_array[:, [1]].T, 2) + + np.power(location_array[:, [2]] - location_array[:, [2]].T, 2) + ) + ) + self.spatial_precision_matrix = np.linalg.inv( + spatial_covariance_matrix + (1e-6) * np.eye(spatial_covariance_matrix.shape[0]) + ) + + def get_locations_from_sensors(self, sensor_object: SensorGroup): + """Extract the location information from the sensor object. + + Attaches a Coordinate.ENU object as the self.location attribute, with all the sensor locations stored on the + same object. + + Args: + sensor_object (SensorGroup): group of sensor objects. + + """ + self.location = deepcopy(sensor_object[list(sensor_object.keys())[0]].location.to_enu()) + self.location.east = np.full(shape=(self.n_location,), fill_value=np.nan) + self.location.north = np.full(shape=(self.n_location,), fill_value=np.nan) + self.location.up = np.full(shape=(self.n_location,), fill_value=np.nan) + for k, sensor in enumerate(sensor_object.values()): + if isinstance(sensor, Beam): + self.location.east[k] = np.mean(sensor.location.to_enu().east, axis=0) + self.location.north[k] = np.mean(sensor.location.to_enu().north, axis=0) + self.location.up[k] = np.mean(sensor.location.to_enu().up, axis=0) + else: + self.location.east[k] = sensor.location.to_enu().east + self.location.north[k] = sensor.location.to_enu().north + self.location.up[k] = sensor.location.to_enu().up diff --git a/src/pyelq/component/component.py b/src/pyelq/component/component.py new file mode 100644 index 0000000..c8e2e5d --- /dev/null +++ b/src/pyelq/component/component.py @@ -0,0 +1,79 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +"""Superclass for model components.""" +from abc import ABC, abstractmethod +from dataclasses import dataclass + +from openmcmc.model import Model + +from pyelq.gas_species import GasSpecies +from pyelq.meteorology import MeteorologyGroup +from pyelq.sensor.sensor import SensorGroup + + +@dataclass +class Component(ABC): + """Abstract class defining methods and rules for model elements. + + The bulk of attributes will be defined in the subclasses inheriting from this superclass. + + """ + + @abstractmethod + def initialise(self, sensor_object: SensorGroup, meteorology: MeteorologyGroup, gas_species: GasSpecies): + """Take data inputs and extract relevant properties. + + Args: + sensor_object (SensorGroup): sensor data + meteorology (MeteorologyGroup): meteorology data + gas_species (GasSpecies): gas species information + + """ + + @abstractmethod + def make_model(self, model: list) -> list: + """Take model list and append new elements from current model component. + + Args: + model (list, optional): Current list of model elements. Defaults to []. + + Returns: + list: model output list. + + """ + + @abstractmethod + def make_sampler(self, model: Model, sampler_list: list) -> list: + """Take sampler list and append new elements from current model component. + + Args: + model (Model): Full model list of distributions. + sampler_list (list, optional): Current list of samplers. Defaults to []. + + Returns: + list: sampler output list. + + """ + + @abstractmethod + def make_state(self, state: dict) -> dict: + """Take state dictionary and append initial values from model component. + + Args: + state (dict, optional): current state vector. Defaults to {}. + + Returns: + dict: current state vector with components added. + + """ + + @abstractmethod + def from_mcmc(self, store: dict): + """Extract results of mcmc from mcmc.store and attach to components. + + Args: + store (dict): mcmc result dictionary. + + """ diff --git a/src/pyelq/component/error_model.py b/src/pyelq/component/error_model.py new file mode 100644 index 0000000..37030a8 --- /dev/null +++ b/src/pyelq/component/error_model.py @@ -0,0 +1,327 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Error model module.""" +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Union + +import numpy as np +from openmcmc import parameter +from openmcmc.distribution.distribution import Gamma +from openmcmc.model import Model +from openmcmc.sampler.sampler import NormalGamma + +from pyelq.component.component import Component +from pyelq.gas_species import GasSpecies +from pyelq.meteorology import MeteorologyGroup +from pyelq.sensor.sensor import Sensor, SensorGroup + +if TYPE_CHECKING: + from pyelq.plotting.plot import Plot + + +@dataclass +class ErrorModel(Component): + """Measurement precision model component for the model. + + Attributes: + n_sensor (int): number of sensors in the sensor object used for analysis. + precision_index (np.ndarray): index mapping precision parameters onto observations. Will be set up differently + for different model types. + precision_parameter (parameter.Parameter): parameter object which constructs the full measurement error + precision matrix from the components stored in state. Will be passed to the distribution for the observed + when the full model is constructed. 
+ prior_precision_shape (Union[np.ndarray, float]): prior shape parameters for the precision model. Set up + differently per model type. + prior_precision_rate (Union[np.ndarray, float]): prior rate parameters for the precision model. Set up + differently per model type. + initial_precision (Union[np.ndarray, float]): initial value for the precision to be passed to the analysis + routine. Set up differently per model type. + precision (np.ndarray): array of sampled measurement error precision values, populated in self.from_mcmc() after + the MCMC run is completed. + + """ + + n_sensor: int = field(init=False) + precision_index: np.ndarray = field(init=False) + precision_parameter: parameter.Parameter = field(init=False) + prior_precision_shape: Union[np.ndarray, float] = field(init=False) + prior_precision_rate: Union[np.ndarray, float] = field(init=False) + initial_precision: Union[np.ndarray, float] = field(init=False) + precision: np.ndarray = field(init=False) + + def initialise( + self, sensor_object: SensorGroup, meteorology: MeteorologyGroup = None, gas_species: GasSpecies = None + ): + """Take data inputs and extract relevant properties. + + Args: + sensor_object (SensorGroup): sensor data. + meteorology (MeteorologyGroup): meteorology data. Defaults to None. + gas_species (GasSpecies): gas species information. Defaults to None. + + """ + self.n_sensor = sensor_object.nof_sensors + + def make_model(self, model: list = None) -> list: + """Take model list and append new elements from current model component. + + Args: + model (list, optional): Current list of model elements. Defaults to None. + + Returns: + list: model output list. + + """ + if model is None: + model = [] + model.append(Gamma("tau", shape="a_tau", rate="b_tau")) + return model + + def make_sampler(self, model: Model, sampler_list: list = None) -> list: + """Take sampler list and append new elements from current model component. + + Args: + model (Model): Full model list of distributions. + sampler_list (list, optional): Current list of samplers. Defaults to None. + + Returns: + list: sampler output list. + + """ + if sampler_list is None: + sampler_list = [] + sampler_list.append(NormalGamma("tau", model)) + return sampler_list + + def make_state(self, state: dict = None) -> dict: + """Take state dictionary and append initial values from model component. + + Args: + state (dict, optional): current state vector. Defaults to None. + + Returns: + dict: current state vector with components added. + + """ + if state is None: + state = {} + state["a_tau"] = self.prior_precision_shape.flatten() + state["b_tau"] = self.prior_precision_rate.flatten() + state["precision_index"] = self.precision_index + state["tau"] = self.initial_precision.flatten() + return state + + def from_mcmc(self, store: dict): + """Extract results of mcmc from mcmc.store and attach to components. + + Args: + store (dict): mcmc result dictionary. + + """ + self.precision = store["tau"] + + +@dataclass +class BySensor(ErrorModel): + """Version of measurement precision where each sensor object has a different precision. + + Attributes: + prior_precision_shape (Union[np.ndarray, float]): prior shape parameters for the precision model, can be + specified either as a float or as a (nof_sensors, ) np.ndarray: a float specification will result in + the same parameter value for each sensor. Defaults to 1e-3. 
+ prior_precision_rate (Union[np.ndarray, float]): prior rate parameters for the precision model, can be + specified either as a float or as a (nof_sensors, ) np.ndarray: a float specification will result in + the same parameter value for each sensor. Defaults to 1e-3. + initial_precision (Union[np.ndarray, float]): initial value for the precision parameters, can be specified + either as a float or as a (nof_sensors, ) np.ndarray: a float specification will result in the same + parameter value for each sensor. Defaults to 1. + precision_index (np.ndarray): index mapping precision parameters onto observations. Parameters 1:n_sensor are + mapped as the measurement error precisions of the corresponding sensors. + precision_parameter (Parameter.MixtureParameterMatrix): parameter specification for this model, maps the + current value of the parameter in the state dict onto the concentration data precisions. + + """ + + prior_precision_shape: Union[np.ndarray, float] = 1e-3 + prior_precision_rate: Union[np.ndarray, float] = 1e-3 + initial_precision: Union[np.ndarray, float] = 1.0 + + def initialise( + self, sensor_object: SensorGroup, meteorology: MeteorologyGroup = None, gas_species: GasSpecies = None + ): + """Set up the error model using sensor properties. + + Args: + sensor_object (SensorGroup): sensor data. + meteorology (MeteorologyGroup): meteorology data. Defaults to None. + gas_species (GasSpecies): gas species information. Defaults to None. + + """ + super().initialise(sensor_object=sensor_object, meteorology=meteorology, gas_species=gas_species) + self.prior_precision_shape = self.prior_precision_shape * np.ones((self.n_sensor,)) + self.prior_precision_rate = self.prior_precision_rate * np.ones((self.n_sensor,)) + self.initial_precision = self.initial_precision * np.ones((self.n_sensor,)) + self.precision_index = sensor_object.sensor_index + self.precision_parameter = parameter.MixtureParameterMatrix(param="tau", allocation="precision_index") + + def plot_iterations(self, plot: "Plot", sensor_object: Union[SensorGroup, Sensor], burn_in_value: int) -> "Plot": + """Plots the error model values for every sensor with respect to the MCMC iterations. + + Args: + sensor_object (Union[SensorGroup, Sensor]): Sensor object associated with the error_model + burn_in_value (int): Burn in value to show in plot. + plot (Plot): Plot object to which this figure will be added in the figure dictionary + + Returns: + plot (Plot): Plot object to which this figure is added in the figure dictionary with + key 'error_model_iterations' + + """ + plot.plot_trace_per_sensor( + object_to_plot=self, sensor_object=sensor_object, plot_type="line", burn_in=burn_in_value + ) + + return plot + + def plot_distributions(self, plot: "Plot", sensor_object: Union[SensorGroup, Sensor], burn_in_value: int) -> "Plot": + """Plots the distribution of the error model values after the burn in for every sensor. + + Args: + sensor_object (Union[SensorGroup, Sensor]): Sensor object associated with the error_model + burn_in_value (int): Burn in value to show in plot. 
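A minimal numpy sketch of how the BySensor defaults behave: scalar hyperparameters broadcast to one entry per sensor, and precision_index picks the precision parameter of the sensor that produced each observation. The sensor_index values here are made up for illustration.

import numpy as np

n_sensor = 3
prior_precision_shape = 1e-3 * np.ones((n_sensor,))  # one shape parameter per sensor
prior_precision_rate = 1e-3 * np.ones((n_sensor,))   # one rate parameter per sensor
initial_precision = 1.0 * np.ones((n_sensor,))        # one starting precision per sensor

# Hypothetical sensor_index for 7 observations coming from sensors 0, 1 and 2.
precision_index = np.array([0, 0, 1, 1, 1, 2, 2])
per_observation_precision = initial_precision[precision_index]
print(per_observation_precision)  # [1. 1. 1. 1. 1. 1. 1.]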
+ plot (Plot): Plot object to which this figure will be added in the figure dictionary + + Returns: + plot (Plot): Plot object to which this figure is added in the figure dictionary with + key 'error_model_distributions' + + """ + plot.plot_trace_per_sensor( + object_to_plot=self, sensor_object=sensor_object, plot_type="box", burn_in=burn_in_value + ) + + return plot + + +@dataclass +class ByRelease(ErrorModel): + """ByRelease error model, special case of the measurement precision model. + + Version of the measurement precision model where each sensor object has a different precision, and there are + different precisions for periods inside and outside controlled release periods. For all parameters: the first + element corresponds to the case where the sources are OFF; the second element corresponds to the case where the + sources are ON. + + Attributes: + prior_precision_shape (np.ndarray): prior shape parameters for the precision model, can be + specified either as a (2, 1) np.ndarray or as a (2, nof_sensors) np.ndarray: the former specification + will result in the same prior specification for the off/on precisions for each sensor. Defaults to + np.array([1e-3, 1e-3]). + prior_precision_rate (np.ndarray): prior rate parameters for the precision model, can be + specified either as a (2, 1) np.ndarray or as a (2, nof_sensors) np.ndarray: the former specification + will result in the same prior specification for the off/on precisions for each sensor. Defaults to + np.array([1e-3, 1e-3]). + initial_precision (np.ndarray): initial value for the precision parameters, can be + specified either as a (2, 1) np.ndarray or as a (2, nof_sensors) np.ndarray: the former specification + will result in the same prior specification for the off/on precisions for each sensor. Defaults to + np.array([1.0, 1.0]). + precision_index (np.ndarray): index mapping precision parameters onto observations. Parameters 1:n_sensor are + mapped onto each sensor for the periods where the sources are OFF; parameters (n_sensor + 1):(2 * n_sensor) + are mapped onto each sensor for the periods where the sources are ON. + precision_parameter (Parameter.MixtureParameterMatrix): parameter specification for this model, maps the + current value of the parameter in the state dict onto the concentration data precisions. + + """ + + prior_precision_shape: np.ndarray = field(default_factory=lambda: np.array([1e-3, 1e-3], ndmin=2).T) + prior_precision_rate: np.ndarray = field(default_factory=lambda: np.array([1e-3, 1e-3], ndmin=2).T) + initial_precision: np.ndarray = field(default_factory=lambda: np.array([1.0, 1.0], ndmin=2).T) + + def initialise( + self, sensor_object: SensorGroup, meteorology: MeteorologyGroup = None, gas_species: GasSpecies = None + ): + """Set up the error model using sensor properties. + + Args: + sensor_object (SensorGroup): sensor data. + meteorology (MeteorologyGroup): meteorology data. Defaults to None. + gas_species (GasSpecies): gas species information. Defaults to None. 
+ + """ + super().initialise(sensor_object=sensor_object, meteorology=meteorology, gas_species=gas_species) + self.prior_precision_shape = self.prior_precision_shape * np.ones((2, self.n_sensor)) + self.prior_precision_rate = self.prior_precision_rate * np.ones((2, self.n_sensor)) + self.initial_precision = self.initial_precision * np.ones((2, self.n_sensor)) + self.precision_index = sensor_object.sensor_index + sensor_object.source_on * self.n_sensor + self.precision_parameter = parameter.MixtureParameterMatrix(param="tau", allocation="precision_index") + + def plot_iterations(self, plot: "Plot", sensor_object: Union[SensorGroup, Sensor], burn_in_value: int) -> "Plot": + """Plot the estimated error model parameters against iterations of the MCMC chain. + + Works by simply creating a separate plot for each of the two categories of precision parameter (when the + sources are on/off). Creates a BySensor() object for each of the off/on precision cases, and then makes a + call to its plot function. + + Args: + sensor_object (Union[SensorGroup, Sensor]): Sensor object associated with the error_model + burn_in_value (int): Burn in value to show in plot. + plot (Plot): Plot object to which this figure will be added in the figure dictionary + + Returns: + plot (Plot): Plot object to which this figure is added in the figure dictionary with + key 'error_model_iterations' + + """ + figure_keys = ["error_model_off_iterations", "error_model_on_iterations"] + figure_titles = [ + "Estimated error parameter values: sources off", + "Estimated error parameter values: sources on", + ] + precision_arrays = [ + self.precision[: sensor_object.nof_sensors, :], + self.precision[sensor_object.nof_sensors :, :], + ] + for key, title, array in zip(figure_keys, figure_titles, precision_arrays): + error_model = BySensor() + error_model.precision = array + plot = error_model.plot_iterations(plot, sensor_object, burn_in_value) + plot.figure_dict[key] = plot.figure_dict.pop("error_model_iterations") + plot.figure_dict[key].update_layout(title=title) + return plot + + def plot_distributions(self, plot: "Plot", sensor_object: Union[SensorGroup, Sensor], burn_in_value: int) -> "Plot": + """Plot the estimated distributions of error model parameters. + + Works by simply creating a separate plot for each of the two categories of precision parameter (when the + sources are off/on). Creates a BySensor() object for each of the off/on precision cases, and then makes a + call to its plot function. + + Args: + sensor_object (Union[SensorGroup, Sensor]): Sensor object associated with the error_model + burn_in_value (int): Burn in value to show in plot. 
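Worked sketch of the ByRelease index construction above: observations recorded while the sources are ON are shifted by n_sensor so they map onto the second block of precision parameters. The 0/1 encoding of source_on is assumed here for illustration.

import numpy as np

n_sensor = 2
sensor_index = np.array([0, 0, 1, 1, 0, 1])  # which sensor produced each observation
source_on = np.array([0, 1, 0, 1, 1, 0])     # assumed encoding: 0 = sources off, 1 = sources on
precision_index = sensor_index + source_on * n_sensor
print(precision_index)  # [0 2 1 3 2 1]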
+ plot (Plot): Plot object to which this figure will be added in the figure dictionary + + Returns: + plot (Plot): Plot object to which this figure is added in the figure dictionary with + key 'error_model_distributions' + + """ + figure_keys = ["error_model_off_distributions", "error_model_on_distributions"] + figure_titles = [ + "Estimated error parameter distribution: sources off", + "Estimated error parameter distribution: sources on", + ] + precision_arrays = [ + self.precision[: sensor_object.nof_sensors, :], + self.precision[sensor_object.nof_sensors :, :], + ] + for key, title, array in zip(figure_keys, figure_titles, precision_arrays): + error_model = BySensor() + error_model.precision = array + plot = error_model.plot_distributions(plot, sensor_object, burn_in_value) + plot.figure_dict[key] = plot.figure_dict.pop("error_model_distributions") + plot.figure_dict[key].update_layout(title=title) + return plot diff --git a/src/pyelq/component/offset.py b/src/pyelq/component/offset.py new file mode 100644 index 0000000..dc9a3e5 --- /dev/null +++ b/src/pyelq/component/offset.py @@ -0,0 +1,183 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Offset module.""" +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Union + +import numpy as np +from openmcmc import parameter +from openmcmc.distribution.distribution import Gamma +from openmcmc.distribution.location_scale import Normal +from openmcmc.model import Model +from openmcmc.sampler.sampler import NormalGamma, NormalNormal +from scipy import sparse + +from pyelq.component.component import Component +from pyelq.gas_species import GasSpecies +from pyelq.meteorology import Meteorology +from pyelq.sensor.sensor import Sensor, SensorGroup + +if TYPE_CHECKING: + from pyelq.plotting.plot import Plot + + +@dataclass +class PerSensor(Component): + """Offset implementation which assumes an additive offset between sensors. + + The offset is which is constant in space and time and accounts for calibration differences between sensors. + To maintain parameter identifiability, the offset for the first sensor (with index 0) is assumed to be 0, and other + sensor offsets are defined relative to this beam. + + Attributes: + n_sensor (int): number of sensors in the sensor object used for analysis. + offset (np.ndarray): array of sampled offset values, populated in self.from_mcmc() after the MCMC run is + completed. + precision_scalar (np.ndarray): array of sampled offset precision values, populated in self.from_mcmc() after + the MCMC run is completed. Only populated if update_precision is True. + indicator_basis (sparse.csc_matrix): [nof_observations x (nof_sensors - 1)] sparse matrix which assigns the + offset parameters to the correct observations. + update_precision (bool): logical indicating whether the offset prior precision parameter should be updated as + part of the analysis. + mean_offset (float): prior mean parameter for the offsets, assumed to be the same for each beam. Default is 0. + prior_precision_shape (float): shape parameter for the prior gamma distribution for the scalar precision + parameter. Default is 1e-3. + prior_precision_rate (float): rate parameter for the prior gamma distribution for the scalar precision + parameter(s). Default is 1e-3. + initial_precision (float): initial value for the scalar precision parameter. Default is 1.0. 
+ + """ + + n_sensor: int = field(init=False) + offset: np.ndarray = field(init=False) + precision_scalar: np.ndarray = field(init=False) + indicator_basis: sparse.csc_matrix = field(init=False) + update_precision: bool = False + mean_offset: float = 0.0 + prior_precision_shape: float = 1e-3 + prior_precision_rate: float = 1e-3 + initial_precision: float = 1.0 + + def initialise(self, sensor_object: SensorGroup, meteorology: Meteorology, gas_species: GasSpecies): + """Take data inputs and extract relevant properties. + + Args: + sensor_object (SensorGroup): sensor data + meteorology (MeteorologyGroup): meteorology data wind data + gas_species (GasSpecies): gas species information + + """ + self.n_sensor = len(sensor_object) + self.indicator_basis = sparse.csc_matrix( + np.equal(sensor_object.sensor_index[:, np.newaxis], np.array(range(1, self.n_sensor))) + ) + + def make_model(self, model: list = None) -> list: + """Take model list and append new elements from current model component. + + Args: + model (list, optional): Current list of model elements. Defaults to []. + + Returns: + list: model output list. + + """ + if model is None: + model = [] + off_precision_predictor = parameter.ScaledMatrix(matrix="P_d", scalar="lambda_d") + model.append(Normal("d", mean="mu_d", precision=off_precision_predictor)) + if self.update_precision: + model.append(Gamma("lambda_d", shape="a_lam_d", rate="b_lam_d")) + return model + + def make_sampler(self, model: Model, sampler_list: list = None) -> list: + """Take sampler list and append new elements from current model component. + + Args: + model (Model): Full model list of distributions. + sampler_list (list, optional): Current list of samplers. Defaults to []. + + Returns: + list: sampler output list. + + """ + if sampler_list is None: + sampler_list = [] + sampler_list.append(NormalNormal("d", model)) + if self.update_precision: + sampler_list.append(NormalGamma("lambda_d", model)) + return sampler_list + + def make_state(self, state: dict = None) -> dict: + """Take state dictionary and append initial values from model component. + + Args: + state (dict, optional): current state vector. Defaults to {}. + + Returns: + dict: current state vector with components added. + + """ + if state is None: + state = {} + state["mu_d"] = np.ones((self.n_sensor - 1, 1)) * self.mean_offset + state["d"] = np.zeros((self.n_sensor - 1, 1)) + state["B_d"] = self.indicator_basis + state["P_d"] = sparse.eye(self.n_sensor - 1, format="csc") + state["lambda_d"] = self.initial_precision + if self.update_precision: + state["a_lam_d"] = self.prior_precision_shape + state["b_lam_d"] = self.prior_precision_rate + return state + + def from_mcmc(self, store: dict): + """Extract results of mcmc from mcmc.store and attach to components. + + Args: + store (dict): mcmc result dictionary. + + """ + self.offset = store["d"] + if self.update_precision: + self.precision_scalar = store["lambda_d"] + + def plot_iterations(self, plot: "Plot", sensor_object: Union[SensorGroup, Sensor], burn_in_value: int) -> "Plot": + """Plots the offset values for every sensor with respect to the MCMC iterations. + + Args: + sensor_object (Union[SensorGroup, Sensor]): Sensor object associated with the offset_model + burn_in_value (int): Burn in value to show in plot. 
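Small numpy/scipy sketch of the offset indicator basis built in PerSensor.initialise: sensor 0 is the reference (its offset is fixed at zero), so the basis only has columns for sensors 1 to n_sensor - 1. The sensor_index values are made up for illustration.

import numpy as np
from scipy import sparse

n_sensor = 3
sensor_index = np.array([0, 1, 1, 2, 0, 2])
indicator_basis = sparse.csc_matrix(np.equal(sensor_index[:, np.newaxis], np.arange(1, n_sensor)))
print(indicator_basis.toarray().astype(int))
# [[0 0]
#  [1 0]
#  [1 0]
#  [0 1]
#  [0 0]
#  [0 1]]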
+ plot (Plot): Plot object to which this figure will be added in the figure dictionary + + Returns: + plot (Plot): Plot object to which this figure is added in the figure dictionary with + key 'offset_iterations' + + """ + plot.plot_trace_per_sensor( + object_to_plot=self, sensor_object=sensor_object, plot_type="line", burn_in=burn_in_value + ) + + return plot + + def plot_distributions(self, plot: "Plot", sensor_object: Union[SensorGroup, Sensor], burn_in_value: int) -> "Plot": + """Plots the distribution of the offset values after the burn in for every sensor. + + Args: + sensor_object (Union[SensorGroup, Sensor]): Sensor object associated with the offset_model + burn_in_value (int): Burn in value to use for plot. + plot (Plot): Plot object to which this figure will be added in the figure dictionary + + Returns: + plot (Plot): Plot object to which this figure is added in the figure dictionary with + key 'offset_distributions' + + """ + plot.plot_trace_per_sensor( + object_to_plot=self, sensor_object=sensor_object, plot_type="box", burn_in=burn_in_value + ) + + return plot diff --git a/src/pyelq/component/source_model.py b/src/pyelq/component/source_model.py new file mode 100644 index 0000000..55a12c7 --- /dev/null +++ b/src/pyelq/component/source_model.py @@ -0,0 +1,862 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Component Class and subclasses for source model. + +A SourceModel instance inherits from 3 super-classes: + - Component: this is the general superclass for ELQModel components, which prototypes generic methods. + - A type of SourceGrouping: this class type implements an allocation of sources to different categories (e.g. slab + or spike), and sets up a sampler for estimating the classification of each source within the source map. + Inheriting from the NullGrouping class ensures that the allocation of all sources is fixed during the inversion, + and is not updated. + - A type of SourceDistribution: this class type implements a particular type of response distribution (mostly + Normal, but also allows for cases where we have e.g. exp(log_s) or a non-Gaussian prior). + +""" + +from abc import abstractmethod +from copy import deepcopy +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Tuple, Union + +import numpy as np +from openmcmc import parameter +from openmcmc.distribution.distribution import Categorical, Gamma, Poisson, Uniform +from openmcmc.distribution.location_scale import Normal as mcmcNormal +from openmcmc.model import Model +from openmcmc.sampler.metropolis_hastings import RandomWalkLoop +from openmcmc.sampler.reversible_jump import ReversibleJump +from openmcmc.sampler.sampler import MixtureAllocation, NormalGamma, NormalNormal + +from pyelq.component.component import Component +from pyelq.coordinate_system import ENU +from pyelq.dispersion_model.gaussian_plume import GaussianPlume +from pyelq.gas_species import GasSpecies +from pyelq.meteorology import Meteorology +from pyelq.sensor.sensor import SensorGroup +from pyelq.source_map import SourceMap + +if TYPE_CHECKING: + from pyelq.plotting.plot import Plot + + +@dataclass +class SourceGrouping: + """Superclass for source grouping approach. + + Source grouping method determines the group allocation of each source in the model, e.g: slab and spike + distribution makes an on/off allocation for each source. 
+ + Attributes: + nof_sources (int): number of sources in the model. + emission_rate_mean (Union[float, np.ndarray]): prior mean parameter for the emission rate distribution. + _source_key (str): label for the source parameter to be used in the distributions, samplers, MCMC state etc. + + """ + + nof_sources: int = field(init=False) + emission_rate_mean: Union[float, np.ndarray] = field(init=False) + _source_key: str = field(init=False, default="s") + + @abstractmethod + def make_allocation_model(self, model: list) -> list: + """Initialise the source allocation part of the model, and the parameters of the source response distribution. + + Args: + model (list): overall model, consisting of list of distributions. + + Returns: + list: overall model list, updated with allocation distribution. + + """ + + @abstractmethod + def make_allocation_sampler(self, model: Model, sampler_list: list) -> list: + """Initialise the allocation part of the sampler. + + Args: + model (Model): overall model, consisting of list of distributions. + sampler_list (list): list of samplers for individual parameters. + + Returns: + list: sampler_list updated with sampler for the source allocation. + + """ + + @abstractmethod + def make_allocation_state(self, state: dict) -> dict: + """Initialise the allocation part of the state. + + Args: + state (dict): dictionary containing current state information. + + Returns: + dict: state updated with parameters related to the source grouping. + + """ + + @abstractmethod + def from_mcmc_group(self, store: dict): + """Extract posterior allocation samples from the MCMC sampler, attach them to the class. + + Args: + store (dict): dictionary containing samples from the MCMC. + + """ + + +@dataclass +class NullGrouping(SourceGrouping): + """Null grouping: the grouping of the sources will not change during the course of the inversion. + + Note that this is intended to support two distinct cases: + 1) The case where the source map is fixed, and a given prior mean and prior precision value are assigned to + each source (can be a common value for all sources, or can be a distinct allocation to each element of the + source map). + 2) The case where the dimensionality of the source map is changing during the inversion, and a common prior + mean and precision term are used for all sources. + + """ + + def make_allocation_model(self, model: list) -> list: + """Initialise the source allocation part of the model. + + In the NullGrouping case, the source allocation is fixed throughout, so this function does nothing (simply + returns the existing model un-modified). + + Args: + model (list): model as constructed so far, consisting of list of distributions. + + Returns: + list: overall model list, updated with allocation distribution. + + """ + return model + + def make_allocation_sampler(self, model: Model, sampler_list: list) -> list: + """Initialise the allocation part of the sampler. + + In the NullGrouping case, the source allocation is fixed throughout, so this function does nothing (simply + returns the existing sampler list un-modified). + + Args: + model (Model): overall model set for the problem. + sampler_list (list): list of samplers for individual parameters. + + Returns: + list: sampler_list updated with sampler for the source allocation. + + """ + return sampler_list + + def make_allocation_state(self, state: dict) -> dict: + """Initialise the allocation part of the state. + + The prior mean parameter and the fixed source allocation are added to the state. 
+ + Args: + state (dict): dictionary containing current state information. + + Returns: + dict: state updated with parameters related to the source grouping. + + """ + state["mu_s"] = np.array(self.emission_rate_mean, ndmin=1) + state["alloc_s"] = np.zeros((self.nof_sources, 1), dtype="int") + return state + + def from_mcmc_group(self, store: dict): + """Extract posterior allocation samples from the MCMC sampler, attach them to the class. + + We have not implemented anything here as there is nothing to fetch from the MCMC solution here for the + NullGrouping Class. + + Args: + store (dict): dictionary containing samples from the MCMC. + + """ + + +@dataclass +class SlabAndSpike(SourceGrouping): + """Slab and spike source model, special case for the source grouping. + + Slab and spike: the prior for the emission rates is a two-component mixture, and the allocation is to be + estimated as part of the inversion. + + Attributes: + slab_probability (float): prior probability of allocation to the slab component. Defaults to 0.05. + allocation (np.ndarray): set of allocation samples, with shape=(n_sources, n_iterations). Attached to + the class by self.from_mcmc_group(). + + """ + + slab_probability: float = 0.05 + allocation: np.ndarray = field(init=False) + + def make_allocation_model(self, model: list) -> list: + """Initialise the source allocation part of the model. + + Args: + model (list): model as constructed so far, consisting of list of distributions. + + Returns: + list: overall model list, updated with allocation distribution. + + """ + model.append(Categorical("alloc_s", prob="s_prob")) + return model + + def make_allocation_sampler(self, model: Model, sampler_list: list) -> list: + """Initialise the allocation part of the sampler. + + Args: + model (Model): overall model set for the problem. + sampler_list (list): list of samplers for individual parameters. + + Returns: + list: sampler_list updated with sampler for the source allocation. + + """ + sampler_list.append(MixtureAllocation(param="alloc_s", model=model, response_param=self._source_key)) + return sampler_list + + def make_allocation_state(self, state: dict) -> dict: + """Initialise the allocation part of the state. + + Args: + state (dict): dictionary containing current state information. + + Returns: + dict: state updated with parameters related to the source grouping. + + """ + state["mu_s"] = np.array(self.emission_rate_mean, ndmin=1) + state["s_prob"] = np.tile(np.array([self.slab_probability, 1 - self.slab_probability]), (self.nof_sources, 1)) + state["alloc_s"] = np.ones((self.nof_sources, 1), dtype="int") + return state + + def from_mcmc_group(self, store: dict): + """Extract posterior allocation samples from the MCMC sampler, attach them to the class. + + Args: + store (dict): dictionary containing samples from the MCMC. + + """ + self.allocation = store["alloc_s"] + + +@dataclass +class SourceDistribution: + """Superclass for source emission rate distribution. + + Source distribution determines the type of prior to be used for the source emission rates, and the transformation + linking the source parameters and the data. + + Elements related to transformation of source parameters are also specified at the model level. + + Attributes: + nof_sources (int): number of sources in the model. + emission_rate (np.ndarray): set of emission rate samples, with shape=(n_sources, n_iterations). Attached to + the class by self.from_mcmc_dist(). 
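Illustrative sketch of the slab-and-spike allocation state set up above: every source gets the same two-component probability row (slab probability first), and the initial allocation vector points all sources at the second mixture component.

import numpy as np

nof_sources = 4
slab_probability = 0.05
s_prob = np.tile(np.array([slab_probability, 1 - slab_probability]), (nof_sources, 1))
alloc_s = np.ones((nof_sources, 1), dtype="int")
print(s_prob.shape, alloc_s.ravel())  # (4, 2) [1 1 1 1]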
+ + """ + + nof_sources: int = field(init=False) + emission_rate: np.ndarray = field(init=False) + + @abstractmethod + def make_source_model(self, model: list) -> list: + """Add distributional component to the overall model corresponding to the source emission rate distribution. + + Args: + model (list): model as constructed so far, consisting of list of distributions. + + Returns: + list: overall model list, updated with distributions related to source prior. + + """ + + @abstractmethod + def make_source_sampler(self, model: Model, sampler_list: list) -> list: + """Initialise the source prior distribution part of the sampler. + + Args: + model (Model): overall model set for the problem. + sampler_list (list): list of samplers for individual parameters. + + Returns: + list: sampler_list updated with sampler for the emission rate parameters. + + """ + + @abstractmethod + def make_source_state(self, state: dict) -> dict: + """Initialise the emission rate parts of the state. + + Args: + state (dict): dictionary containing current state information. + + Returns: + dict: state updated with parameters related to the source emission rates. + + """ + + @abstractmethod + def from_mcmc_dist(self, store: dict): + """Extract posterior emission rate samples from the MCMC, attach them to the class. + + Args: + store (dict): dictionary containing samples from the MCMC. + + """ + + +@dataclass +class NormalResponse(SourceDistribution): + """(Truncated) Gaussian prior for sources. + + No transformation applied to parameters, i.e.: + - Prior distribution: s ~ N(mu, 1/precision) + - Likelihood contribution: y = A*s + b + ... + + Attributes: + truncation (bool): indication of whether the emission rate prior should be truncated at 0. Defaults to True. + emission_rate_lb (Union[float, np.ndarray]): lower bound for the source emission rates. Defaults to 0. + emission_rate_mean (Union[float, np.ndarray]): prior mean for the emission rate distribution. Defaults to 0. + + """ + + truncation: bool = True + emission_rate_lb: Union[float, np.ndarray] = 0 + emission_rate_mean: Union[float, np.ndarray] = 0 + + def make_source_model(self, model: list) -> list: + """Add distributional component to the overall model corresponding to the source emission rate distribution. + + Args: + model (list): model as constructed so far, consisting of list of distributions. + + Returns: + list: model, updated with distributions related to source prior. + + """ + domain_response_lower = None + if self.truncation: + domain_response_lower = self.emission_rate_lb + + model.append( + mcmcNormal( + "s", + mean=parameter.MixtureParameterVector(param="mu_s", allocation="alloc_s"), + precision=parameter.MixtureParameterMatrix(param="lambda_s", allocation="alloc_s"), + domain_response_lower=domain_response_lower, + ) + ) + return model + + def make_source_sampler(self, model: Model, sampler_list: list = None) -> list: + """Initialise the source prior distribution part of the sampler. + + Args: + model (Model): overall model set for the problem. + sampler_list (list): list of samplers for individual parameters. + + Returns: + list: sampler_list updated with sampler for the emission rate parameters. + + """ + if sampler_list is None: + sampler_list = [] + sampler_list.append(NormalNormal("s", model)) + return sampler_list + + def make_source_state(self, state: dict) -> dict: + """Initialise the emission rate part of the state. + + Args: + state (dict): dictionary containing current state information. 
+ + Returns: + dict: state updated with initial emission rate vector. + + """ + state["s"] = np.zeros((self.nof_sources, 1)) + return state + + def from_mcmc_dist(self, store: dict): + """Extract posterior emission rate samples from the MCMC sampler, attach them to the class. + + Args: + store (dict): dictionary containing samples from the MCMC. + + """ + self.emission_rate = store["s"] + + +@dataclass +class SourceModel(Component, SourceGrouping, SourceDistribution): + """Superclass for the specification of the source model in an inversion run. + + Various different types of model. A SourceModel is an optional component of a model, and thus inherits + from Component. + + A subclass instance of SourceModel must inherit from: + - an INSTANCE of SourceDistribution, which specifies a prior emission rate distribution for all sources in the + source map. + - an INSTANCE of SourceGrouping, which specifies a type of mixture prior specification for the sources (for + which the allocation is to be estimated as part of the inversion). + + If the flag reversible_jump == True, then the number of sources and their locations are also estimated as part of + the inversion, in addition to the emission rates. If this flag is set to true, the sensor_object, meteorology and + gas_species objects are all attached to the class, as they will be required in the repeated computation of updates + to the coupling matrix during the inversion. + + Attributes: + dispersion_model (GaussianPlume): dispersion model used to generate the couplings between source locations and + sensor observations. + coupling (np.ndarray): coupling matrix generated using dispersion_model. + + sensor_object (SensorGroup): stores sensor information for reversible jump coupling updates. + meteorology (MeteorologyGroup): stores meteorology information for reversible jump coupling updates. + gas_species (GasSpecies): stores gas species information for reversible jump coupling updates. + + reversible_jump (bool): logical indicating whether the reversible jump algorithm for estimation of the number + of sources and their locations should be run. Defaults to False. + site_limits (np.ndarray): (3 x 2) array specifying the lower (column 0) and upper (column 1) limits of the + analysis site. Only relevant for cases where reversible_jump == True (where sources are free to move in + the solution). + rate_num_sources (int): specification for the parameter for the Poisson prior distribution for the total number + of sources. Only relevant for cases where reversible_jump == True (where the number of sources in the + solution can change). + n_sources_max (int): maximum number of sources that can feature in the solution. Only relevant for cases where + reversible_jump == True (where the number of sources in the solution can change). + emission_proposal_std (float): standard deviation of the truncated Gaussian distribution used to propose the + new source emission rate in case of a birth move. + + update_precision (bool): logical indicating whether the prior precision parameter for emission rates should be + updated as part of the inversion. Defaults to false. + prior_precision_shape (Union[float, np.ndarray]): shape parameters for the prior Gamma distribution for the + source precision parameter. + prior_precision_rate (Union[float, np.ndarray]): rate parameters for the prior Gamma distribution for the + source precision parameter. + initial_precision (Union[float, np.ndarray]): initial value for the source emission rate precision parameter. 
+ precision_scalar (np.ndarray): precision values generated by MCMC inversion. + + coverage_detection (float): sensor detection threshold (in ppm) to be used for coverage calculations. + coverage_test_source (float): test source (in kg/hr) which we wish to be able to see in coverage calculation. + + """ + + dispersion_model: GaussianPlume = field(init=False, default=None) + coupling: np.ndarray = field(init=False) + + sensor_object: SensorGroup = field(init=False, default=None) + meteorology: Meteorology = field(init=False, default=None) + gas_species: GasSpecies = field(init=False, default=None) + + reversible_jump: bool = False + site_limits: np.ndarray = None + rate_num_sources: int = 5 + n_sources_max: int = 20 + emission_proposal_std: float = 0.5 + + update_precision: bool = False + prior_precision_shape: Union[float, np.ndarray] = 1e-3 + prior_precision_rate: Union[float, np.ndarray] = 1e-3 + initial_precision: Union[float, np.ndarray] = 1.0 + precision_scalar: np.ndarray = field(init=False) + + coverage_detection: float = 0.1 + coverage_test_source: float = 6.0 + + @property + def nof_sources(self): + """Get number of sources in the source map.""" + return self.dispersion_model.source_map.nof_sources + + @property + def coverage_threshold(self): + """Compute coverage threshold from detection threshold and test source strength.""" + return self.coverage_test_source / self.coverage_detection + + def initialise(self, sensor_object: SensorGroup, meteorology: Meteorology, gas_species: GasSpecies): + """Set up the source model. + + Extract required information from the sensor, meteorology and gas species objects: + - Attach coupling calculated using self.dispersion_model. + - (If self.reversible_jump == True) Attach objects to source model which will be used in RJMCMC sampler, + they will be required when we need to update the couplings when new source locations are proposed when + we move/birth/death. + + Args: + sensor_object (SensorGroup): object containing sensor data. + meteorology (MeteorologyGroup): object containing meteorology data. + gas_species (GasSpecies): object containing gas species information. + + """ + self.initialise_dispersion_model(sensor_object) + self.coupling = self.dispersion_model.compute_coupling( + sensor_object, meteorology, gas_species, output_stacked=True + ) + self.screen_coverage() + if self.reversible_jump: + self.sensor_object = sensor_object + self.meteorology = meteorology + self.gas_species = gas_species + + def initialise_dispersion_model(self, sensor_object: SensorGroup): + """Initialise the dispersion model. + + If a dispersion_model has already been attached to this instance, then this function takes no action. + + If a dispersion_model has not already been attached to the instance, then this function adds a GaussianPlume + dispersion model, with a default source map that has limits set based on the sensor locations. + + Args: + sensor_object (SensorGroup): object containing sensor data. 
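A small arithmetic check of the default coverage settings: with a 0.1 ppm detection threshold and a 6 kg/hr test source, the coverage_threshold property evaluates to 60. How that threshold is applied to the coupling is handled by GaussianPlume.compute_coverage and is not reproduced here.

coverage_detection = 0.1    # ppm, sensor detection threshold
coverage_test_source = 6.0  # kg/hr, emission rate we want to be able to see
coverage_threshold = coverage_test_source / coverage_detection
print(coverage_threshold)   # 60.0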
+ + """ + if self.dispersion_model is None: + source_map = SourceMap() + sensor_locations = sensor_object.location.to_enu() + location_object = ENU( + ref_latitude=sensor_locations.ref_latitude, + ref_longitude=sensor_locations.ref_longitude, + ref_altitude=sensor_locations.ref_altitude, + ) + source_map.generate_sources( + coordinate_object=location_object, + sourcemap_limits=np.array( + [ + [np.min(sensor_locations.east), np.max(sensor_locations.east)], + [np.min(sensor_locations.north), np.max(sensor_locations.north)], + [np.min(sensor_locations.up), np.max(sensor_locations.up)], + ] + ), + sourcemap_type="grid", + ) + self.dispersion_model = GaussianPlume(source_map) + + def screen_coverage(self): + """Screen the initial source map for coverage.""" + in_coverage_area = self.dispersion_model.compute_coverage( + self.coupling, coverage_threshold=self.coverage_threshold + ) + self.coupling = self.coupling[:, in_coverage_area] + all_locations = self.dispersion_model.source_map.location.to_array() + screened_locations = all_locations[in_coverage_area, :] + self.dispersion_model.source_map.location.from_array(screened_locations) + + def update_coupling_column(self, state: dict, update_column: int) -> dict: + """Update the coupling, based on changes to the source locations as part of inversion. + + To be used in two different situations: + - movement of source locations (e.g. Metropolis Hastings, random walk). + - adding of new source locations (e.g. reversible jump birth move). + If [update_column < A.shape[1]]: an existing column of the A matrix is updated. + If [update_column == A.shape[1]]: a new column is appended to the right-hand side of the A matrix + (corresponding to a new source). + + A central assumption of this function is that the sensor information and meteorology information + have already been interpolated onto the same space/time points. + + If an update_column is supplied, the coupling for that source location only is calculated to save on + computation time. If update_column is None, then we just re-compute the whole coupling matrix. + + Args: + state (dict): dictionary containing state parameters. + update_column (int): index of the coupling column to be updated. + + Returns: + state (dict): state dictionary containing updated coupling information. + + """ + self.dispersion_model.source_map.location.from_array(state["z_src"][:, [update_column]].T) + new_coupling = self.dispersion_model.compute_coupling( + self.sensor_object, self.meteorology, self.gas_species, output_stacked=True, run_interpolation=False + ) + + if update_column == state["A"].shape[1]: + state["A"] = np.concatenate((state["A"], new_coupling), axis=1) + elif update_column < state["A"].shape[1]: + state["A"][:, [update_column]] = new_coupling + else: + raise ValueError("Invalid column specification for updating.") + return state + + def birth_function(self, current_state: dict, prop_state: dict) -> Tuple[dict, float, float]: + """Update MCMC state based on source birth proposal. + + Proposed state updated as follows: + 1- Add column to coupling matrix for new source location. + 2- If required, adjust other components of the state which correspond to the sources. + The source emission rate vector will be adjusted using the standardised functionality + in the openMCMC package. + + After the coupling has been updated, a coverage test is applied for the new source + location. 
If the max coupling is too small, a large contribution is added to the + log-proposal density for the new state, to force the sampler to reject it. + + A central assumption of this function is that the sensor information and meteorology information + have already been interpolated onto the same space/time points. + + This function assumes that the new source location has been added as the final column of + the source location matrix, and so will correspondingly append the new coupling column to the right + hand side of the current state coupling, and append an emission rate as the last element of the + current state emission rate vector. + + Args: + current_state (dict): dictionary containing parameters of the current state. + prop_state (dict): dictionary containing the parameters of the proposed state. + + Returns: + prop_state (dict): proposed state, with coupling matrix and source emission rate vector updated. + logp_pr_g_cr (float): log-transition density of the proposed state given the current state + (i.e. log[p(proposed | current)]) + logp_cr_g_pr (float): log-transition density of the current state given the proposed state + (i.e. log[p(current | proposed)]) + + """ + prop_state = self.update_coupling_column(prop_state, int(prop_state["n_src"]) - 1) + prop_state["alloc_s"] = np.concatenate((prop_state["alloc_s"], np.array([0], ndmin=2)), axis=0) + in_cov_area = self.dispersion_model.compute_coverage( + prop_state["A"][:, -1], coverage_threshold=self.coverage_threshold + ) + if not in_cov_area: + logp_pr_g_cr = 1e10 + else: + logp_pr_g_cr = 0.0 + logp_cr_g_pr = 0.0 + + return prop_state, logp_pr_g_cr, logp_cr_g_pr + + @staticmethod + def death_function(current_state: dict, prop_state: dict, deletion_index: int) -> Tuple[dict, float, float]: + """Update MCMC state based on source death proposal. + + Proposed state updated as follows: + 1- Remove column from coupling for deleted source. + 2- If required, adjust other components of the state which correspond to the sources. + The source emission rate vector will be adjusted using the standardised functionality in the general_mcmc repo. + + A central assumption of this function is that the sensor information and meteorology information have already + been interpolated onto the same space/time points. + + Args: + current_state (dict): dictionary containing parameters of the current state. + prop_state (dict): dictionary containing the parameters of the proposed state. + deletion_index (int): index of the source to be deleted in the overall set of sources. + + Returns: + prop_state (dict): proposed state, with coupling matrix and source emission rate vector updated. + logp_pr_g_cr (float): log-transition density of the proposed state given the current state + (i.e. log[p(proposed | current)]) + logp_cr_g_pr (float): log-transition density of the current state given the proposed state + (i.e. log[p(current | proposed)]) + + """ + prop_state["A"] = np.delete(prop_state["A"], obj=deletion_index, axis=1) + prop_state["alloc_s"] = np.delete(prop_state["alloc_s"], obj=deletion_index, axis=0) + logp_pr_g_cr = 0.0 + logp_cr_g_pr = 0.0 + + return prop_state, logp_pr_g_cr, logp_cr_g_pr + + def move_function(self, current_state: dict, update_column: int) -> dict: + """Re-compute the coupling after a source location move. + + Function first updates the coupling column, and then checks whether the location passes a coverage test. If the + location does not have good enough coverage, the state reverts to the coupling from the current state. 
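Minimal numpy sketch of the coupling-matrix bookkeeping behind the birth, death and move updates above: a birth appends a column on the right, a move overwrites the column of the relocated source, and a death deletes a column. The real column comes from the GaussianPlume coupling computation; a random placeholder is used here.

import numpy as np

rng = np.random.default_rng(1)
A = rng.uniform(size=(5, 2))           # coupling for 5 observations and 2 sources
new_column = rng.uniform(size=(5, 1))  # placeholder for a freshly computed coupling column

A_birth = np.concatenate((A, new_column), axis=1)  # birth: new source appended on the right
A_move = A_birth.copy()
A_move[:, [1]] = new_column                        # move: column of the moved source replaced
A_death = np.delete(A_birth, obj=1, axis=1)        # death: column of the deleted source removed
print(A_birth.shape, A_move.shape, A_death.shape)  # (5, 3) (5, 3) (5, 2)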
+ + Args: + current_state (dict): dictionary containing parameters of the current state. + update_column (int): index of the coupling column to be updated. + + Returns: + dict: proposed state, with updated coupling matrix. + + """ + prop_state = deepcopy(current_state) + prop_state = self.update_coupling_column(prop_state, update_column) + in_cov_area = self.dispersion_model.compute_coverage( + prop_state["A"][:, update_column], coverage_threshold=self.coverage_threshold + ) + if not in_cov_area: + prop_state = deepcopy(current_state) + return prop_state + + def make_model(self, model: list) -> list: + """Take model list and append new elements from current model component. + + Args: + model (list): Current list of model elements. + + Returns: + list: model list updated with source-related distributions. + + """ + model = self.make_allocation_model(model) + model = self.make_source_model(model) + if self.update_precision: + model.append(Gamma("lambda_s", shape="a_lam_s", rate="b_lam_s")) + if self.reversible_jump: + model.append( + Uniform( + response="z_src", + domain_response_lower=self.site_limits[:, [0]], + domain_response_upper=self.site_limits[:, [1]], + ) + ) + model.append(Poisson(response="n_src", rate="rho")) + return model + + def make_sampler(self, model: Model, sampler_list: list) -> list: + """Take sampler list and append new elements from current model component. + + Args: + model (Model): Full model list of distributions. + sampler_list (list): Current list of samplers. + + Returns: + list: sampler list updated with source-related samplers. + + """ + sampler_list = self.make_source_sampler(model, sampler_list) + sampler_list = self.make_allocation_sampler(model, sampler_list) + if self.update_precision: + sampler_list.append(NormalGamma("lambda_s", model)) + if self.reversible_jump: + sampler_list = self.make_sampler_rjmcmc(model, sampler_list) + return sampler_list + + def make_state(self, state: dict) -> dict: + """Take state dictionary and append initial values from model component. + + Args: + state (dict): current state vector. + + Returns: + dict: current state vector with source-related parameters added. + + """ + state = self.make_allocation_state(state) + state = self.make_source_state(state) + state["A"] = self.coupling + state["lambda_s"] = np.array(self.initial_precision, ndmin=1) + if self.update_precision: + state["a_lam_s"] = np.ones_like(self.initial_precision) * self.prior_precision_shape + state["b_lam_s"] = np.ones_like(self.initial_precision) * self.prior_precision_rate + if self.reversible_jump: + state["z_src"] = self.dispersion_model.source_map.location.to_array().T + state["n_src"] = state["z_src"].shape[1] + state["rho"] = self.rate_num_sources + return state + + def make_sampler_rjmcmc(self, model: Model, sampler_list: list) -> list: + """Create the parts of the sampler related to the reversible jump MCMC scheme. + + RJ MCMC scheme: + - create the RandomWalkLoop sampler object which updates the source locations one-at-a-time. + - create the ReversibleJump sampler which proposes birth/death moves to add/remove sources from the source + map. + + Args: + model (Model): model object containing probability density objects for all uncertain + parameters. + sampler_list (list): list of existing samplers. + + Returns: + sampler_list (list): list of samplers updated with samplers corresponding to RJMCMC routine. 
+ + """ + sampler_list[-1].max_variable_size = self.n_sources_max + + sampler_list.append( + RandomWalkLoop( + "z_src", + model, + step=np.array([1.0, 1.0, 0.1], ndmin=2).T, + max_variable_size=(3, self.n_sources_max), + domain_limits=self.site_limits, + state_update_function=self.move_function, + ) + ) + matching_params = {"variable": "s", "matrix": "A", "scale": 1.0, "limits": [0.0, 1e6]} + sampler_list.append( + ReversibleJump( + "n_src", + model, + step=np.array([1.0], ndmin=2), + associated_params="z_src", + n_max=self.n_sources_max, + state_birth_function=self.birth_function, + state_death_function=self.death_function, + matching_params=matching_params, + ) + ) + return sampler_list + + def from_mcmc(self, store: dict): + """Extract results of mcmc from mcmc.store and attach to components. + + Args: + store (dict): mcmc result dictionary. + + """ + self.from_mcmc_group(store) + self.from_mcmc_dist(store) + if self.update_precision: + self.precision_scalar = store["lambda_s"] + + def plot_iterations(self, plot: "Plot", burn_in_value: int, y_axis_type: str = "linear") -> "Plot": + """Plot the emission rate estimates source model object against MCMC iteration. + + Args: + burn_in_value (int): Burn in value to show in plot. + y_axis_type (str, optional): String to indicate whether the y-axis should be linear of log scale. + plot (Plot): Plot object to which this figure will be added in the figure dictionary. + + Returns: + plot (Plot): Plot object to which the figures added in the figure dictionary with + keys 'estimated_values_plot'/'log_estimated_values_plot' and 'number_of_sources_plot' + + """ + plot.plot_emission_rate_estimates(source_model_object=self, burn_in=burn_in_value, y_axis_type=y_axis_type) + plot.plot_single_trace(object_to_plot=self) + return plot + + +@dataclass +class Normal(SourceModel, NullGrouping, NormalResponse): + """Normal model, with null allocation. + + (Truncated) Gaussian prior for emission rates, no grouping/allocation; no transformation applied to emission rate + parameters. + + Can be used in the following cases: + - Fixed set of sources (grid or specific locations), all with the same Gaussian prior distribution. + - Variable number of sources, with a common prior distribution, estimated using reversible jump MCMC. + - Fixed set of sources with a bespoke prior per source (using the allocation to map prior parameters onto + sources). + + """ + + +@dataclass +class NormalSlabAndSpike(SourceModel, SlabAndSpike, NormalResponse): + """Normal Slab and Spike model. + + (Truncated) Gaussian prior for emission rates, slab and spike prior, with allocation estimation; no transformation + applied to emission rate parameters. + + Attributes: + initial_precision (np.ndarray): initial precision parameter for a slab and spike case. shape=(2, 1). + emission_rate_mean (np.ndarray): emission rate prior mean for a slab and spike case. shape=(2, 1). + + """ + + initial_precision: np.ndarray = field(default_factory=lambda: np.array([1 / (10**2), 1 / (0.01**2)], ndmin=2).T) + emission_rate_mean: np.ndarray = field(default_factory=lambda: np.array([0, 0], ndmin=2).T) diff --git a/src/pyelq/coordinate_system.py b/src/pyelq/coordinate_system.py new file mode 100644 index 0000000..9b55633 --- /dev/null +++ b/src/pyelq/coordinate_system.py @@ -0,0 +1,598 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Coordinate System. 
+ +This code provides the definition of, and the functionality for, all the main coordinate systems that are used in +pyELQ. Each coordinate system has relevant methods for features that are commonly required. Also provided is a set of +conversions between each of the systems, alongside some functionality for interpolation. + +""" +from abc import ABC, abstractmethod +from copy import deepcopy +from dataclasses import dataclass, field +from typing import Union + +import numpy as np +import pymap3d as pm +from scipy.spatial import KDTree +from scipy.stats import qmc + +import pyelq.support_functions.spatio_temporal_interpolation as sti + + +def make_latin_hypercube(bounds: np.ndarray, nof_samples: int) -> np.ndarray: + """Latin Hypercube samples. + + Draw samples according to a Latin Hypercube design within the specified bounds. + + Args: + bounds (np.ndarray): Limits of the resulting hypercube of size [dim x 2] + nof_samples (int): Number of samples to draw + + Returns: + array (np.ndarray): Samples forming the Latin Hypercube + + """ + dimension = bounds.shape[0] + sampler = qmc.LatinHypercube(d=dimension) + sample = sampler.random(n=nof_samples) + array = qmc.scale(sample, np.min(bounds, axis=1), np.max(bounds, axis=1)) + return array + + +@dataclass +class Coordinate(ABC): + """Abstract base class for coordinate transformations. + + Attributes: + use_degrees (bool): Flag if reference uses degrees (True) or radians (False). Defaults to True. + ellipsoid (pm.Ellipsoid): Definition of the Ellipsoid used in the coordinate system, for which the default is + WGS84. See: https://en.wikipedia.org/wiki/World_Geodetic_System. + + """ + + use_degrees: bool = field(init=False) + ellipsoid: pm.Ellipsoid = field(init=False) + + def __post_init__(self): + self.use_degrees = True + self.ellipsoid = pm.Ellipsoid.from_name("wgs84") + + @property + @abstractmethod + def nof_observations(self) -> int: + """Number of observations contained in the class instance, implemented as dependent property.""" + + @abstractmethod + def from_array(self, array: np.ndarray) -> None: + """Unstack a numpy array into the corresponding coordinates. + + The method has no return as it sets the corresponding attributes of the coordinate class instance. + + Args: + array (np.ndarray): Numpy array of size [n x dim] with n>0 containing the coordinates stacked into a single + array + + """ + + @abstractmethod + def to_array(self, dim: int = 3) -> np.ndarray: + """Stacks coordinates together into a numpy array. + + Args: + dim (int, optional): Number of dimensions to use, which is either 2 or 3. + + Returns: + np.ndarray: Numpy array of size [n x dim] with n>0 containing the coordinates stacked into a single array + + """ + + @abstractmethod + def to_lla(self): + """LLA: Converts coordinates to latitude/longitude/altitude system.""" + + @abstractmethod + def to_ecef(self): + """ECEF: Convert coordinates to earth centered earth fixed coordinates.""" + + @abstractmethod + def to_enu(self, ref_latitude: float = None, ref_longitude: float = None, ref_altitude: float = None): + """Converts coordinates to East North Up system. + + If a reference is not provided, the minimum of coordinates in Lat/Lon/Alt is used as the reference. 
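Usage sketch for a Latin Hypercube draw over a 3D box, written directly against scipy.stats.qmc in the same way as make_latin_hypercube above; the bounds are arbitrary example values.

import numpy as np
from scipy.stats import qmc

bounds = np.array([[0.0, 100.0], [0.0, 50.0], [0.0, 10.0]])  # [dim x 2] limits
sampler = qmc.LatinHypercube(d=bounds.shape[0])
sample = sampler.random(n=20)
points = qmc.scale(sample, np.min(bounds, axis=1), np.max(bounds, axis=1))
print(points.shape)  # (20, 3)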
+ + Args: + ref_latitude (float, optional): reference latitude for ENU + ref_longitude (float, optional): reference longitude for ENU + ref_altitude (float, optional): reference altitude for ENU + + Returns: + (ENU): East North Up coordinate object + + """ + + def to_object_type(self, coordinate_object): + """Converts current object to same class as input coordinate_object. + + Args: + coordinate_object (Coordinate): An coordinate object which provides the coordinate system to convert self to + + Returns: + (Coordinate): The converted coordinate object + + """ + if type(coordinate_object) is not type(self): + if isinstance(coordinate_object, LLA): + temp_object = self.to_lla() + elif isinstance(coordinate_object, ENU): + temp_object = self.to_enu( + ref_latitude=coordinate_object.ref_latitude, + ref_longitude=coordinate_object.ref_longitude, + ref_altitude=coordinate_object.ref_altitude, + ) + elif isinstance(coordinate_object, ECEF): + temp_object = self.to_ecef() + else: + raise TypeError("Please provide a valid coordinate type") + + return temp_object + + return self + + def interpolate(self, values: np.ndarray, locations, dim: int = 3, **kwargs) -> np.ndarray: + """Interpolate data using coordinate object. + + If locations coordinate system does not match self's coordinate system it will be converted to same type as + self. In the ENU case extra checking needs to take place to check reference locations match up. + + If only 1 value is provided which needs to be interpolated to many other locations we just set the value at all + these locations to the single input value + + Args: + values (np.ndarray): Values to interpolate, consistent with location in self + locations (Coordinate): Coordinate object containing locations to which you want to interpolate + dim (int): Number of dimensions to use for interpolation (2 or 3) + **kwargs (dict): Other arguments available in scipy.interpolate.griddata e.g. method, fill_value + + Returns: + Result (np.ndarray): Interpolated values at requested locations. + + """ + locations = locations.to_object_type(coordinate_object=self) + + if isinstance(self, ENU): + if ( + self.ref_latitude != locations.ref_latitude + or self.ref_longitude != locations.ref_longitude + or self.ref_altitude != locations.ref_altitude + ): + locations = locations.to_lla() + locations = locations.to_enu( + ref_latitude=self.ref_latitude, ref_longitude=self.ref_longitude, ref_altitude=self.ref_altitude + ) + result = sti.interpolate( + location_in=self.to_array(dim), + values_in=values.flatten(), + location_out=locations.to_array(dim=dim), + **kwargs, + ) + + return result + + def make_grid( + self, bounds: np.ndarray, grid_type: str = "rectangular", shape: Union[tuple, np.ndarray] = (5, 5, 1) + ) -> np.ndarray: + """Generates grid of values locations based on specified inputs. + + If the grid type is 'spherical', we scale the latitude and longitude from -90/90 and -180/180 to 0/1 for the + use in temp_lat_rad and temp_lon_rad. 
+ + Args: + bounds (np.ndarray): Limits of the grid on which to generate the grid of size [dim x 2] + if dim == 2 we assume the third dimension will be zeros + grid_type (str, optional): Type of grid to generate, default 'rectangular': + rectangular == rectangular grid of shape grd_shape, + spherical == grid of shape grid_shape taking into account a spherical spacing + shape: (tuple, optional): Number of grid cells to generate in each dimension, total number of + grid cells will be the product of the entries of this tuple + + Returns + np.ndarray: gridded of locations + + """ + dimension = bounds.shape[0] + + if grid_type == "rectangular": + dim_0 = np.linspace(bounds[0, 0], bounds[0, 1], num=shape[0]) + dim_1 = np.linspace(bounds[1, 0], bounds[1, 1], num=shape[1]) + if dimension == 3: + dim_2 = np.linspace(bounds[2, 0], bounds[2, 1], num=shape[2]) + else: + dim_2 = np.array(0) + + dim_0, dim_1, dim_2 = np.meshgrid(dim_0, dim_1, dim_2) + array = np.stack([dim_0.flatten(), dim_1.flatten(), dim_2.flatten()], axis=1) + elif grid_type == "spherical": + temp_object = deepcopy(self) + temp_object.from_array(array=bounds) + temp_object = temp_object.to_lla() + temp_object.latitude = (temp_object.latitude - (-90)) / 180 + temp_object.longitude = (temp_object.longitude - (-180)) / 360 + + temp_lat_rad = np.linspace(start=temp_object.latitude[0], stop=temp_object.latitude[1], num=shape[0]) + temp_lon_rad = np.linspace(start=temp_object.longitude[0], stop=temp_object.longitude[1], num=shape[1]) + + longitude = (2 * np.pi * temp_lon_rad - np.pi) * 180 / np.pi + latitude = (np.arccos(1 - 2 * temp_lat_rad) - 0.5 * np.pi) * 180 / np.pi + if dimension == 3: + altitude = np.linspace(start=temp_object.altitude[0], stop=temp_object.altitude[1], num=shape[2]) + latitude, longitude, altitude = np.meshgrid(latitude, longitude, altitude) + array = np.stack( + [latitude.flatten() * np.pi / 180, longitude.flatten() * np.pi / 180, altitude.flatten()], axis=1 + ) + else: + latitude, longitude = np.meshgrid(latitude, longitude) + array = np.stack([latitude.flatten() * np.pi / 180, longitude.flatten() * np.pi / 180], axis=1) + + temp_object.from_array(array=array) + temp_object = temp_object.to_object_type(self) + array = temp_object.to_array() + else: + raise NotImplementedError("Please provide a valid grid type") + + return array + + def create_tree(self) -> KDTree: + """Create KD tree for the purpose of fast distance computation. + + Returns: + KDTree: Spatial KD tree + + """ + return KDTree(self.to_array()) + + +@dataclass +class LLA(Coordinate): + """Defines the properties and functionality of the latitude/ longitude/ altitude coordinate system. + + Attributes: + latitude (np.ndarray): Latitude values in degrees. + longitude (np.ndarray): Longitude values in degrees. + altitude (np.ndarray): Altitude values in meters with respect to a spheroid. + + """ + + latitude: np.ndarray = None + longitude: np.ndarray = None + altitude: np.ndarray = None + + @property + def nof_observations(self): + """Number of observations contained in the class instance, implemented as dependent property.""" + if self.latitude is None: + return 0 + return self.latitude.size + + def from_array(self, array): + """Unstack a numpy array into the corresponding coordinates. + + The method has no return as it sets the corresponding attributes of the coordinate class instance. 
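+
+        For illustration (coordinate values assumed):
+
+            lla = LLA()
+            lla.from_array(np.array([[52.0, 4.0], [52.1, 4.1]]))  # 2 columns: altitude is filled with zeros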
+ + Args: + array (np.ndarray): Numpy array of size [n x dim] with n>0 containing the coordinates stacked into a single + array + + """ + dim = array.shape[1] + self.latitude = array[:, 0] + self.longitude = array[:, 1] + self.altitude = np.zeros_like(self.latitude) + if dim == 3: + self.altitude = array[:, 2] + + def to_array(self, dim=3): + """Stacks coordinates together into a numpy array. + + Args: + dim (int, optional): Number of dimensions to use, which is either 2 or 3. + + Returns: + (np.ndarray): Numpy array of size [n x dim] with n>0 containing the coordinates stacked into a single array + + """ + if dim == 2: + return np.stack((self.latitude.flatten(), self.longitude.flatten()), axis=1) + return np.stack((self.latitude.flatten(), self.longitude.flatten(), self.altitude.flatten()), axis=1) + + def to_lla(self): + """LLA: Converts coordinates to latitude/longitude/altitude system.""" + return self + + def to_ecef(self): + """ECEF: Convert coordinates to earth centered earth fixed coordinates.""" + if self.altitude is None: + self.altitude = np.zeros(self.latitude.shape) + ecef_object = ECEF() + ecef_object.x, ecef_object.y, ecef_object.z = pm.geodetic2ecef( + lat=self.latitude, lon=self.longitude, alt=self.altitude, ell=self.ellipsoid, deg=self.use_degrees + ) + + return ecef_object + + def to_enu(self, ref_latitude=None, ref_longitude=None, ref_altitude=None): + """Converts coordinates to East North Up system. + + If a reference is not provided, the minimum of coordinates in Lat/Lon/Alt is used as the reference. + + Args: + ref_latitude (float, optional): reference latitude for ENU + ref_longitude (float, optional): reference longitude for ENU + ref_altitude (float, optional): reference altitude for ENU + + Returns: + (ENU): East North Up coordinate object + + """ + if self.altitude is None: + self.altitude = np.zeros(self.latitude.shape) + + if ref_altitude is None: + ref_altitude = np.amin(self.altitude) + + if ref_latitude is None: + ref_latitude = np.amin(self.latitude) + + if ref_longitude is None: + ref_longitude = np.amin(self.longitude) + + enu_object = ENU(ref_latitude=ref_latitude, ref_longitude=ref_longitude, ref_altitude=ref_altitude) + + enu_object.east, enu_object.north, enu_object.up = pm.geodetic2enu( + lat=self.latitude, + lon=self.longitude, + h=self.altitude, + lat0=ref_latitude, + lon0=ref_longitude, + h0=ref_altitude, + ell=self.ellipsoid, + deg=self.use_degrees, + ) + + return enu_object + + +@dataclass +class ENU(Coordinate): + """Defines the properties and functionality of a local East-North-Up coordinate system. + + Positions relative to some reference location in metres. + + Attributes: + ref_latitude (float): Reference latitude for current ENU system. + ref_longitude (float): Reference longitude for current ENU system. + ref_altitude (float): Reference altitude for current ENU system. + east (np.ndarray): East values. + north (np.ndarray): North values. + up: (np.ndarray): Up values. + + """ + + ref_latitude: float + ref_longitude: float + ref_altitude: float + east: np.ndarray = None + north: np.ndarray = None + up: np.ndarray = None + + @property + def nof_observations(self): + """Number of observations contained in the class instance, implemented as dependent property.""" + if self.east is None: + return 0 + return self.east.size + + def from_array(self, array): + """Unstack a numpy array into the corresponding coordinates. + + The method has no return as it sets the corresponding attributes of the coordinate class instance. 
+ + Args: + array (np.ndarray): Numpy array of size [n x dim] with n>0 containing the coordinates stacked into a single + array + + """ + dim = array.shape[1] + self.east = array[:, 0] + self.north = array[:, 1] + self.up = np.zeros_like(self.east) + if dim == 3: + self.up = array[:, 2] + + def to_array(self, dim=3): + """Stacks coordinates together into a numpy array. + + Args: + dim (int, optional): Number of dimensions to use, which is either 2 or 3. + + Returns: + (np.ndarray): Numpy array of size [n x dim] with n>0 containing the coordinates stacked into a single array + + """ + if dim == 2: + return np.stack((self.east.flatten(), self.north.flatten()), axis=1) + return np.stack((self.east.flatten(), self.north.flatten(), self.up.flatten()), axis=1) + + def to_enu(self, ref_latitude=None, ref_longitude=None, ref_altitude=None): + """Converts coordinates to East North Up system. + + If a reference is not provided, the minimum of coordinates in Lat/Lon/Alt is used as the reference. + + Args: + ref_latitude (float, optional): reference latitude for ENU + ref_longitude (float, optional): reference longitude for ENU + ref_altitude (float, optional): reference altitude for ENU + + Returns: + (ENU): East North Up coordinate object + + """ + if ref_latitude is None: + ref_latitude = self.ref_latitude + + if ref_longitude is None: + ref_longitude = self.ref_longitude + + if ref_altitude is None: + ref_altitude = self.ref_altitude + + if ( + self.ref_latitude == ref_latitude + and self.ref_longitude == ref_longitude + and self.ref_altitude == ref_altitude + ): + return self + + ecef_temp = self.to_ecef() + + return ecef_temp.to_enu(ref_longitude=ref_longitude, ref_latitude=ref_latitude, ref_altitude=ref_altitude) + + def to_lla(self): + """LLA: Converts coordinates to latitude/longitude/altitude system.""" + lla_object = LLA() + + lla_object.latitude, lla_object.longitude, lla_object.altitude = pm.enu2geodetic( + e=self.east, + n=self.north, + u=self.up, + lat0=self.ref_latitude, + lon0=self.ref_longitude, + h0=self.ref_altitude, + ell=self.ellipsoid, + deg=self.use_degrees, + ) + + return lla_object + + def to_ecef(self): + """ECEF: Convert coordinates to earth centered earth fixed coordinates.""" + ecef_object = ECEF() + + ecef_object.x, ecef_object.y, ecef_object.z = pm.enu2ecef( + e1=self.east, + n1=self.north, + u1=self.up, + lat0=self.ref_latitude, + lon0=self.ref_longitude, + h0=self.ref_altitude, + ell=self.ellipsoid, + deg=self.use_degrees, + ) + + return ecef_object + + +@dataclass +class ECEF(Coordinate): + """Defines the properties and functionality of an Earth-Centered, Earth-Fixed coordinate system. + + See: https://en.wikipedia.org/wiki/Earth-centered,_Earth-fixed_coordinate_system + + Attributes: + x (np.ndarray): Eastings values [metres] + y (np.ndarray): Northings values [metres] + z (np.ndarray): Altitude values [metres] + + """ + + x: np.ndarray = None + y: np.ndarray = None + z: np.ndarray = None + + @property + def nof_observations(self): + """Number of observations contained in the class instance, implemented as dependent property.""" + if self.x is None: + return 0 + return self.x.size + + def from_array(self, array): + """Unstack a numpy array into the corresponding coordinates. + + The method has no return as it sets the corresponding attributes of the coordinate class instance. 
+ + Args: + array (np.ndarray): Numpy array of size [n x dim] with n>0 containing the coordinates stacked into a single + array + + """ + dim = array.shape[1] + self.x = array[:, 0] + self.y = array[:, 1] + self.z = np.zeros_like(self.x) + if dim == 3: + self.z = array[:, 2] + + def to_array(self, dim=3): + """Stacks coordinates together into a numpy array. + + Args: + dim (int, optional): Number of dimensions to use, which is either 2 or 3. + + Returns: + (np.ndarray): Numpy array of size [n x dim] with n>0 containing the coordinates stacked into a single array + + """ + if dim == 2: + return np.stack((self.x.flatten(), self.y.flatten()), axis=1) + return np.stack((self.x.flatten(), self.y.flatten(), self.z.flatten()), axis=1) + + def to_ecef(self): + """ECEF: Convert coordinates to earth centered earth fixed coordinates.""" + return self + + def to_lla(self): + """LLA: Converts coordinates to latitude/longitude/altitude system.""" + lla_object = LLA() + + lla_object.latitude, lla_object.longitude, lla_object.altitude = pm.ecef2geodetic( + self.x, self.y, self.z, ell=self.ellipsoid, deg=self.use_degrees + ) + + return lla_object + + def to_enu(self, ref_latitude=None, ref_longitude=None, ref_altitude=None): + """Converts coordinates to East North Up system. + + If a reference is not provided, the minimum of coordinates in Lat/Lon/Alt is used as the reference. + + Args: + ref_latitude (float, optional): reference latitude for ENU + ref_longitude (float, optional): reference longitude for ENU + ref_altitude (float, optional): reference altitude for ENU + + Returns: + (ENU): East North Up coordinate object + + """ + if ref_latitude is None or ref_longitude is None or ref_altitude is None: + lla_object = self.to_lla() + return lla_object.to_enu() + + enu_object = ENU(ref_latitude=ref_latitude, ref_longitude=ref_longitude, ref_altitude=ref_altitude) + + enu_object.east, enu_object.north, enu_object.up = pm.ecef2enu( + x=self.x, + y=self.y, + z=self.z, + lat0=ref_latitude, + lon0=ref_longitude, + h0=ref_altitude, + ell=self.ellipsoid, + deg=self.use_degrees, + ) + + return enu_object diff --git a/src/pyelq/data_access/__init__.py b/src/pyelq/data_access/__init__.py new file mode 100644 index 0000000..b0f8b88 --- /dev/null +++ b/src/pyelq/data_access/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 +"""Data Access Module.""" +__all__ = ["data_access"] diff --git a/src/pyelq/data_access/data_access.py b/src/pyelq/data_access/data_access.py new file mode 100644 index 0000000..561d6cf --- /dev/null +++ b/src/pyelq/data_access/data_access.py @@ -0,0 +1,104 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Data access module. + +Superclass containing some common attributes and helper functions used in multiple data access classes + +""" + +import datetime as dt +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Any, Union + +import pandas as pd + +from pyelq.meteorology import Meteorology, MeteorologyGroup +from pyelq.sensor.sensor import Sensor, SensorGroup + + +@dataclass +class DataAccess(ABC): + """DataAccess superclass containing some common attributes and functionalities. + + This superclass is used to show the type of methods to implement when creating a new data access class. 
The data + access classes are used to convert raw data into well-defined classes and objects which can be used by the rest of + the package. + + Attributes: + latitude_bounds (tuple, optional): Tuple specifying (latitude_min, latitude_max) + longitude_bounds (tuple, optional): Tuple specifying (longitude_min, longitude_max) + date_bounds (tuple, optional): Tuple specifying (datetime_min, datetime_max) + + """ + + latitude_bounds: tuple = (None, None) + longitude_bounds: tuple = (None, None) + date_bounds: tuple = (None, None) + + @abstractmethod + def to_sensor(self, *args: Any, **kwargs: dict) -> Union[Sensor, SensorGroup]: + """Abstract method to convert raw data into a Sensor or SensorGroup object. + + This method should be implemented to convert the raw data into a Sensor or SensorGroup object. + + Args: + *args (Any): Variable length argument list of any type. + **kwargs (dict): Arbitrary keyword arguments + + """ + + @abstractmethod + def to_meteorology(self, *args: Any, **kwargs: dict) -> Union[Meteorology, MeteorologyGroup]: + """Abstract method to convert raw data into a Meteorology or MeteorologyGroup object. + + This method should be implemented to convert the raw data into a Meteorology or MeteorologyGroup object. + + Args: + *args (Any): Variable length argument list of any type. + **kwargs (dict): Arbitrary keyword arguments + + """ + + def _query_aoi(self, data: pd.DataFrame) -> pd.DataFrame: + """Helper function to perform area of interest query on data. + + Args: + data (pd.Dataframe): Pandas dataframe to perform the query on + + """ + aoi_query_string = "" + if self.latitude_bounds[0] is not None: + aoi_query_string += f" & latitude>={self.latitude_bounds[0]}" + if self.latitude_bounds[1] is not None: + aoi_query_string += f" & latitude<={self.latitude_bounds[1]}" + if self.longitude_bounds[0] is not None: + aoi_query_string += f" & longitude>={self.longitude_bounds[0]}" + if self.longitude_bounds[1] is not None: + aoi_query_string += f" & longitude<={self.longitude_bounds[1]}" + if len(aoi_query_string) > 0: + aoi_query_string = aoi_query_string[3:] + return data.query(aoi_query_string).copy() + return data + + def _query_time(self, data: pd.DataFrame) -> pd.DataFrame: + """Helper function to perform time query on data. + + Args: + data (pd.Dataframe): Pandas dataframe to perform the query on + + """ + time_query_string = "" + if self.date_bounds[0] is not None: + timestamp_min = dt.datetime.timestamp(self.date_bounds[0]) + time_query_string += f" & timestamp>={timestamp_min}" + if self.date_bounds[1] is not None: + timestamp_max = dt.datetime.timestamp(self.date_bounds[1]) + time_query_string += f" & timestamp<={timestamp_max}" + if len(time_query_string) > 0: + time_query_string = time_query_string[3:] + return data.query(time_query_string).copy() + return data diff --git a/src/pyelq/dispersion_model/__init__.py b/src/pyelq/dispersion_model/__init__.py new file mode 100644 index 0000000..1d5c9ec --- /dev/null +++ b/src/pyelq/dispersion_model/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 +"""Dispersion Model Module.""" +__all__ = ["gaussian_plume"] diff --git a/src/pyelq/dispersion_model/gaussian_plume.py b/src/pyelq/dispersion_model/gaussian_plume.py new file mode 100644 index 0000000..3bb3be7 --- /dev/null +++ b/src/pyelq/dispersion_model/gaussian_plume.py @@ -0,0 +1,626 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. 
All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Gaussian Plume module. + +The class for the Gaussian Plume dispersion model used in pyELQ. + +The Mathematics of Atmospheric Dispersion Modeling, John M. Stockie, DOI. 10.1137/10080991X + +""" +from copy import deepcopy +from dataclasses import dataclass +from typing import Callable, Union + +import numpy as np + +import pyelq.support_functions.spatio_temporal_interpolation as sti +from pyelq.coordinate_system import ENU, LLA +from pyelq.gas_species import GasSpecies +from pyelq.meteorology import Meteorology, MeteorologyGroup +from pyelq.sensor.beam import Beam +from pyelq.sensor.satellite import Satellite +from pyelq.sensor.sensor import Sensor, SensorGroup +from pyelq.source_map import SourceMap + + +@dataclass +class GaussianPlume: + """Defines the Gaussian plume dispersion model class. + + Attributes: + source_map (Sourcemap): SourceMap object used for the dispersion model + source_half_width (float): Source half width (radius) to be used in the Gaussian plume model (in meters) + minimum_contribution (float): All elements in the Gaussian plume coupling smaller than this number will be set + to 0. Helps to speed up matrix multiplications/matrix inverses, also helps with stability + + """ + + source_map: SourceMap + source_half_width: float = 1 + minimum_contribution: float = 0 + + def compute_coupling( + self, + sensor_object: Union[SensorGroup, Sensor], + meteorology_object: Union[MeteorologyGroup, Meteorology], + gas_object: GasSpecies = None, + output_stacked: bool = False, + run_interpolation: bool = True, + ) -> Union[list, np.ndarray, dict]: + """Top level function to calculate the Gaussian plume coupling. + + Calculates the coupling for either a single sensor object or a dictionary of sensor objects. + + When both a SensorGroup and a MeteorologyGroup have been passed in, we assume they are consistent and contain + exactly the same keys for each item in both groups. Also assuming interpolation has been performed and time axes + are consistent, so we set run_interpolation to False + + When you input a SensorGroup and a single Meteorology object we convert this object into a dictionary, so we + don't have to duplicate the same code. + + Args: + sensor_object (Union[SensorGroup, Sensor]): Single sensor object or SensorGroup object which is used in the + calculation of the plume coupling. + meteorology_object (Union[MeteorologyGroup, Meteorology]): Meteorology object or MeteorologyGroup object + which is used in the calculation of the plume coupling. + gas_object (GasSpecies, optional): Optional input, a gas species object to correctly calculate the + gas density which is used in the conversion of the units of the Gaussian plume coupling + output_stacked (bool, optional): if true outputs as stacked np.array across sensors if not + outputs as dict + run_interpolation (bool, optional): logical indicating whether interpolation of the meteorological data to + the sensor/source is required. Defaults to True. + + Returns: + plume_coupling (Union[list, np.ndarray, dict]): List of arrays, single array or dictionary containing the + plume coupling in hr/kg. When a single source object is passed in as input this function returns a list + or an array depending on the sensor type. 
+ If a dictionary of sensor objects is passed in as input and output_stacked=False this function returns + a dictionary consistent with the input dictionary keys, containing the corresponding plume coupling + outputs for each sensor. + If a dictionary of sensor objects is passed in as input and output_stacked=True this function returns + a np.array containing the stacked coupling matrices. + + """ + if isinstance(sensor_object, SensorGroup): + output = {} + if isinstance(meteorology_object, Meteorology): + meteorology_object = dict.fromkeys(sensor_object.keys(), meteorology_object) + elif isinstance(meteorology_object, MeteorologyGroup): + run_interpolation = False + + for sensor_key in sensor_object: + output[sensor_key] = self.compute_coupling_single_sensor( + sensor_object=sensor_object[sensor_key], + meteorology=meteorology_object[sensor_key], + gas_object=gas_object, + run_interpolation=run_interpolation, + ) + if output_stacked: + output = np.concatenate(tuple(output.values()), axis=0) + + elif isinstance(sensor_object, Sensor): + if isinstance(meteorology_object, MeteorologyGroup): + raise TypeError("Please provide a single Meteorology object when using a single Sensor object") + + output = self.compute_coupling_single_sensor( + sensor_object=sensor_object, + meteorology=meteorology_object, + gas_object=gas_object, + run_interpolation=run_interpolation, + ) + else: + raise TypeError("Please provide either a Sensor or SensorGroup as input argument") + + return output + + def compute_coupling_single_sensor( + self, + sensor_object: Sensor, + meteorology: Meteorology, + gas_object: GasSpecies = None, + run_interpolation: bool = True, + ) -> Union[list, np.ndarray]: + """Wrapper function to compute the gaussian plume coupling for a single sensor. + + Wrapper is used to identify specific cases and calculate the Gaussian plume coupling accordingly. + + When the sensor object contains the source_on attribute we set all coupling values to 0 for observations for + which source_on is False. Making sure the source_on is column array, aligning with the 1st dimension + (nof_observations) of the plume coupling array. + + Args: + sensor_object (Sensor): Single sensor object which is used in the calculation of the plume coupling + meteorology (Meteorology): Meteorology object which is used in the calculation of the plume coupling + gas_object (GasSpecies, optional): Optionally input a gas species object to correctly calculate the + gas density which is used in the conversion of the units of the Gaussian plume coupling + run_interpolation (bool): logical indicating whether interpolation of the meteorological data to + the sensor/source is required. Default passed from compute_coupling. + + Returns: + plume_coupling (Union[list, np.ndarray]): List of arrays or single array containing the plume coupling + in 1e6*[hr/kg]. Entries of the list are per source in the case of a satellite sensor, if a single array + is returned the coupling for each observation (first dimension) to each source (second dimension) is + provided. 
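+
+            As an illustrative sketch of the shapes involved (names and sizes assumed): for a point sensor
+            with 10 observations and a source map containing 3 sources the returned array has shape
+            [10 x 3], so
+
+                concentrations_ppm = plume_coupling @ emission_rates_kg_per_hr  # [10 x 3] @ [3 x 1] = [10 x 1]
+
+            gives the modelled concentration contribution at each observation for emission rates in kg/hr.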
+ + """ + if not isinstance(sensor_object, Sensor): + raise NotImplementedError("Please provide a valid sensor type") + + ( + gas_density, + u_interpolated, + v_interpolated, + wind_turbulence_horizontal, + wind_turbulence_vertical, + ) = self.interpolate_all_meteorology( + meteorology=meteorology, + sensor_object=sensor_object, + gas_object=gas_object, + run_interpolation=run_interpolation, + ) + + wind_speed = np.sqrt(u_interpolated**2 + v_interpolated**2) + theta = np.arctan2(v_interpolated, u_interpolated) + + if isinstance(sensor_object, Satellite): + plume_coupling = self.compute_coupling_satellite( + sensor_object=sensor_object, + wind_speed=wind_speed, + theta=theta, + wind_turbulence_horizontal=wind_turbulence_horizontal, + wind_turbulence_vertical=wind_turbulence_vertical, + gas_density=gas_density, + ) + + else: + plume_coupling = self.compute_coupling_ground( + sensor_object=sensor_object, + wind_speed=wind_speed, + theta=theta, + wind_turbulence_horizontal=wind_turbulence_horizontal, + wind_turbulence_vertical=wind_turbulence_vertical, + gas_density=gas_density, + ) + + if sensor_object.source_on is not None: + plume_coupling = plume_coupling * sensor_object.source_on[:, None] + + return plume_coupling + + def compute_coupling_array( + self, + sensor_x: np.ndarray, + sensor_y: np.ndarray, + sensor_z: np.ndarray, + source_z: np.ndarray, + wind_speed: np.ndarray, + theta: np.ndarray, + wind_turbulence_horizontal: np.ndarray, + wind_turbulence_vertical: np.ndarray, + gas_density: Union[float, np.ndarray], + ) -> np.ndarray: + """Compute the Gaussian plume coupling. + + Most low level function to calculate the Gaussian plume coupling. Assuming input shapes are consistent but no + checking is done on this. + + Setting sigma_vert to 1e-16 when it is identically zero (distance_x == 0) so we don't get a divide by 0 error + all the time. + + Args: + sensor_x (np.ndarray): sensor x location relative to source [m]. + sensor_y (np.ndarray): sensor y location relative to source [m]. + sensor_z (np.ndarray): sensor z location relative to ground height [m]. + source_z (np.ndarray): source z location relative to ground height [m]. + wind_speed (np.ndarray): wind speed at source locations in [m/s]. + theta (np.ndarray): Mathematical wind direction at source locations [radians]: + calculated as np.arctan2(v_component_wind, u_component_wind). + wind_turbulence_horizontal (np.ndarray): Horizontal wind turbulence [deg]. + wind_turbulence_vertical (np.ndarray): Vertical wind turbulence [deg]. + gas_density (Union[float, np.ndarray]): Gas density to use in coupling calculation [kg/m^3]. + + Returns: + plume_coupling (np.ndarray): Gaussian plume coupling in (1e6)*[hr/kg]: gives concentrations + in [ppm] when multiplied by sources in [kg/hr]. 
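+
+            For reference, with downwind distance distance_x = cos(theta) * sensor_x + sin(theta) * sensor_y
+            and crosswind distance distance_y = -sin(theta) * sensor_x + cos(theta) * sensor_y, the
+            expression evaluated below is
+
+                sigma_hor = tan(wind_turbulence_horizontal * pi / 180) * abs(distance_x) + source_half_width
+                sigma_vert = tan(wind_turbulence_vertical * pi / 180) * abs(distance_x)
+                coupling = 1 / (2 * pi * wind_speed * sigma_hor * sigma_vert)
+                           * exp(-0.5 * (distance_y / sigma_hor) ** 2)
+                           * (exp(-0.5 * ((sensor_z + source_z) / sigma_vert) ** 2)
+                              + exp(-0.5 * ((sensor_z - source_z) / sigma_vert) ** 2))
+
+            after which the result is scaled by 1e6 / (gas_density * 3600) and set to zero wherever the
+            downwind distance is negative or the coupling falls below minimum_contribution.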
+ + """ + cos_theta = np.cos(theta) + sin_theta = np.sin(theta) + + distance_x = cos_theta * sensor_x + sin_theta * sensor_y + if np.all(distance_x < 0): + return np.zeros_like(distance_x) + + distance_y = -sin_theta * sensor_x + cos_theta * sensor_y + + sigma_hor = np.tan(wind_turbulence_horizontal * (np.pi / 180)) * np.abs(distance_x) + self.source_half_width + sigma_vert = np.tan(wind_turbulence_vertical * (np.pi / 180)) * np.abs(distance_x) + + sigma_vert[sigma_vert == 0] = 1e-16 + + plume_coupling = ( + (1 / (2 * np.pi * wind_speed * sigma_hor * sigma_vert)) + * np.exp(-0.5 * (distance_y / sigma_hor) ** 2) + * ( + np.exp(-0.5 * (((sensor_z + source_z) / sigma_vert) ** 2)) + + np.exp(-0.5 * (((sensor_z - source_z) / sigma_vert) ** 2)) + ) + ) + + plume_coupling = np.divide(np.multiply(plume_coupling, 1e6), (gas_density * 3600)) + plume_coupling[np.logical_or(distance_x < 0, plume_coupling < self.minimum_contribution)] = 0 + + return plume_coupling + + def calculate_gas_density( + self, meteorology: Meteorology, sensor_object: Sensor, gas_object: Union[GasSpecies, None] + ) -> np.ndarray: + """Helper function to calculate the gas density using ideal gas law. + + https://en.wikipedia.org/wiki/Ideal_gas + + When a gas object is passed as input we calculate the density according to that gas. We check if the + meteorology object has a temperature and/or pressure value and use those accordingly. Otherwise, we use Standard + Temperature and Pressure (STP). + + We interpolate the temperature and pressure values to the source locations/times such that this is consistent + with the other calculations, i.e. we only do spatial interpolation when the sensor is a Satellite object + and temporal interpolation otherwise. + + When no gas_object is passed in we just set the gas density value to 1. + + Args: + meteorology (Meteorology): Meteorology object potentially containing temperature or pressure values + sensor_object (Sensor): Sensor object containing information about where to interpolate to + gas_object (Union[GasSpecies, None]): Gas species object which actually calculates the correct density + + Returns: + gas_density (np.ndarray): Numpy array of shape [1 x nof_sources] (Satellite sensor) + or [nof_observations x 1] (otherwise) containing the gas density values to use + + """ + if not isinstance(gas_object, GasSpecies): + if isinstance(sensor_object, Satellite): + return np.ones((1, self.source_map.nof_sources)) + return np.ones((sensor_object.nof_observations, 1)) + + temperature_interpolated = self.interpolate_meteorology( + meteorology=meteorology, variable_name="temperature", sensor_object=sensor_object + ) + if temperature_interpolated is None: + temperature_interpolated = np.array([[273.15]]) + + pressure_interpolated = self.interpolate_meteorology( + meteorology=meteorology, variable_name="pressure", sensor_object=sensor_object + ) + if pressure_interpolated is None: + pressure_interpolated = np.array([[101.325]]) + + gas_density = gas_object.gas_density(temperature=temperature_interpolated, pressure=pressure_interpolated) + + return gas_density + + def interpolate_all_meteorology( + self, sensor_object: Sensor, meteorology: Meteorology, gas_object: GasSpecies, run_interpolation: bool + ): + """Function which carries out interpolation of all meteorological information. + + The flag run_interpolation determines whether the interpolation should be carried out. If this + is set to be False, the meteorological parameters are simply set to the values stored on the + meteorology object (i.e. 
we assume that the meteorology has already been interpolated). This + functionality is required to avoid wasted computation in the case of e.g. a reversible jump run. + + Args: + sensor_object (Sensor): object containing locations/times onto which met information should + be interpolated. + meteorology (Meteorology): object containing meteorology information for interpolation. + gas_object (GasSpecies): object containing gas information. + run_interpolation (bool): logical indicating whether the meteorology information needs to be interpolated. + + Returns: + gas_density (np.ndarray): numpy array of shape [n_data x 1] of gas densities. + u_interpolated (np.ndarray): numpy array of shape [n_data x 1] of northerly wind components. + v_interpolated (np.ndarray): numpy array of shape [n_data x 1] of easterly wind components. + wind_turbulence_horizontal (np.ndarray): numpy array of shape [n_data x 1] of horizontal turbulence + parameters. + wind_turbulence_vertical (np.ndarray): numpy array of shape [n_data x 1] of vertical turbulence + parameters. + + """ + if run_interpolation: + gas_density = self.calculate_gas_density( + meteorology=meteorology, sensor_object=sensor_object, gas_object=gas_object + ) + u_interpolated = self.interpolate_meteorology( + meteorology=meteorology, variable_name="u_component", sensor_object=sensor_object + ) + v_interpolated = self.interpolate_meteorology( + meteorology=meteorology, variable_name="v_component", sensor_object=sensor_object + ) + wind_turbulence_horizontal = self.interpolate_meteorology( + meteorology=meteorology, variable_name="wind_turbulence_horizontal", sensor_object=sensor_object + ) + wind_turbulence_vertical = self.interpolate_meteorology( + meteorology=meteorology, variable_name="wind_turbulence_vertical", sensor_object=sensor_object + ) + else: + gas_density = gas_object.gas_density(temperature=meteorology.temperature, pressure=meteorology.pressure) + gas_density = gas_density.reshape((gas_density.size, 1)) + u_interpolated = meteorology.u_component.reshape((meteorology.u_component.size, 1)) + v_interpolated = meteorology.v_component.reshape((meteorology.v_component.size, 1)) + wind_turbulence_horizontal = meteorology.wind_turbulence_horizontal.reshape( + (meteorology.wind_turbulence_horizontal.size, 1) + ) + wind_turbulence_vertical = meteorology.wind_turbulence_vertical.reshape( + (meteorology.wind_turbulence_vertical.size, 1) + ) + + return gas_density, u_interpolated, v_interpolated, wind_turbulence_horizontal, wind_turbulence_vertical + + def interpolate_meteorology( + self, meteorology: Meteorology, variable_name: str, sensor_object: Sensor + ) -> Union[np.ndarray, None]: + """Helper function to interpolate meteorology variables. + + This function interpolates meteorological variables to times in Sensor or Sources in sourcemap. It also + calculates the wind speed and mathematical angle between the u- and v-components which in turn gets used in the + calculation of the Gaussian plume. + + When the input sensor object is a Satellite type we use spatial interpolation using the interpolation method + from the coordinate system class as this takes care of the coordinate systems. + When the input sensor object is of another time we use temporal interpolation (assumption is spatial uniformity + for all observations over a small(er) area). 
+ + Args: + meteorology (Meteorology): Meteorology object containing u- and v-components of wind including their + spatial location + variable_name (str): String name of an attribute in the meteorology input object which needs to be + interpolated + sensor_object (Sensor): Sensor object containing information about where to interpolate to + + Returns: + variable_interpolated (np.ndarray): Interpolated values + + """ + variable = getattr(meteorology, variable_name) + if variable is None: + return None + + if isinstance(sensor_object, Satellite): + variable_interpolated = meteorology.location.interpolate(variable, self.source_map.location) + variable_interpolated = variable_interpolated.reshape(1, self.source_map.nof_sources) + else: + variable_interpolated = sti.interpolate( + time_in=meteorology.time, values_in=variable, time_out=sensor_object.time + ) + variable_interpolated = variable_interpolated.reshape(sensor_object.nof_observations, 1) + return variable_interpolated + + def compute_coupling_satellite( + self, + sensor_object: Sensor, + wind_speed: np.ndarray, + theta: np.ndarray, + wind_turbulence_horizontal: np.ndarray, + wind_turbulence_vertical: np.ndarray, + gas_density: np.ndarray, + ) -> list: + """Compute Gaussian plume coupling for satellite sensor. + + When the sensor is a Satellite object we calculate the plume coupling per source. Given the large number of + sources and the possibility of using the inclusion radius and inclusion indices here and validity of a local + ENU system over large distances we loop over each source and calculate the coupling on a per-source basis. + + If source_map.inclusion_n_obs is None, we do not do any filtering on observations and we want to include all + observations in the plume coupling calculations. + + All np.ndarray inputs should have a shape of [1 x nof_sources] + + Args: + sensor_object (Sensor): Sensor object used in plume coupling calculation + wind_speed (np.ndarray): Wind speed [m/s] + theta (np.ndarray): Mathematical angle between the u- and v-components of wind [radians] + wind_turbulence_horizontal (np.ndarray): Parameter of the wind stability in horizontal direction [deg] + wind_turbulence_vertical (np.ndarray): Parameter of the wind stability in vertical direction [deg] + gas_density: (np.ndarray): Numpy array containing the gas density values to use [kg/m^3] + + Returns: + plume_coupling (list): List of Gaussian plume coupling 1e6*[hr/kg] arrays. The list has a length of + nof_sources, each array has the shape [nof_observations x 1] or [inclusion_n_obs x 1] when + inclusion_idx is used. 
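+
+            As an illustrative note on the output structure (sizes assumed): with 3 sources and no
+            inclusion filtering the result is a list of 3 arrays, each of shape [nof_observations x 1], so
+            the coupling of source j to observation i is plume_coupling[j][i, 0].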
+ + """ + plume_coupling = [] + + source_map_location_lla = self.source_map.location.to_lla() + for current_source in range(self.source_map.nof_sources): + if self.source_map.inclusion_n_obs is None: + enu_sensor_array = sensor_object.location.to_enu( + ref_latitude=source_map_location_lla.latitude[current_source], + ref_longitude=source_map_location_lla.longitude[current_source], + ref_altitude=0, + ).to_array() + + else: + if self.source_map.inclusion_n_obs[current_source] == 0: + plume_coupling.append(np.array([])) + continue + + enu_sensor_array = _create_enu_sensor_array( + inclusion_idx=self.source_map.inclusion_idx[current_source], + sensor_object=sensor_object, + source_map_location_lla=source_map_location_lla, + current_source=current_source, + ) + + temp_coupling = self.compute_coupling_array( + enu_sensor_array[:, [0]], + enu_sensor_array[:, [1]], + enu_sensor_array[:, [2]], + source_map_location_lla.altitude[current_source], + wind_speed[:, current_source], + theta[:, current_source], + wind_turbulence_horizontal[:, current_source], + wind_turbulence_vertical[:, current_source], + gas_density[:, current_source], + ) + + plume_coupling.append(temp_coupling) + + return plume_coupling + + def compute_coupling_ground( + self, + sensor_object: Sensor, + wind_speed: np.ndarray, + theta: np.ndarray, + wind_turbulence_horizontal: np.ndarray, + wind_turbulence_vertical: np.ndarray, + gas_density: np.ndarray, + ) -> np.ndarray: + """Compute Gaussian plume coupling for a ground sensor. + + If the source map is already defined as ENU the reference location is maintained but the sensor is checked + to make sure the same reference location is used. Otherwise, when converting to ENU object for the sensor + observations we use a single source and altitude 0 as the reference location. This way our ENU system is a + system w.r.t. ground level which is required for the current implementation of the actual coupling calculation. + + When the sensor is a Beam object we calculate the plume coupling for all sources to all beam knot locations at + once in the same ENU coordinate system and finally averaged over the beam knots to get the final output. + + In general, we calculate the coupling from all sources to all sensor observation locations. In order to achieve + this we input the sensor array as column and source array as row vector in calculating relative x etc., + with the beam knot locations being the third dimension. When the sensor is a single point Sensor or a Drone + sensor we effectively have one beam knot, making the mean operation at the end effectively a reshape operation + which gets rid of the third dimension. + + All np.ndarray inputs should have a shape of [nof_observations x 1] + + Args: + sensor_object (Sensor): Sensor object used in plume coupling calculation + wind_speed (np.ndarray): Wind speed [m/s] + theta (np.ndarray): Mathematical angle between the u- and v-components of wind [radians] + wind_turbulence_horizontal (np.ndarray): Parameter of the wind stability in horizontal direction [deg] + wind_turbulence_vertical (np.ndarray): Parameter of the wind stability in vertical direction [deg] + gas_density: (np.ndarray): Numpy array containing the gas density values to use [kg/m^3] + + Returns: + plume_coupling (np.ndarray): Gaussian plume coupling 1e6*[hr/kg] array. 
The array has the + shape [nof_observations x nof_sources] + + """ + if not isinstance(self.source_map.location, ENU): + source_map_lla = self.source_map.location.to_lla() + source_map_enu = source_map_lla.to_enu( + ref_latitude=source_map_lla.latitude[0], ref_longitude=source_map_lla.longitude[0], ref_altitude=0 + ) + else: + source_map_enu = self.source_map.location + + enu_source_array = source_map_enu.to_array() + + if isinstance(sensor_object, Beam): + enu_sensor_array = sensor_object.make_beam_knots( + ref_latitude=source_map_enu.ref_latitude, + ref_longitude=source_map_enu.ref_longitude, + ref_altitude=source_map_enu.ref_altitude, + ) + relative_x = np.subtract(enu_sensor_array[:, 0][None, None, :], enu_source_array[:, 0][None, :, None]) + relative_y = np.subtract(enu_sensor_array[:, 1][None, None, :], enu_source_array[:, 1][None, :, None]) + z_sensor = enu_sensor_array[:, 2][None, None, :] + else: + enu_sensor_array = sensor_object.location.to_enu( + ref_latitude=source_map_enu.ref_latitude, + ref_longitude=source_map_enu.ref_longitude, + ref_altitude=source_map_enu.ref_altitude, + ).to_array() + relative_x = np.subtract(enu_sensor_array[:, 0][:, None, None], enu_source_array[:, 0][None, :, None]) + relative_y = np.subtract(enu_sensor_array[:, 1][:, None, None], enu_source_array[:, 1][None, :, None]) + z_sensor = enu_sensor_array[:, 2][:, None, None] + + z_source = enu_source_array[:, 2][None, :, None] + + plume_coupling = self.compute_coupling_array( + relative_x, + relative_y, + z_sensor, + z_source, + wind_speed[:, :, None], + theta[:, :, None], + wind_turbulence_horizontal[:, :, None], + wind_turbulence_vertical[:, :, None], + gas_density[:, :, None], + ) + + plume_coupling = plume_coupling.mean(axis=2) + + return plume_coupling + + @staticmethod + def compute_coverage( + couplings: np.ndarray, threshold_function: Callable = np.max, coverage_threshold: float = 6, **kwargs + ) -> Union[np.ndarray, dict]: + """Returns a logical vector that indicates which sources in the couplings are, or are not, within the coverage. + + The 'coverage' is the area inside which all sources are well covered by wind data. E.g. If wind exclusively + blows towards East, then all sources to the East of any sensor are 'invisible', and are not within the coverage. + + Couplings are returned in hr/kg. Some threshold function defines the largest allowed coupling value. This is + used to calculate estimated emission rates in kg/hr. Any emissions which are greater than the value of + 'coverage_threshold' are defined as not within the coverage. + + Args: + couplings (np.ndarray): Array of coupling values. Dimensions: n_datapoints x n_sources. + threshold_function (Callable, optional): Callable function which returns some single value that defines the + maximum or 'threshold' coupling. Examples: np.quantile(q=0.9), + np.max, np.mean. Defaults to np.max. + coverage_threshold (float, optional): The threshold value of the estimated emission rate which is + considered to be within the coverage. Defaults to 6 kg/hr. + kwargs (dict, optional): Keyword arguments required for the threshold function. + + Returns: + coverage (Union[np.ndarray, dict]): A logical array specifying which sources are within the coverage. 
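+
+            Illustrative usage (coupling array assumed to be [n_datapoints x n_sources]), using the 90th
+            percentile of each source's coupling as the threshold function instead of the default maximum:
+
+                coverage = GaussianPlume.compute_coverage(
+                    couplings, threshold_function=np.quantile, coverage_threshold=6, q=0.9
+                )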
+ + """ + coupling_threshold = threshold_function(couplings, axis=0, **kwargs) + no_warning_threshold = np.where(coupling_threshold <= 1e-100, 1, coupling_threshold) + no_warning_estimated_emission_rates = np.where(coupling_threshold <= 1e-100, np.inf, 1 / no_warning_threshold) + coverage = no_warning_estimated_emission_rates < coverage_threshold + + return coverage + + +def _create_enu_sensor_array( + inclusion_idx: np.ndarray, sensor_object: Sensor, source_map_location_lla: LLA, current_source: int +): + """Helper function to create ENU sensor array when we only want ot include specific observation locations. + + This function gets called when we need to create the enu_sensor_array when we only want to include specific + observation locations. First we obtain the subset of locations from the sensor object and convert that to an array. + Given we don't know which coordinate system the sensor_object is created in, we make a copy of the original sensor + object, thereby keeping all key details of the coordinate system and repopulate the location values accordingly + through the from_array method using the subset of locations from the sensor object. Finally, we convert the subset + to ENU and return that as output. + + Args: + inclusion_idx (np.ndarray): Numpy array containing the indices of observations in the sensor_object to be used + in the Gaussian plume coupling. + sensor_object (Sensor): Sensor object to be used in the Gaussian Plume calculation. + source_map_location_lla (LLA): LLA coordinate object of the source map locations. + current_source (int): Integer index of the current source for which we want to use in the Gaussian plume + calculation. + + """ + temp_array = sensor_object.location.to_array()[inclusion_idx, :] + temp_object = deepcopy(sensor_object.location) + temp_object.from_array(array=temp_array) + enu_sensor_array = temp_object.to_enu( + ref_latitude=source_map_location_lla.latitude[current_source], + ref_longitude=source_map_location_lla.longitude[current_source], + ref_altitude=0, + ).to_array() + + return enu_sensor_array diff --git a/src/pyelq/dlm.py b/src/pyelq/dlm.py new file mode 100644 index 0000000..147e26f --- /dev/null +++ b/src/pyelq/dlm.py @@ -0,0 +1,497 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""DLM module. + +This module provides a class definition for the Dynamic Linear Models following Harrison and West +'Bayesian Forecasting and Dynamic Models' (2nd ed), Springer New York, NY, Chapter 4, https://doi.org/10.1007/b98971 + +""" +from dataclasses import dataclass, field +from typing import Tuple, Union + +import numpy as np +from scipy.stats import chi2 + + +@dataclass +class DLM: + """Defines the DLM in line with Harrison and West (2nd edition) Chapter 4. 
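+
+    The model assumed throughout is the constant DLM (notation as in Harrison and West):
+        observation:      Y_t = F.T @ theta_t + nu_t,            nu_t ~ N(0, V)
+        state evolution:  theta_t = G @ theta_{t-1} + omega_t,   omega_t ~ N(0, W)
+    with F, G, V and W held constant over time, corresponding to the attributes listed below.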
+ + Attributes: + f_matrix (np.ndarray, optional): F matrix linking the state to the observables of + size [nof_state_parameters x nof_observables] + g_matrix (np.ndarray, optional): G matrix characterizing the state evolution of + size [nof_state_parameters x nof_state parameters] + v_matrix (np.ndarray, optional): V matrix being the covariance matrix of the zero mean observation noise + of size [nof_state_parameters x nof_observables] + w_matrix (np.ndarray, optional): W matrix being the covariance matrix of the zero mean system noise of + size [nof_state_parameters x nof_state parameters] + g_power (np.ndarray, optional): Attribute to store G^k, does not get initialized + + """ + + f_matrix: np.ndarray = None + g_matrix: np.ndarray = None + v_matrix: np.ndarray = None + w_matrix: np.ndarray = None + g_power: np.ndarray = field(init=False) + + @property + def nof_observables(self) -> int: + """Int: Number of observables as derived from the associated F matrix.""" + if self.f_matrix is not None and isinstance(self.f_matrix, np.ndarray): + return self.f_matrix.shape[1] + return 0 + + @property + def nof_state_parameters(self) -> int: + """Int: Number of state parameters as derived from the associated G matrix.""" + if self.g_matrix is not None and isinstance(self.g_matrix, np.ndarray): + return self.g_matrix.shape[0] + return 0 + + def calculate_g_power(self, max_power: int) -> None: + """Calculate the powers of the G matrix. + + Calculate the powers upfront, so we don't have to calculate it at every iteration. Result gets stored in the + g_power attribute of the DLM class. We use an iterative way of calculating the power to have the fewest matrix + multiplications necessary, i.e. we are not using numpy.linalg.matrix_power as that would leak to k factorial + multiplications instead of the k we have now. + + Args: + max_power (int): Maximum power to compute + + """ + if self.nof_state_parameters == 1: + self.g_power = self.g_matrix ** np.array([[range(max_power + 1)]]) + else: + self.g_power = np.zeros((self.nof_state_parameters, self.nof_state_parameters, max_power + 1)) + self.g_power[:, :, 0] = np.identity(self.nof_state_parameters) + for i in range(max_power): + self.g_power[:, :, i + 1] = self.g_power[:, :, i] @ self.g_matrix + + def polynomial_f_g(self, nof_observables: int, order: int) -> None: + """Create F and G matrices associated with a polynomial DLM. + + Following Harrison and West (Chapter 7 on polynomial DLMs) with the exception that we use order==0 for a + "constant" DLM and order==1 for linear growth DLM, order==2 for quadratic growth etc. + Hence, the definition of n-th order polynomial DLM in Harrison & West is implemented here with order=n-1 + We stack the observables in a block diagonal form. So the first #order of rows belong to the first observable, + the second #order rows belong to the second observable etc. + Results are being stored in the f_matrix and g_matrix attributes respectively + + Args: + nof_observables (int): Dimension of observation + order (int): Polynomial order (0=constant, 1=linear, 2=quadratic etc.) + + """ + e_n = np.append(1, np.zeros(order))[:, None] + self.f_matrix = np.kron(np.eye(nof_observables), e_n) + + l_n = np.triu(np.ones((order + 1, order + 1))) + self.g_matrix = np.kron(np.eye(nof_observables), l_n) + + def simulate_data(self, init_state: np.ndarray, nof_timesteps: int) -> Tuple[np.ndarray, np.ndarray]: + """Simulate data from DLM model. 
+ + Function to simulate state evolution and corresponding observations according to model as specified through DLM + class attributes (F, G, V and W matrices) + + Args: + init_state (np.ndarray): Initial state vector to start simulating from of size [nof_state_parameters x 1] + nof_timesteps (int): Number of timesteps to simulate + + Returns: + state (np.ndarray): Simulated state vectors of size [nof_state_parameters x nof_timesteps] + obs (np.ndarray): Simulated observations of size [nof_observables x nof_timesteps] + + """ + if self.f_matrix is None or self.g_matrix is None or self.v_matrix is None or self.w_matrix is None: + raise ValueError("Please specify all matrices (F, G, V and W)") + + obs = np.empty((self.nof_observables, nof_timesteps)) + state = np.empty((self.nof_state_parameters, nof_timesteps)) + + state[:, [0]] = init_state + mean_state_noise = np.zeros(self.nof_state_parameters) + mean_observation_noise = np.zeros(self.nof_observables) + + for i in range(nof_timesteps): + if i == 0: + state[:, [i]] = ( + self.g_matrix @ init_state + + np.random.multivariate_normal(mean_state_noise, self.w_matrix, size=1).T + ) + else: + state[:, [i]] = ( + self.g_matrix @ state[:, [i - 1]] + + np.random.multivariate_normal(mean_state_noise, self.w_matrix, size=1).T + ) + obs[:, [i]] = ( + self.f_matrix.T @ state[:, [i]] + + np.random.multivariate_normal(mean_observation_noise, self.v_matrix, size=1).T + ) + + return state, obs + + def forecast_mean( + self, current_mean_state: np.ndarray, forecast_steps: Union[int, list, np.ndarray] = 1 + ) -> Tuple[np.ndarray, np.ndarray]: + """Perform forecasting of the state and observation mean parameters. + + Following Harrison and West (2nd ed) Chapter 4.4 (Forecast Distributions), corollary 4.1, assuming F and G are + constant over time. + Note that in the output the second axis of the output arrays is the forecast dimension consistent with the + forecast steps input, all forecast steps contained in the forecast steps argument are returned. + + Args: + current_mean_state (np.ndarray): Current mean parameter for the state of size [nof_state_parameters x 1] + forecast_steps (Union[int, list, np.ndarray], optional): Steps ahead to forecast + + Returns: + a_t_k (np.array): Forecast values of state mean parameter of the size + [nof_observables x size(forecast_steps)] + f_t_k (np.array): Forecast values of observation mean parameter of the size + [nof_observables x size(forecast_steps)] + + """ + min_forecast = np.amin(forecast_steps) + + if min_forecast < 1: + raise ValueError(f"Minimum forecast should be >= 1, currently it is {min_forecast}") + if isinstance(forecast_steps, int): + forecast_steps = [forecast_steps] + + a_t_k = np.hstack([self.g_power[:, :, step] @ current_mean_state for step in forecast_steps]) + f_t_k = self.f_matrix.T @ a_t_k + + return a_t_k, f_t_k + + def forecast_covariance( + self, c_matrix: np.ndarray, forecast_steps: Union[int, list, np.ndarray] = 1 + ) -> Tuple[np.ndarray, np.ndarray]: + """Perform forecasting of the state and observation covariance parameters. + + Following Harrison and West (2nd ed) Chapter 4.4 (Forecast Distributions), assuming F, G, V and W are + constant over time. + Note that in the output the third axis of the output arrays is the forecast dimension consistent with the + forecast steps input, all forecast steps contained in the forecast steps argument are returned. 
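+
+        With constant matrices, the k-step ahead covariances computed below are
+            R_t(k) = G^k @ C @ (G^k).T + sum_{i=0}^{k-1} G^i @ W @ (G^i).T
+            Q_t(k) = F.T @ R_t(k) @ F + V
+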
+ sum_g_w_g is initialized as G^k @ W @ G^k for k==0, hence we initialize as W + Because of zero based indexing, in the for loop i==1 means 2-step ahead forecast which requires element + (i+1) of the g_power attribute as the third dimension serves as the actual power of the G matrix + + Args: + c_matrix (np.ndarray): Current posterior covariance estimate for the state of size + [nof_state_parameters x nof_state_parameters] + forecast_steps (Union[int, list, np.ndarray], optional): Steps ahead to forecast + + Returns: + r_t_k (np.array): Forecast values of estimated prior state covariance of the size + [nof_state_parameters x nof_state_parameters x size(forecast_steps)] + q_t_k (np.array): Forecast values of estimated observation covariance of the size + [nof_observables x nof_observables x size(forecast_steps)] + + """ + min_forecast = np.amin(forecast_steps) + max_forecast = np.amax(forecast_steps) + + if min_forecast < 1: + raise ValueError(f"Minimum forecast should be >= 1, currently it is {min_forecast}") + if isinstance(forecast_steps, int): + forecast_steps = [forecast_steps] + + sum_g_w_g = np.zeros((self.nof_state_parameters, self.nof_state_parameters, max_forecast)) + sum_g_w_g[:, :, 0] = self.w_matrix + for i in np.arange(1, max_forecast, step=1): + sum_g_w_g[:, :, i] = ( + sum_g_w_g[:, :, i - 1] + self.g_power[:, :, i] @ self.w_matrix @ self.g_power[:, :, i].T + ) + + r_t_k = np.dstack( + [ + self.g_power[:, :, step] @ c_matrix @ self.g_power[:, :, step].T + sum_g_w_g[:, :, step - 1] + for step in forecast_steps + ] + ) + q_t_k = np.dstack( + [self.f_matrix.T @ r_t_k[:, :, idx] @ self.f_matrix + self.v_matrix for idx in range(r_t_k.shape[2])] + ) + + return r_t_k, q_t_k + + def update_posterior( + self, a_t: np.ndarray, r_matrix_t: np.ndarray, q_matrix_t: np.ndarray, error: np.ndarray + ) -> Tuple[np.ndarray, np.ndarray]: + """Update of the posterior mean and covariance of the state. + + Following Harrison and West (2nd ed) Chapter 4.4 (Forecast Distributions), assuming F, G, V and W are + constant over time. + We are using a solver instead of calculating the inverse of Q directly + Setting inf values in Q equal to 0 after the solver function for computational issues, otherwise we would + get 0 * inf = nan, where we want the result to be 0. 
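+
+        For reference, the update evaluated below is the standard DLM updating recursion
+            A_t = R_t @ F @ Q_t^(-1)
+            m_t = a_t + A_t @ error
+            C_t = R_t - A_t @ Q_t @ A_t.T
+        with error the one step ahead forecast error.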
+ + Args: + a_t (np.ndarray): Current prior mean of the state of size [nof_state_parameters x 1] + r_matrix_t (np.ndarray): Current prior covariance of the state of size [nof_state_parameters x nof_state_parameters] + q_matrix_t (np.ndarray): Current one step ahead forecast covariance estimate of the observations of size [nof_observables x nof_observables] + error (np.ndarray): Error associated with the one step ahead forecast (observation - forecast) of size [nof_observables x 1] + + Returns: + m_t (np.array): Posterior mean estimate of the state of size [nof_state_parameters x 1] + c_matrix (np.array): Posterior covariance estimate of the state of size [nof_state_parameters x nof_state_parameters] + + """ + if self.nof_state_parameters == 1: + a_matrix_t = r_matrix_t @ self.f_matrix.T @ (1 / q_matrix_t) + else: + a_matrix_t = r_matrix_t @ np.linalg.solve(q_matrix_t.T, self.f_matrix.T).T + m_t = a_t + a_matrix_t @ error + q_matrix_t[np.isinf(q_matrix_t)] = 0 + c_matrix = r_matrix_t - a_matrix_t @ q_matrix_t @ a_matrix_t.T + + return m_t, c_matrix + + def dlm_full_update( + self, + new_observation: np.ndarray, + current_mean_state: np.ndarray, + current_cov_state: np.ndarray, + mode: str = "learn", + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """Perform 1 step of the full DLM update. + + Following Harrison and West (2nd ed) we perform all steps to update the entire DLM model and obtain new + estimates for all parameters involved, including nan value handling. + When mode == 'learn' the parameters are updated, when mode == 'ignore' the current observation is ignored and + the posterior is set equal to the prior + When no observation is present (i.e. a nan value) we let the covariance (V matrix) for that particular sensor + such that we set the variance of that sensor for that time instance to infinity and set all cross (covariance) + terms to 0. Instead of changing this in the V matrix, we simply adjust the Q matrix accordingly. Effectively, + we set the posterior equal to the prior for that particular sensor and the uncertainty associated with the new + forecast gets increased. We set the error equal to zero for computational issues, first but finally set it equal + to nan in the end. + + Args: + new_observation (np.ndarray): New observations to use in the updating of the estimates of size [nof_observables x 1] + current_mean_state (np.ndarray): Current mean estimate for the state of size [nof_state_parameters x 1] + current_cov_state (np.ndarray): Current covariance estimate for the state of size [nof_state_parameters x nof_state_parameters] + mode (str, optional): String indicating whether the DLM needs to be updated using the new observation or not. 
Currently, `learn` and `ignore` are implemented + + Returns: + new_mean_state (np.ndarray): New mean estimate for the state of size [nof_state_parameters x 1] + new_cov_state (np.ndarray): New covariance estimate for the state of size [nof_state_parameters x nof_state_parameters] + error (np.ndarray): Error between the observation and the forecast (observation - forecast) of size [nof_observables x 1] + + """ + a_t, f_t = self.forecast_mean(current_mean_state, forecast_steps=1) + r_matrix_t, q_matrix_t = self.forecast_covariance(current_cov_state, forecast_steps=1) + error = new_observation - f_t + + nan_bool = np.isnan(new_observation) + nan_idx = np.argwhere(nan_bool.flatten()) + if np.any(nan_bool): + q_matrix_t[nan_idx, :, 0] -= self.v_matrix[nan_idx, :] + q_matrix_t[:, nan_idx, 0] -= self.v_matrix[:, nan_idx] + q_matrix_t[nan_idx, nan_idx, 0] = np.inf + error[nan_idx] = 0 + + if mode == "learn": + new_mean_state, new_cov_state = self.update_posterior(a_t, r_matrix_t[:, :, 0], q_matrix_t[:, :, 0], error) + elif mode == "ignore": + new_mean_state = a_t + new_cov_state = r_matrix_t + else: + raise TypeError(f"Mode {mode} not implemented") + + error[nan_idx] = np.nan + + return new_mean_state, new_cov_state, error + + def calculate_mahalanobis_distance( + self, + new_observations: np.ndarray, + current_mean_state: np.ndarray, + current_cov_state: np.ndarray, + forecast_steps: int = 1, + return_statistics=False, + ) -> Union[Tuple[float, np.ndarray], Tuple[float, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]]: + """Calculate the mahalanobis distance. + + Calculating the Mahalanobis distance which is defined as error.T @ covariance^(-1) @ error + The error is flatted in row-major (C-style) This returns the stacked rows, which in our case is the errors per + observation parameter stacked and this is exactly what we want: array([[1, 2], [3, 4]]).reshape((-1, 1), + order='C') becomes column array([1, 2 3, 4]) + Using a solve method instead of calculating inverse matrices directly + When calculating mhd_per_obs_param we use the partial result and reshape the temporary output such that we can + sum the correct elements associated with the same observable together + When no observation is present (i.e. a nan value) we let the covariance (V matrix) for that particular sensor + such that we set the variance of that sensor for that time instance to infinity and set all cross (covariance) + terms to 0. Instead of changing this in the V matrix, we simply adjust the Q matrix accordingly. Effectively, + we set the posterior equal to the prior for that particular sensor and the uncertainty associated with the new + forecast gets increased. We set the error equal to zero for computational issues, but this does decrease the + number of degrees of freedom for that particular Mahalanobis distance calculation, basically decreasing the + Mahalanobis distance. We allow the option to output the number of degrees of freedom and chi2 statistic which + allows to take this decrease in degrees of freedom into account. 
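        Example (an illustrative sketch; `dlm` is assumed to be an instance of this class, `m_t` and `c_t` the
        current state mean and covariance, and `observations` a [nof_observables x 5] array):
            mhd, mhd_per_obs, dof, dof_per_obs, cdf_overall, cdf_per_obs = dlm.calculate_mahalanobis_distance(
                new_observations=observations, current_mean_state=m_t, current_cov_state=c_t,
                forecast_steps=5, return_statistics=True)
        A cdf_overall value close to 1 flags a stretch of observations that is unlikely under the forecast
        distribution.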
+ + Args: + new_observations (np.ndarray): New observations to use in the calculation of the mahalanobis distance of + size [nof_observables x forecast_steps] + current_mean_state (np.ndarray): Current mean estimate for the state of size [nof_state_parameters x 1] + current_cov_state (np.ndarray): Current covariance estimate for the state of size + [nof_state_parameters x nof_state_parameters] + forecast_steps (int, optional): Number of steps ahead to forecast and use in the mahalanobis distance + calculation + return_statistics (bool, optional): Boolean to return used degrees of freedom and chi2 statistic + Returns: + mhd_overall (float): mahalanobis distance over all observables + mhd_per_obs_param (np.ndarray): mahalanobis distance per observation parameter of size [nof_observables, 1] + + """ + if forecast_steps <= 0: + raise AttributeError("Forecast steps should be a positive integer") + + if new_observations.size / self.nof_observables != forecast_steps: + raise AttributeError("Sizes of new observations and forecast steps are not aligning") + + _, f_t_k = self.forecast_mean(current_mean_state, forecast_steps=np.array(range(forecast_steps)) + 1) + + if new_observations.shape != f_t_k.shape: + raise AttributeError("Dimensions of new_observations are not aligning with dimensions of forecast") + + error = np.subtract(new_observations, f_t_k).reshape((-1, 1), order="C") + + r_t_k, q_t_k = self.forecast_covariance(current_cov_state, forecast_steps=np.array(range(forecast_steps)) + 1) + + nan_bool = np.isnan(new_observations) + if np.any(nan_bool): + nan_idx = np.argwhere(nan_bool) + for value in nan_idx: + q_t_k[value[0], :, value[1]] -= self.v_matrix[value[0], :] + q_t_k[:, value[0], value[1]] -= self.v_matrix[:, value[0]] + + q_t_k[nan_idx[:, 0], nan_idx[:, 0], nan_idx[:, 1]] = np.inf + error[nan_bool.reshape((-1, 1), order="C")] = 0 + + if forecast_steps > 1: + full_covariance = self.create_full_covariance(r_t_k=r_t_k, q_t_k=q_t_k, forecast_steps=forecast_steps) + else: + full_covariance = q_t_k[:, :, 0] + + mhd_overall = mahalanobis_distance(error=error, cov_matrix=full_covariance) + mhd_per_obs_param = np.empty((self.nof_observables, 1)) + + for i_obs in range(self.nof_observables): + ind_hrz = np.array(range(forecast_steps)) + i_obs * forecast_steps + mhd_per_obs_param[i_obs] = mahalanobis_distance( + error=error[ind_hrz], cov_matrix=full_covariance[np.ix_(ind_hrz, ind_hrz)] + ) + + if self.nof_observables == 1: + mhd_per_obs_param = mhd_per_obs_param.item() + + if return_statistics: + dof_per_obs_param = (nan_bool.shape[1] - np.count_nonzero(nan_bool, axis=1)).reshape( + self.nof_observables, 1 + ) + dof_overall = dof_per_obs_param.sum() + chi2_cdf_per_obs_param = chi2.cdf(mhd_per_obs_param.flatten(), dof_per_obs_param.flatten()).reshape( + self.nof_observables, 1 + ) + chi2_cdf_overall = chi2.cdf(mhd_overall, dof_overall) + + return ( + mhd_overall, + mhd_per_obs_param, + dof_overall, + dof_per_obs_param, + chi2_cdf_overall, + chi2_cdf_per_obs_param, + ) + + return mhd_overall, mhd_per_obs_param + + def create_full_covariance(self, r_t_k: np.ndarray, q_t_k: np.ndarray, forecast_steps: int) -> np.ndarray: + """Helper function to construct the full covariance matrix. + + Following Harrison and West (2nd ed) Chapter 4.4 (Forecast distributions) Theorem 4.2 and corollary 4.2 + we construct the full covariance matrix. This full covariance matrix is the covariance matrix of all forecasted + observations with respect to each other. 
Hence, it's COV[Y_{t+k}, Y_{t+j}] with j and k 1<=j,k<=forecast steps + input argument and Y_{t+k} the k step ahead forecast of the observation at time t + + The matrix is build up using the different blocks for different covariances between observations i and j. + The diagonals of each block are calculated first as q_t_k[i, j, :]. + Next the i, j-th (lower triangular) entry of the m, n-th block is calculated as + (F.T @ G^(i-j) r_t_k[:, :, j] @ F)[i, j] + Next each upper triangular part of each lower diagonal block is calculated and next the entire upper triangular + part of the full matrix is calculated + + Args: + r_t_k (np.array): Forecast values of estimated prior state covariance of the size + [nof_state_parameters x nof_state_parameters x forecast_steps] + q_t_k (np.array): Forecast values of estimated observation covariance of the size + [nof_observables x nof_observables x forecast_steps] + forecast_steps (int): Maximum number of steps ahead to forecast and use all of those in the mahalanobis + distance calculation + + Returns: + full_covariance (np.array): Full covariance matrix of all forecasted observations with respect to each other + having size [(nof_observables * forecast_steps) X (nof_observables * forecast_steps)] + + """ + full_covariance = np.zeros((forecast_steps * self.nof_observables, forecast_steps * self.nof_observables)) + base_idx = np.array(range(forecast_steps)) + for block_i in range(self.nof_observables): + for block_j in range(block_i + 1): + block_rows = base_idx + block_i * forecast_steps + block_cols = base_idx + block_j * forecast_steps + full_covariance[block_rows, block_cols] = q_t_k[block_i, block_j, :] + + temp_idx = np.array(range(self.nof_observables)) + for sub_i in np.arange(start=1, stop=forecast_steps, step=1): + sub_row = temp_idx * forecast_steps + sub_i + for sub_j in range(sub_i): + sub_col = temp_idx * forecast_steps + sub_j + sub_idx = np.ix_(sub_row, sub_col) + full_covariance[sub_idx] = ( + self.f_matrix.T @ self.g_power[:, :, sub_i - sub_j] @ r_t_k[:, :, sub_j] @ self.f_matrix + ) + + for block_i in range(self.nof_observables): + for block_j in range(block_i): + block_rows = base_idx + block_i * forecast_steps + block_cols = base_idx + block_j * forecast_steps + block_idx = np.ix_(block_rows, block_cols) + full_covariance[block_idx] = full_covariance[block_idx] + np.tril(full_covariance[block_idx], k=-1).T + + full_covariance = np.tril(full_covariance) + np.tril(full_covariance, k=-1).T + + return full_covariance + + +def mahalanobis_distance(error: np.ndarray, cov_matrix: np.ndarray) -> float: + """Calculate Mahalanobis distance for multivariate observations. + + m = e.T @ inv(cov) @ e + Sometimes the solution does not exist when np.inf value is present in cov_matrix (computational limitations?) 
+ Hence, we set it to a large value instead + + Args: + error (np.ndarray): n x p observation error + cov_matrix (np.ndarray): p x p covariance matrix + + Returns: + np.ndarray: n x 1 mahalanobis distance score for each observation + + """ + if cov_matrix.size == 1: + return error.item() ** 2 / cov_matrix.item() + + partial_solution = np.linalg.solve(cov_matrix, error) + if np.any(np.isnan(partial_solution)): + cov_matrix[np.isinf(cov_matrix)] = 1e100 + partial_solution = np.linalg.solve(cov_matrix, error) + + return np.sum(error * partial_solution, axis=0).item() diff --git a/src/pyelq/gas_species.py b/src/pyelq/gas_species.py new file mode 100644 index 0000000..2f7deb6 --- /dev/null +++ b/src/pyelq/gas_species.py @@ -0,0 +1,232 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Gas Species module. + +The superclass for the Gas species classes. It contains a few gas species with its properties and functionality to +calculate the density of the gas and do emission rate conversions from m^3/s to kg/hr and back + +""" +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Union + +import numpy as np + + +@dataclass +class GasSpecies(ABC): + """Defines the properties of a particular gas species. + + Attributes: + global_background (float, optional): Global background concentration [ppm] + half_life (float, optional): Half life of gas [hr] + __molar_gas_constant (float): R, molar gas constant [JK^-1mol^-1] + + """ + + global_background: float = field(init=False) + half_life: float = field(init=False) + __molar_gas_constant: float = 8.31446261815324 + + @property + @abstractmethod + def name(self) -> str: + """Str: Name of gas.""" + + @property + @abstractmethod + def molar_mass(self) -> float: + """Float: Molar Mass [g/mol].""" + + @property + @abstractmethod + def formula(self) -> str: + """Str: Chemical formula of gas.""" + + def gas_density( + self, temperature: Union[np.ndarray, float] = 273.15, pressure: Union[np.ndarray, float] = 101.325 + ) -> np.ndarray: + """Calculating the density of the gas. + + Calculating the density of the gas given temperature and pressure if temperature and pressure are not provided + we use Standard Temperature and Pressure (STP). + + https://en.wikipedia.org/wiki/Ideal_gas_law + + Args: + temperature (Union[np.ndarray, float], optional): Array of temperatures [Kelvin], + defaults to 273.15 [K] + pressure (Union[np.ndarray, float], optional): Array of pressures [kPa], + defaults to 101.325 [kPa] + + Returns: + density (np.ndarray): Array of gas density values [kg/m^3] + + """ + specific_gas_constant = self.__molar_gas_constant / self.molar_mass + density = np.divide(pressure, (temperature * specific_gas_constant)) + return density + + def convert_emission_m3s_to_kghr( + self, + emission_m3s: Union[np.ndarray, float], + temperature: Union[np.ndarray, float] = 273.15, + pressure: Union[np.ndarray, float] = 101.325, + ) -> np.ndarray: + """Converting emission rates from m^3/s to kg/hr given temperature and pressure. + + If temperature and pressure are not provided we use Standard Temperature and Pressure (STP). 
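        As an illustrative, approximate example for methane at STP: the density evaluates to
        101.325 / (273.15 * (8.31446 / 16.04246)), which is roughly 0.72 kg/m^3, so
            CH4().convert_emission_m3s_to_kghr(1.0)
        returns approximately 0.72 * 3600, i.e. about 2.6e3 kg/hr.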
+ + Args: + emission_m3s (Union[np.ndarray, float]): Array of emission rates [m^3/s] + temperature (Union[np.ndarray, float], optional): Array of temperatures [Kelvin], + defaults to 273.15 [K] + pressure (Union[np.ndarray, float], optional): Array of pressures [kPa], + defaults to 101.325 [kPa] + + Returns: + emission_kghr (np.ndarray): [p x 1] array of emission rates in [kg/hr] + + """ + density = self.gas_density(temperature=temperature, pressure=pressure) + emission_kghr = np.multiply(emission_m3s, density) * 3600 + return emission_kghr + + def convert_emission_kghr_to_m3s( + self, + emission_kghr: Union[np.ndarray, float], + temperature: Union[np.ndarray, float] = 273.15, + pressure: Union[np.ndarray, float] = 101.325, + ) -> np.ndarray: + """Converting emission rates from kg/hr to m^3/s given temperature and pressure. + + If temperature and pressure are not provided we use Standard Temperature and Pressure (STP). + + Args: + emission_kghr (np.ndarray): Array of emission rates in [kg/hr] + temperature (Union[np.ndarray, float], optional): Array of temperatures [Kelvin], + defaults to 273.15 [K] + pressure (Union[np.ndarray, float], optional): Array of pressures [kPa], + defaults to 101.325 [kPa] + + Returns: + emission_m3s (Union[np.ndarray, float]): Array of emission rates [m^3/s] + + """ + density = self.gas_density(temperature=temperature, pressure=pressure) + emission_m3s = np.divide(emission_kghr, density) / 3600 + return emission_m3s + + +@dataclass +class CH4(GasSpecies): + """Defines the properties of CH4.""" + + @property + def name(self): + """Str: Name of gas.""" + return "Methane" + + @property + def molar_mass(self): + """Float: Molar Mass [g/mol].""" + return 16.04246 + + @property + def formula(self): + """Str: Chemical formula of gas.""" + return "CH4" + + global_background = 1.85 + + +@dataclass +class C2H6(GasSpecies): + """Defines the properties of C2H6.""" + + @property + def name(self): + """Str: Name of gas.""" + return "Ethane" + + @property + def molar_mass(self): + """Float: Molar Mass [g/mol].""" + return 30.06904 + + @property + def formula(self): + """Str: Chemical formula of gas.""" + return "C2H6" + + global_background = 5e-4 + + +@dataclass +class C3H8(GasSpecies): + """Defines the properties of C3H8.""" + + @property + def name(self): + """Str: Name of gas.""" + return "Propane" + + @property + def molar_mass(self): + """Float: Molar Mass [g/mol].""" + return 46.0055 + + @property + def formula(self): + """Str: Chemical formula of gas.""" + return "C3H8" + + global_background = 5e-4 + + +@dataclass +class CO2(GasSpecies): + """Defines the properties of CO2.""" + + @property + def name(self): + """Str: Name of gas.""" + return "Carbon Dioxide" + + @property + def molar_mass(self): + """Float: Molar Mass [g/mol].""" + return 44.0095 + + @property + def formula(self): + """Str: Chemical formula of gas.""" + return "CO2" + + global_background = 400 + + +@dataclass +class NO2(GasSpecies): + """Defines the properties of NO2.""" + + @property + def name(self): + """Str: Name of gas.""" + return "Nitrogen Dioxide" + + @property + def molar_mass(self): + """Float: Molar Mass [g/mol].""" + return 46.0055 + + @property + def formula(self): + """Str: Chemical formula of gas.""" + return "NO2" + + global_background = 0 + half_life = 12 diff --git a/src/pyelq/meteorology.py b/src/pyelq/meteorology.py new file mode 100644 index 0000000..f1cf976 --- /dev/null +++ b/src/pyelq/meteorology.py @@ -0,0 +1,303 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions 
International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Meteorology module. + +The superclass for the meteorology classes + +""" +import warnings +from dataclasses import dataclass, field + +import numpy as np +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +from pandas.arrays import DatetimeArray +from scipy.stats import circstd + +from pyelq.coordinate_system import Coordinate +from pyelq.sensor.sensor import SensorGroup + + +@dataclass +class Meteorology: + """Defines the properties and methods of the meteorology class. + + Sizes of all attributes should match. + + Attributes: + wind_speed (np.ndarray, optional): Wind speed [m/s] + wind_direction (np.ndarray, optional): Meteorological wind direction (from) [deg], see + https://confluence.ecmwf.int/pages/viewpage.action?pageId=133262398 + u_component (np.ndarray, optional): u component of wind [m/s] in the easterly direction + v_component (np.ndarray, optional): v component of wind [m/s] in the northerly direction + w_component (np.ndarray, optional): w component of wind [m/s] in the vertical direction + wind_turbulence_horizontal (np.ndarray, optional): Parameter of the wind stability in + horizontal direction [deg] + wind_turbulence_vertical (np.ndarray, optional): Parameter of the wind stability in + vertical direction [deg] + pressure (np.ndarray, optional): Pressure [kPa] + temperature (np.ndarray, optional): Temperature [K] + atmospheric_boundary_layer (np.ndarray, optional): Atmospheric boundary layer [m] + surface_albedo (np.ndarray, optional): Surface reflectance parameter [unitless] + time (pandas.arrays.DatetimeArray, optional): Array containing time values associated with the + meteorological observation + location: (Coordinate, optional): Coordinate object specifying the meteorological observation locations + label (str, optional): String label for object + + """ + + wind_speed: np.ndarray = field(init=False, default=None) + wind_direction: np.ndarray = field(init=False, default=None) + u_component: np.ndarray = field(init=False, default=None) + v_component: np.ndarray = field(init=False, default=None) + w_component: np.ndarray = field(init=False, default=None) + wind_turbulence_horizontal: np.ndarray = field(init=False, default=None) + wind_turbulence_vertical: np.ndarray = field(init=False, default=None) + pressure: np.ndarray = field(init=False, default=None) + temperature: np.ndarray = field(init=False, default=None) + atmospheric_boundary_layer: np.ndarray = field(init=False, default=None) + surface_albedo: np.ndarray = field(init=False, default=None) + time: DatetimeArray = field(init=False, default=None) + location: Coordinate = field(init=False, default=None) + label: str = field(init=False) + + @property + def nof_observations(self) -> int: + """Number of observations.""" + if self.location is None: + return 0 + return self.location.nof_observations + + def calculate_wind_speed_from_uv(self) -> None: + """Calculate wind speed. + + Calculate the wind speed from u and v components. Result gets stored in the wind_speed attribute + + """ + self.wind_speed = np.sqrt(self.u_component**2 + self.v_component**2) + + def calculate_wind_direction_from_uv(self) -> None: + """Calculate wind direction: meteorological convention 0 is wind from the North. + + Calculate the wind direction from u and v components. 
Result gets stored in the wind_direction attribute + See: https://confluence.ecmwf.int/pages/viewpage.action?pageId=133262398 + + """ + self.wind_direction = (270 - 180 / np.pi * np.arctan2(self.v_component, self.u_component)) % 360 + + def calculate_uv_from_wind_speed_direction(self) -> None: + """Calculate u and v components from wind speed and direction. + + Results get stored in the u_component and v_component attributes. + See: https://confluence.ecmwf.int/pages/viewpage.action?pageId=133262398 + + """ + self.u_component = -1 * self.wind_speed * np.sin(self.wind_direction * (np.pi / 180)) + self.v_component = -1 * self.wind_speed * np.cos(self.wind_direction * (np.pi / 180)) + + def calculate_wind_turbulence_horizontal(self, window: str) -> None: + """Calculate the horizontal wind turbulence values from the wind direction attribute. + + Wind turbulence values are calculated as the circular standard deviation based on a rolling window. + Outputted values are calculated at the center of the window and at least 3 observations are required in a + window for the calculation. If the window contains less values the result will be np.nan. + The result of the calculation will be stored as the wind_turbulence_horizontal attribute. + + Args: + window (str): The size of the window in which values are aggregated specified as an offset alias: + https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases + + """ + data_series = pd.Series(data=self.wind_direction, index=self.time) + aggregated_data = data_series.rolling(window=window, center=True, min_periods=3).apply( + circstd, kwargs={"low": 0, "high": 360} + ) + self.wind_turbulence_horizontal = aggregated_data.values + + def plot_polar_hist(self, nof_sectors: int = 16, nof_divisions: int = 5, template: object = None) -> go.Figure(): + """Plots a histogram of wind speed and wind direction in polar Coordinates. + + Args: + nof_sectors (int, optional): The number of wind direction sectors into which the data is binned. + nof_divisions (int, optional): The number of wind speed divisions into which the data is binned. + template (go.update_layout): A layout template which can be applied to the plot. Defaults to None. + + Returns: + fig (go.Figure): A plotly go figure containing the trace of the rose plot. 
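        Example (an illustrative sketch; the wind arrays are assumed to already exist and to have matching sizes):
            met = Meteorology()
            met.wind_speed = wind_speed_array
            met.wind_direction = wind_direction_array
            fig = met.plot_polar_hist(nof_sectors=16, nof_divisions=5)
            fig.show()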
+ + """ + sector_half_width = 0.5 * (360 / nof_sectors) + wind_direction_bin_edges = np.linspace(-sector_half_width, 360 - sector_half_width, nof_sectors + 1) + wind_speed_bin_edges = np.linspace(np.min(self.wind_speed), np.max(self.wind_speed), nof_divisions) + + dataframe = pd.DataFrame() + dataframe["wind_direction"] = [x - 360 if x > (360 - sector_half_width) else x for x in self.wind_direction] + dataframe["wind_speed"] = self.wind_speed + + dataframe["sector"] = pd.cut(dataframe["wind_direction"], wind_direction_bin_edges, include_lowest=True) + if np.allclose(wind_speed_bin_edges[0], wind_speed_bin_edges): + dataframe["speed"] = wind_speed_bin_edges[0] + else: + dataframe["speed"] = pd.cut(dataframe["wind_speed"], wind_speed_bin_edges, include_lowest=True) + + dataframe = dataframe.groupby(["sector", "speed"], observed=False).count() + dataframe = dataframe.rename(columns={"wind_speed": "count"}).drop(columns=["wind_direction"]) + dataframe["%"] = dataframe["count"] / dataframe["count"].sum() + + dataframe = dataframe.reset_index() + dataframe["theta"] = dataframe.apply(lambda x: x["sector"].mid, axis=1) + + fig = px.bar_polar( + dataframe, + r="%", + theta="theta", + color="speed", + direction="clockwise", + start_angle=90, + color_discrete_sequence=px.colors.sequential.Sunset_r, + ) + + ticktext = ["N", "NE", "E", "SE", "S", "SW", "W", "NW"] + polar_dict = { + "radialaxis": {"tickangle": 90}, + "radialaxis_angle": 90, + "angularaxis": { + "tickmode": "array", + "ticktext": ticktext, + "tickvals": list(np.linspace(0, 360 - (360 / 8), 8)), + }, + } + fig.add_annotation( + x=1, + y=1, + yref="paper", + xref="paper", + xanchor="right", + yanchor="top", + align="left", + font={"size": 18, "color": "#000000"}, + showarrow=False, + borderwidth=2, + borderpad=10, + bgcolor="#ffffff", + bordercolor="#000000", + opacity=0.8, + text="Radial Axis: Proportion
<br>of wind measurements<br>
in a given direction.", + ) + + fig.update_layout(polar=polar_dict) + fig.update_layout(template=template) + fig.update_layout(title="Distribution of Wind Speeds and Directions") + + return fig + + def plot_polar_scatter(self, fig, sensor_object: SensorGroup, template: object = None) -> go.Figure(): + """Plots a scatter plot of concentration with respect to wind direction in polar Coordinates. + + Args: + fig (go.Figure): A plotly figure onto which traces can be drawn. + sensor_object (SensorGroup): SensorGroup object which contains the concentration information + template (go.update_layout): A layout template which can be applied to the plot. Defaults to None. + + Returns: + fig (go.Figure): A plotly go figure containing the trace of the rose plot. + + """ + max_concentration = 0 + + for i, (sensor_key, sensor) in enumerate(sensor_object.items()): + if sensor.concentration.shape != self.wind_direction.shape: + warnings.warn( + f"Concentration values for sensor {sensor_key} are of shape " + + f"{sensor.concentration.shape}, but self.wind_direction has shape " + + f"{self.wind_direction.shape}. It will not be plotted on the polar scatter plot." + ) + else: + theta = self.wind_direction + + fig.add_trace( + go.Scatterpolar( + r=sensor.concentration, + theta=theta, + mode="markers", + name=sensor_key, + marker={"color": sensor_object.color_map[i]}, + ) + ) + + max_concentration = np.maximum(np.nanmax(sensor.concentration), max_concentration) + + ticktext = ["N", "NE", "E", "SE", "S", "SW", "W", "NW"] + polar_dict = { + "radialaxis": {"tickangle": 0, "range": [0.0, 1.01 * max_concentration]}, + "radialaxis_angle": 0, + "angularaxis": { + "tickmode": "array", + "ticktext": ticktext, + "direction": "clockwise", + "rotation": 90, + "tickvals": list(np.linspace(0, 360 - (360 / 8), 8)), + }, + } + + fig.add_annotation( + x=1, + y=1, + yref="paper", + xref="paper", + xanchor="right", + yanchor="top", + align="left", + font={"size": 18, "color": "#000000"}, + showarrow=False, + borderwidth=2, + borderpad=10, + bgcolor="#ffffff", + bordercolor="#000000", + opacity=0.8, + text="Radial Axis: Wind
speed in m/s.", + ) + + fig.update_layout(polar=polar_dict) + fig.update_layout(template=template) + fig.update_layout(title="Measured Concentration against Wind Direction.") + + return fig + + +@dataclass +class MeteorologyGroup(dict): + """A dictionary containing multiple Meteorology objects. + + This class is used when we want to define/store a collection of meteorology objects consistent with an associated + SensorGroup which can then be used in further processing, e.g. Gaussian plume coupling computation. + + """ + + @property + def nof_objects(self) -> int: + """Int: Number of meteorology objects contained in the MeteorologyGroup.""" + return len(self) + + def add_object(self, met_object: Meteorology): + """Add an object to the MeteorologyGroup.""" + self[met_object.label] = met_object + + def calculate_uv_from_wind_speed_direction(self): + """Calculate the u and v components for each member of the group.""" + for met in self.values(): + met.calculate_uv_from_wind_speed_direction() + + def calculate_wind_direction_from_uv(self): + """Calculate wind direction from the u and v components for each member of the group.""" + for met in self.values(): + met.calculate_wind_direction_from_uv() + + def calculate_wind_speed_from_uv(self): + """Calculate wind speed from the u and v components for each member of the group.""" + for met in self.values(): + met.calculate_wind_speed_from_uv() diff --git a/src/pyelq/model.py b/src/pyelq/model.py new file mode 100644 index 0000000..c6cf5c9 --- /dev/null +++ b/src/pyelq/model.py @@ -0,0 +1,207 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""ELQModel module. + +This module provides a class definition main functionalities of the codebase, providing the interface with the openMCMC +repo and defining some plotting wrappers. + +""" +import warnings +from dataclasses import dataclass, field +from typing import Union + +import numpy as np +from openmcmc import parameter +from openmcmc.distribution import location_scale +from openmcmc.mcmc import MCMC +from openmcmc.model import Model + +from pyelq.component.background import Background, SpatioTemporalBackground +from pyelq.component.error_model import BySensor, ErrorModel +from pyelq.component.offset import PerSensor +from pyelq.component.source_model import Normal, SourceModel +from pyelq.gas_species import GasSpecies +from pyelq.meteorology import Meteorology, MeteorologyGroup +from pyelq.plotting.plot import Plot +from pyelq.sensor.sensor import SensorGroup + + +@dataclass +class ELQModel: + """Class for setting up, running, and post-processing the full ELQModel analysis. + + Attributes: + form (dict): dictionary detailing the form of the predictor for the concentration data. For details of the + required specification, see parameter.LinearCombinationWithTransform() in the openMCMC repo. + transform (dict): dictionary detailing transformations applied to the model components. For details of the + required specification, see parameter.LinearCombinationWithTransform() in the openMCMC repo. + model (Model): full model specification for the analysis, constructed in self.to_mcmc(). + mcmc (MCMC): MCMC object containing model and sampler specification for the problem. Constructed from the + other components in self.to_mcmc(). + n_iter (int): number of MCMC iterations to be run. + fitted_values (np.ndarray): samples of fitted values (i.e. 
model predictions for the data) generated during the + MCMC sampler. Attached in self.from_mcmc(). + + """ + + form: dict = field(init=False) + transform: dict = field(init=False) + model: Model = field(init=False) + mcmc: MCMC = field(init=False) + n_iter: int = 1000 + fitted_values: np.ndarray = field(init=False) + + def __init__( + self, + sensor_object: SensorGroup, + meteorology: Union[Meteorology, MeteorologyGroup], + gas_species: GasSpecies, + background: Background = SpatioTemporalBackground(), + source_model: SourceModel = Normal(), + error_model: ErrorModel = BySensor(), + offset_model: PerSensor = None, + ): + """Initialise the ELQModel model. + + Model form is as follows: + y = A*s + b + d + e + where: + - y is the vector of observed concentration data (extracted from the sensor object). + - A*s is the source contribution (from the source model and dispersion model). + - b is from the background model. + - d is from the offset model. + - e is residual error term and var(e) comes from the error precision model. + + Args: + sensor_object (SensorGroup): sensor data. + meteorology (Union[Meteorology, MeteorologyGroup]): meteorology data. + gas_species (GasSpecies): gas species object. + background (Background): background model specification. Defaults to SpatioTemporalBackground(). + source_model (SourceModel): source model specification. Defaults to Normal(). + error_model (Precision): measurement precision model specification. Defaults to BySensor(). + offset_model (PerSensor): offset model specification. Defaults to None. + + """ + self.sensor_object = sensor_object + self.meteorology = meteorology + self.gas_species = gas_species + self.components = { + "background": background, + "source": source_model, + "error_model": error_model, + "offset": offset_model, + } + if error_model is None: + self.components["error_model"] = BySensor() + warnings.warn("None is not an allowed type for error_model: resetting to default BySensor model.") + for key in list(self.components.keys()): + if self.components[key] is None: + self.components.pop(key) + + def initialise(self): + """Take data inputs and extract relevant properties.""" + self.form = {} + self.transform = {} + component_keys = list(self.components.keys()) + if "background" in component_keys: + self.form["bg"] = "B_bg" + self.transform["bg"] = False + if "source" in component_keys: + self.transform["s"] = False + self.form["s"] = "A" + if "offset" in component_keys: + self.form["d"] = "B_d" + self.transform["d"] = False + for key in component_keys: + self.components[key].initialise(self.sensor_object, self.meteorology, self.gas_species) + + def to_mcmc(self): + """Convert the ELQModel specification into an MCMC solver object that can be run. + + Executing the following steps: + - Initialise the model object with the data likelihood (response distribution for y), and add all the + associated prior distributions, as specified by the model components. + - Initialise the state dictionary with the observed sensor data, and add parameters associated with all + the associated prior distributions, as specified by the model components. + - Initialise the MCMC sampler objects associated with each of the model components. + - Create the MCMC solver object, using all of the above information. 
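        Example (an illustrative sketch of the overall workflow; `sensors` and `met` are assumed to be a prepared
        SensorGroup and MeteorologyGroup):
            model = ELQModel(sensor_object=sensors, meteorology=met, gas_species=CH4())
            model.n_iter = 5000
            model.initialise()
            model.to_mcmc()
            model.run_mcmc()
            model.from_mcmc()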
+ + """ + response_precision = self.components["error_model"].precision_parameter + model = [ + location_scale.Normal( + "y", + mean=parameter.LinearCombinationWithTransform(self.form, self.transform), + precision=response_precision, + ) + ] + + initial_state = {"y": self.sensor_object.concentration} + + for component in self.components.values(): + model = component.make_model(model) + initial_state = component.make_state(initial_state) + + self.model = Model(model, response={"y": "mean"}) + + sampler_list = [] + for component in self.components.values(): + sampler_list = component.make_sampler(self.model, sampler_list) + + self.mcmc = MCMC(initial_state, sampler_list, self.model, n_burn=0, n_iter=self.n_iter) + + def run_mcmc(self): + """Run the mcmc function.""" + self.mcmc.run_mcmc() + + def from_mcmc(self): + """Extract information from MCMC solver class once its has run. + + Performs two operations: + - For each of the components of the model: extracts the related sampled parameter values and attaches these + to the component class. + - For all keys in the mcmc.store dictionary: extracts the sampled parameter values from self.mcmc.store and + puts them into the equivalent fields in the state + + """ + state = self.mcmc.state + for component in self.components.values(): + component.from_mcmc(self.mcmc.store) + for key in self.mcmc.store: + state[key] = self.mcmc.store[key] + + def plot_log_posterior(self, burn_in_value: int, plot: Plot = Plot()) -> Plot(): + """Plots the trace of the log posterior over the iterations of the MCMC. + + Args: + burn_in_value (int): Burn in value to show in plot. + plot (Plot, optional): Plot object to which this figure will be added in the figure dictionary + + Returns: + plot (Plot): Plot object to which this figure is added in the figure dictionary with + key 'log_posterior_plot' + + """ + plot.plot_single_trace(object_to_plot=self.mcmc, burn_in=burn_in_value) + return plot + + def plot_fitted_values(self, plot: Plot = Plot()) -> Plot: + """Plot the fitted values from the mcmc object against time, also shows the estimated background when possible. + + Based on the inputs it plots the results of the mcmc analysis, being the fitted values of the concentration + measurements together with the 10th and 90th quantile lines to show the goodness of fit of the estimates. + + Args: + plot (Plot, optional): Plot object to which this figure will be added in the figure dictionary + + Returns: + plot (Plot): Plot object to which this figure is added in the figure dictionary with key 'fitted_values' + + """ + plot.plot_fitted_values_per_sensor( + mcmc_object=self.mcmc, sensor_object=self.sensor_object, background_model=self.components["background"] + ) + return plot diff --git a/src/pyelq/plotting/__init__.py b/src/pyelq/plotting/__init__.py new file mode 100644 index 0000000..639769e --- /dev/null +++ b/src/pyelq/plotting/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 +"""Plotting Module.""" +__all__ = ["plot"] diff --git a/src/pyelq/plotting/plot.py b/src/pyelq/plotting/plot.py new file mode 100644 index 0000000..7deaaa3 --- /dev/null +++ b/src/pyelq/plotting/plot.py @@ -0,0 +1,1374 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Plot module. + +Large module containing all the plotting code used to create various plots. 
Contains helper functions and the Plot class +definition. + +""" +import warnings +from copy import deepcopy +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Callable, Type, Union + +import numpy as np +import pandas as pd +import plotly.figure_factory as ff +import plotly.graph_objects as go +from geojson import Feature, FeatureCollection +from openmcmc.mcmc import MCMC +from scipy.ndimage import label +from shapely import geometry + +from pyelq.component.background import TemporalBackground +from pyelq.component.error_model import ErrorModel +from pyelq.component.offset import PerSensor +from pyelq.component.source_model import SlabAndSpike, SourceModel +from pyelq.coordinate_system import ENU, LLA +from pyelq.dispersion_model.gaussian_plume import GaussianPlume +from pyelq.sensor.sensor import Sensor, SensorGroup + +if TYPE_CHECKING: + from pyelq.model import ELQModel + + +def lighter_rgb(rbg_string: str) -> str: + """Takes in an RGB string and returns a lighter version of this colour. + + The colour is made lighter by increasing the magnitude of the RGB values by half of the difference between the + original value and the number 255. + + Arguments: + rbg_string (str): An RGB string. + + """ + rbg_string = rbg_string[4:-1] + rbg_string = rbg_string.replace(" ", "") + colors = rbg_string.split(",") + colors_out = [np.nan, np.nan, np.nan] + + for i, color in enumerate(colors): + color = int(color) + color = min(int(round(color + ((255 - color) * 0.5))), 255) + colors_out[i] = color + + return f"rgb({colors_out[0]}, {colors_out[1]}, {colors_out[2]})" + + +def plot_quantiles_from_array( + fig: go.Figure, + x_values: Union[np.ndarray, pd.arrays.DatetimeArray], + y_values: np.ndarray, + quantiles: Union[tuple, list, np.ndarray], + color: str, + name: str = None, +) -> go.Figure: + """Plot quantiles over y-values against x-values. + + Assuming x-values have size N and y-values have size [N x M] where the second dimension is the dimension to + calculate the quantiles over. + + Will plot the median of the y-values as a solid line and a filled area between the lower and upper specified + quantile. + + Args: + fig (go.Figure): Plotly figure to add the traces on. + x_values (Union[np.ndarray, pd.arrays.DatetimeArray]): Numpy array containing the x-values to plot. + y_values (np.ndarray): Numpy array containing the y-values to calculate the quantiles for. + quantiles (Union[tuple, list, np.ndarray]): Values of upper and lower quantile to plot in range (0-100) + color (str): RGB string specifying color for quantile fill plot. + name (str, optional): Optional string name to show in the legend. + + Returns: + fig (go.Figure): Plotly figure with the quantile filled traces and median trace added on it. 
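    Example (an illustrative sketch; `times` is assumed to be an array of N timestamps and `samples` an [N x M]
    array of posterior draws):
        fig = go.Figure()
        fig = plot_quantiles_from_array(fig=fig, x_values=times, y_values=samples, quantiles=(10, 90),
                                        color="rgb(102, 197, 204)", name="sensor_1")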
+ + """ + color_fill = f"rgba{color[3:-1]}, 0.3)" + + median_trace = go.Scatter( + x=x_values, + y=np.median(y_values, axis=1), + mode="lines", + line={"width": 3, "color": color}, + name=f"Median for {name}", + legendgroup=name, + showlegend=False, + ) + + lower_quantile_trace = go.Scatter( + x=x_values, + y=np.quantile(y_values, axis=1, q=quantiles[0] / 100), + mode="lines", + line={"width": 0, "color": color_fill}, + name=f"{quantiles[0]}% quantile", + legendgroup=name, + showlegend=False, + ) + + upper_quantile_trace = go.Scatter( + x=x_values, + y=np.quantile(y_values, axis=1, q=quantiles[1] / 100), + fill="tonexty", + fillcolor=color_fill, + mode="lines", + line={"width": 0, "color": color_fill}, + name=f"{quantiles[1]}% quantile", + legendgroup=name, + showlegend=False, + ) + + fig.add_trace(median_trace) + fig.add_trace(lower_quantile_trace) + fig.add_trace(upper_quantile_trace) + + return fig + + +def create_trace_specifics(object_to_plot: Union[Type[SlabAndSpike], SourceModel, MCMC], **kwargs) -> dict: + """Specification of different traces of single variables. + + Provides all details for plots where we want to plot a single variable as a line plot. Based on the object_to_plot + we select the correct plot to show. + + Args: + object_to_plot (Union[Type[SlabAndSpike], SourceModel, MCMC]): Object which we want to plot a single + variable from + **kwargs (dict): Additional key word arguments, e.g. burn_in or dict_key, used in some specific plots but not + applicable to all. + + Returns: + dict: A dictionary with the following key/values: + x_values (Union[np.ndarray, pd.arrays.DatetimeArray]): Array containing the x-values to plot. + y_values (np.ndarray): Numpy array containing the y-values to use in plotting. + dict_key (str): String key associated with this plot to be used in the figure_dict attribute of the Plot + class. + title_text (str): String title of the plot. + x_label (str): String label of x-axis. + y_label (str) : String label of y-axis. + name (str): String name to show in the legend. + color (str): RGB string specifying color for plot. + + Raises: + ValueError: When no specifics are defined for the inputted object to plot. + + """ + if isinstance(object_to_plot, SourceModel): + dict_key = kwargs.pop("dict_key", "number_of_sources_plot") + title_text = "Number of Sources 'on' against MCMC iterations" + x_label = "MCMC Iteration Number" + y_label = "Number of Sources 'on'" + emission_rates = object_to_plot.emission_rate + if isinstance(object_to_plot, SlabAndSpike): + total_nof_sources = emission_rates.shape[0] + y_values = total_nof_sources - np.sum(object_to_plot.allocation, axis=0) + elif object_to_plot.reversible_jump: + y_values = np.count_nonzero(np.logical_not(np.isnan(emission_rates)), axis=0) + else: + raise TypeError("No plotting routine implemented for this SourceModel type.") + x_values = np.array(range(y_values.size)) + color = "rgb(248, 156, 116)" + name = "Number of Sources 'on'" + + elif isinstance(object_to_plot, MCMC): + dict_key = kwargs.pop("dict_key", "log_posterior_plot") + title_text = "Log posterior values against MCMC iterations" + x_label = "MCMC Iteration Number" + y_label = "Log Posterior
Value" + y_values = object_to_plot.store["log_post"].flatten() + x_values = np.array(range(y_values.size)) + color = "rgb(102, 197, 204)" + name = "Log Posterior" + + if "burn_in" not in kwargs: + warnings.warn("Burn in is not specified for the Log Posterior plot, are you sure this is correct?") + + else: + raise ValueError("No values to plot") + + return { + "x_values": x_values, + "y_values": y_values, + "dict_key": dict_key, + "title_text": title_text, + "x_label": x_label, + "y_label": y_label, + "name": name, + "color": color, + } + + +def create_plot_specifics( + object_to_plot: Union[ErrorModel, PerSensor, MCMC], sensor_object: SensorGroup, plot_type: str = "", **kwargs +) -> dict: + """Specification of different traces where we want to plot a trace for each sensor. + + Provides all details for plots where we want to plot a single variable for each sensor as a line or box plot. + Based on the object_to_plot we select the correct plot to show. + + When plotting the MCMC Observations and Predicted Model Values Against Time plot we are assuming time axis is the + same for all sensors w.r.t. the fitted values from the MCMC store attribute, so we are only using the time axis + from the first sensor. + + Args: + object_to_plot (Union[ErrorModel, PerSensor, MCMC]): Object which we want to plot a single variable from + sensor_object (SensorGroup): SensorGroup object associated with the object_to_plot + plot_type (str, optional): String specifying either a line or a box plot. + **kwargs (dict): Additional key word arguments, e.g. burn_in or dict_key, used in some specific plots but not + applicable to all. + + Returns: + dict: A dictionary with the following key/values: + x_values (Union[np.ndarray, pd.arrays.DatetimeArray]): Array containing the x-values to plot. + y_values (np.ndarray): Numpy array containing the y-values to use in plotting. + dict_key (str): String key associated with this plot to be used in the figure_dict attribute of the + Plot class. + title_text (str): String title of the plot. + x_label (str): String label of x-axis. + y_label (str): String label of y-axis. + plot_type (str): Type of plot which needs to be generated. + + Raises: + ValueError: When no specifics are defined for the inputted object to plot. + + """ + if isinstance(object_to_plot, ErrorModel): + y_values = np.sqrt(1 / object_to_plot.precision) + x_values = np.array(range(y_values.shape[1])) + + if plot_type == "line": + dict_key = kwargs.pop("dict_key", "error_model_iterations") + title_text = "Estimated Error Model Values" + x_label = "MCMC Iteration Number" + y_label = "Estimated Error Model
<br>Standard Deviation (ppm)"
+
+        elif plot_type == "box":
+            dict_key = kwargs.pop("dict_key", "error_model_distributions")
+            title_text = "Distributions of Estimated Error Model Values After Burn-In"
+            x_label = "Sensor"
+            y_label = "Estimated Error Model<br>
Standard Deviation (ppm)" + + else: + raise ValueError("Only line and box are allowed for the plot_type argument for ErrorModel") + + if "burn_in" not in kwargs: + warnings.warn("Burn in is not specified for the ErrorModel plot, are you sure this is correct?") + + elif isinstance(object_to_plot, PerSensor): + offset_sensor_name = list(sensor_object.values())[0].label + y_values = object_to_plot.offset + nan_row = np.tile(np.nan, (1, y_values.shape[1])) + y_values = np.concatenate((nan_row, y_values), axis=0) + x_values = np.array(range(y_values.shape[1])) + + if plot_type == "line": + dict_key = kwargs.pop("dict_key", "offset_iterations") + title_text = f"Estimated Value of Offset w.r.t. {offset_sensor_name}" + x_label = "MCMC Iteration Number" + y_label = "Estimated Offset
<br>Value (ppm)"
+
+        elif plot_type == "box":
+            dict_key = kwargs.pop("dict_key", "offset_distributions")
+            title_text = f"Distributions of Estimated Offset Values w.r.t. {offset_sensor_name} After Burn-In"
+            x_label = "Sensor"
+            y_label = "Estimated Offset<br>
Value (ppm)" + + else: + raise ValueError("Only line and box are allowed for the plot_type argument for PerSensor OffsetModel") + + if "burn_in" not in kwargs: + warnings.warn("Burn in is not specified for the PerSensor OffsetModel plot, are you sure this is correct?") + + elif isinstance(object_to_plot, MCMC): + y_values = object_to_plot.store["y"] + x_values = list(sensor_object.values())[0].time + dict_key = kwargs.pop("dict_key", "fitted_values") + title_text = "Observations and Predicted Model Values Against Time" + x_label = "Time" + y_label = "Concentration (ppm)" + plot_type = "line" + + else: + raise ValueError("No values to plot") + + return { + "x_values": x_values, + "y_values": y_values, + "dict_key": dict_key, + "title_text": title_text, + "x_label": x_label, + "y_label": y_label, + "plot_type": plot_type, + } + + +def plot_single_scatter( + fig: go.Figure, + x_values: Union[np.ndarray, pd.arrays.DatetimeArray], + y_values: np.ndarray, + color: str, + name: str, + **kwargs, +) -> go.Figure: + """Plots a single scatter trace on the supplied figure object. + + Args: + fig (go.Figure): Plotly figure to add the trace to. + x_values (Union[np.ndarray, pd.arrays.DatetimeArray]): X values to plot + y_values (np.ndarray): Numpy array containing the y-values to use in plotting. + color (str): RGB color string to use for this trace. + name (str): String name to show in the legend. + **kwargs (dict): Additional key word arguments, e.g. burn_in, legend_group, show_legend, used in some specific plots + but not applicable to all. + + Returns: + fig (go.Figure): Plotly figure with the trace added to it. + + """ + burn_in = kwargs.pop("burn_in", 0) + legend_group = kwargs.pop("legend_group", name) + show_legend = kwargs.pop("show_legend", True) + if burn_in > 0: + fig.add_trace( + go.Scatter( + x=x_values[: burn_in + 1], + y=y_values[: burn_in + 1], + name=name, + mode="lines", + line={"width": 3, "color": lighter_rgb(color)}, + legendgroup=legend_group, + showlegend=False, + ) + ) + + fig.add_trace( + go.Scatter( + x=x_values[burn_in:], + y=y_values[burn_in:], + name=name, + mode="lines", + line={"width": 3, "color": color}, + legendgroup=legend_group, + showlegend=show_legend, + ) + ) + + return fig + + +def plot_single_box(fig: go.Figure, y_values: np.ndarray, color: str, name: str) -> go.Figure: + """Plot a single box plot trace on the plot figure. + + Args: + fig (go.Figure): Plotly figure to add the trace to. + y_values (np.ndarray): Numpy array containing the y-values to use in plotting. + color (str): RGB color string to use for this trace. + name (str): String name to show in the legend. + + Returns: + fig (go.Figure): Plotly figure with the trace added to it. + + """ + fig.add_trace(go.Box(y=y_values, name=name, legendgroup=name, marker={"color": color})) + + return fig + + +def plot_polygons_on_map( + polygons: Union[np.ndarray, list], values: np.ndarray, opacity: float, map_color_scale: str, **kwargs +) -> go.Choroplethmapbox: + """Plot a set of polygons on a map. + + Args: + polygons (Union[np.ndarray, list]): Numpy array or list containing the polygons to plot. + values (np.ndarray): Numpy array consistent with polygons containing the value which is + used in coloring the polygons on the map. + opacity (float): Float between 0 and 1 specifying the opacity of the polygon fill color. + map_color_scale (str): The string which defines which plotly color scale. 
+ **kwargs (dict): Additional key word arguments which can be passed on the go.Choroplethmapbox object (will override + the default values as specified in this function) + + Returns: + trace: go.Choroplethmapbox trace with the colored polygons which can be added to a go.Figure object. + + """ + polygon_id = list(range(values.shape[0])) + feature_collection = FeatureCollection([Feature(geometry=polygons[idx], id_value=idx) for idx in polygon_id]) + text_box = [ + f"Polygon ID: {counter:d}
<br>Center (lon, lat): "
+        f"({polygons[counter].centroid.coords[0][0]:.4f}, {polygons[counter].centroid.coords[0][1]:.4f})<br>
" + f"Value: {values[counter]:f}
" + for counter in polygon_id + ] + + trace_options = { + "geojson": feature_collection, + "featureidkey": "id_value", + "locations": polygon_id, + "z": values, + "marker": {"line": {"width": 0}, "opacity": opacity}, + "hoverinfo": "text", + "text": text_box, + "name": "Values", + "colorscale": map_color_scale, + "colorbar": {"title": "Values"}, + "showlegend": True, + } + + for key, value in kwargs.items(): + trace_options[key] = value + + trace = go.Choroplethmapbox(**trace_options) + + return trace + + +def plot_regular_grid( + coordinates: LLA, + values: np.ndarray, + opacity: float, + map_color_scale: str, + tolerance: float = 1e-7, + unit: str = "kg/hr", + name="Values", +) -> go.Choroplethmapbox: + """Plots a regular grid of LLA data onto a map. + + So long as the input array is regularly spaced, the value of the spacing is found. A set of rectangles are defined + where the centre of the rectangle is the LLA coordinate. + + Args: + coordinates (LLA object): A LLA coordinate object containing a set of locations. + values (np.array): A set of values that correspond to locations specified in the coordinates. + opacity (float): The opacity of the grid cells when they are plotted. + map_color_scale (str): The string which defines which plotly color scale should be used when plotting + the values. + tolerance (float, optional): Absolute value above which the difference between values is considered significant. + Used to calculate the regular grid of coordinate values. Defaults to 1e-7. + unit (str, optional): The unit to be added to the colorscale. Defaults to kg/hr. + name (str, optional): Name for the trace to be used in the color bar as well + + Returns: + trace (go.Choroplethmapbox): Trace with the colored polygons which can be added to a go.Figure object. + + """ + _, gridsize_lat = is_regularly_spaced(coordinates.latitude, tolerance=tolerance) + _, gridsize_lon = is_regularly_spaced(coordinates.longitude, tolerance=tolerance) + + polygons = [ + geometry.box( + coordinates.longitude[idx] - gridsize_lon / 2, + coordinates.latitude[idx] - gridsize_lat / 2, + coordinates.longitude[idx] + gridsize_lon / 2, + coordinates.latitude[idx] + gridsize_lat / 2, + ) + for idx in range(coordinates.nof_observations) + ] + + trace = plot_polygons_on_map( + polygons=polygons, + values=values, + opacity=opacity, + name=name, + colorbar={"title": name + "
" + unit}, + map_color_scale=map_color_scale, + ) + + return trace + + +def plot_hexagonal_grid( + coordinates: LLA, + values: np.ndarray, + opacity: float, + map_color_scale: str, + num_hexagons: Union[int, None], + show_positions: bool, + aggregate_function: Callable = np.sum, +): + """Plots a set of values into hexagonal bins with respect to the location of the values. + + Any data points that fall within the area of a hexagon are used to perform aggregation and bin the data. + See: https://plotly.com/python-api-reference/generated/plotly.figure_factory.create_hexbin_mapbox.html + + Args: + coordinates (LLA object): A LLA coordinate object containing a set of locations. + values (np.array): A set of values that correspond to locations specified in the coordinates. + opacity (float): The opacity of the hexagons when they are plotted. + map_color_scale (str): Colour scale for plotting values. + num_hexagons (Union[int, None]): The number of hexagons which define the *horizontal* axis of the plot. + show_positions (bool): A flag to determine whether the original data should be shown alongside + the binning hexagons. + aggregate_function (Callable, optional): Function which to apply on the data in each hexagonal bin to aggregate + the data and visualise the result. + + Returns: + (go.Figure): A plotly go figure representing the data which was submitted to this function. + + """ + if num_hexagons is None: + num_hexagons = max(1, np.ceil((np.max(coordinates.longitude) - np.min(coordinates.longitude)) / 0.25)) + + coordinates = coordinates.to_lla() + + hex_plot = ff.create_hexbin_mapbox( + lat=coordinates.latitude, + lon=coordinates.longitude, + color=values, + nx_hexagon=num_hexagons, + opacity=opacity, + agg_func=aggregate_function, + color_continuous_scale=map_color_scale, + show_original_data=show_positions, + original_data_marker={"color": "black"}, + ) + + return hex_plot + + +@dataclass +class Plot: + """Defines the plot class. + + Can be used to generate various figures from model components while storing general settings to get consistent + figure appearance. + + Attributes: + figure_dict (dict): Figure dictionary, used as storage using keys to identify the different figures. + mapbox_token (str, optional): Optional mapbox token, used for plotting mapbox backgrounds. + layout (dict, optional): Layout template for plotly figures, used in all figures generated using this class + instance. + + """ + + figure_dict: dict = field(default_factory=dict) + mapbox_token: str = "empty" + layout: dict = field(default_factory=dict) + + def __post_init__(self): + """Using post init to set the default layout, not able to do this in attribute definition/initialization.""" + self.layout = { + "layout": go.Layout( + font={"family": "Futura", "size": 20}, + title={"x": 0.5}, + title_font={"size": 30}, + xaxis={"ticks": "outside", "showline": True, "linewidth": 2}, + yaxis={"ticks": "outside", "showline": True, "linewidth": 2}, + legend={ + "orientation": "v", + "yanchor": "middle", + "y": 0.5, + "xanchor": "right", + "x": 1.2, + "font": {"size": 14, "color": "black"}, + }, + ) + } + + def show_all(self, renderer="browser"): + """Show all the figures which are in the figure dictionary. + + Args: + renderer (str, optional): Default renderer to use when showing the figures. + + """ + for fig in self.figure_dict.values(): + fig.show(renderer=renderer) + + def plot_single_trace(self, object_to_plot: Union[Type[SlabAndSpike], SourceModel, MCMC], **kwargs): + """Plotting a trace of a single variable. 
+ + Depending on the object to plot it creates a figure which is stored in the figure_dict attribute. + First it grabs all the specifics needed for the plot and then plots the trace. + + Args: + object_to_plot (Union[Type[SlabAndSpike], SourceModel, MCMC]): The object from which to plot a variable + **kwargs (dict): Additional key word arguments, e.g. burn_in, legend_group, show_legend, dict_key, used in some + specific plots but not applicable to all. + + """ + plot_specifics = create_trace_specifics(object_to_plot=object_to_plot, **kwargs) + + burn_in = kwargs.pop("burn_in", 0) + + fig = go.Figure() + fig = plot_single_scatter( + fig=fig, + x_values=plot_specifics["x_values"], + y_values=plot_specifics["y_values"], + color=plot_specifics["color"], + name=plot_specifics["name"], + burn_in=burn_in, + ) + + if burn_in > 0: + fig.add_vline( + x=burn_in, line_width=3, line_dash="dash", line_color="black", annotation_text=f"\tBurn in: {burn_in}" + ) + if isinstance(object_to_plot, SlabAndSpike) and isinstance(object_to_plot, SourceModel): + prior_num_sources_on = round(object_to_plot.emission_rate.shape[0] * object_to_plot.slab_probability, 2) + + fig.add_hline( + y=prior_num_sources_on, + line_width=3, + line_dash="dash", + line_color="black", + annotation_text=f"Prior sources 'on': {prior_num_sources_on}", + ) + + if self.layout is not None: + fig.update_layout(template=self.layout) + + fig.update_layout(title=plot_specifics["title_text"]) + fig.update_xaxes(title_standoff=20, automargin=True, title_text=plot_specifics["x_label"]) + fig.update_yaxes(title_standoff=20, automargin=True, title_text=plot_specifics["y_label"]) + + self.figure_dict[plot_specifics["dict_key"]] = fig + + def plot_trace_per_sensor( + self, + object_to_plot: Union[ErrorModel, PerSensor, MCMC], + sensor_object: Union[SensorGroup, Sensor], + plot_type: str, + **kwargs, + ): + """Plotting a trace of a single variable per sensor. + + Depending on the object to plot it creates a figure which is stored in the figure_dict attribute. + First it grabs all the specifics needed for the plot and then plots the trace per sensor. + + Args: + object_to_plot (Union[ErrorModel, PerSensor, MCMC]): The object which to plot a variable from + sensor_object (Union[SensorGroup, Sensor]): Sensor object associated with the object_to_plot + plot_type (str): String specifying a line or box plot. + **kwargs (dict): Additional key word arguments, e.g. burn_in, legend_group, show_legend, dict_key, used in some + specific plots but not applicable to all. 
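        Example (an illustrative sketch; `model` is assumed to be an ELQModel whose MCMC has been run and
        `sensors` the associated SensorGroup):
            plot = Plot()
            plot.plot_trace_per_sensor(object_to_plot=model.components["error_model"], sensor_object=sensors,
                                       plot_type="box", burn_in=500)
            plot.show_all()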
+ + """ + if isinstance(sensor_object, Sensor): + temp = SensorGroup() + temp.add_sensor(sensor_object) + sensor_object = deepcopy(temp) + plot_specifics = create_plot_specifics( + object_to_plot=object_to_plot, sensor_object=sensor_object, plot_type=plot_type, **kwargs + ) + burn_in = kwargs.pop("burn_in", 0) + + fig = go.Figure() + for sensor_idx, sensor_key in enumerate(sensor_object.keys()): + color_idx = sensor_idx % len(sensor_object.color_map) + color = sensor_object.color_map[color_idx] + + if plot_specifics["plot_type"] == "line": + fig = plot_single_scatter( + fig=fig, + x_values=plot_specifics["x_values"], + y_values=plot_specifics["y_values"][sensor_idx, :], + color=color, + name=sensor_key, + burn_in=burn_in, + ) + elif plot_specifics["plot_type"] == "box": + fig = plot_single_box( + fig=fig, + y_values=plot_specifics["y_values"][sensor_idx, burn_in:].flatten(), + color=color, + name=sensor_key, + ) + + if burn_in > 0 and plot_specifics["plot_type"] == "line": + fig.add_vline( + x=burn_in, line_width=3, line_dash="dash", line_color="black", annotation_text=f"\tBurn in: {burn_in}" + ) + + if self.layout is not None: + fig.update_layout(template=self.layout) + + fig.update_layout(title=plot_specifics["title_text"]) + fig.update_xaxes(title_standoff=20, automargin=True, title_text=plot_specifics["x_label"]) + fig.update_yaxes(title_standoff=20, automargin=True, title_text=plot_specifics["y_label"]) + + self.figure_dict[plot_specifics["dict_key"]] = fig + + def plot_fitted_values_per_sensor( + self, + mcmc_object: MCMC, + sensor_object: Union[SensorGroup, Sensor], + background_model: TemporalBackground = None, + burn_in: int = 0, + ): + """Plot the fitted values from the mcmc object against time, also shows the estimated background when inputted. + + Based on the inputs it plots the results of the mcmc analysis, being the fitted values of the concentration + measurements together with the 10th and 90th quantile lines to show the goodness of fit of the estimates. + + The created figure is stored in the figure_dict attribute. + + Args: + mcmc_object (MCMC): MCMC object which contains the fitted values in the store attribute of the object. + sensor_object (Union[SensorGroup, Sensor]): Sensor object associated with the object_to_plot + background_model (TemporalBackground, optional): Background model containing the estimated background. + burn_in (int, optional): Number of burn-in iterations to discard before calculating the quantiles + and median. Defaults to 0. 
+ + """ + if "y" not in mcmc_object.store: + raise ValueError("Missing fitted values ('y') in mcmc_store_object") + + if isinstance(sensor_object, Sensor): + temp = SensorGroup() + temp.add_sensor(sensor_object) + sensor_object = deepcopy(temp) + + y_values_overall = mcmc_object.store["y"] + dict_key = "fitted_values" + title_text = "Observations and Predicted Model Values Against Time" + x_label = "Time" + y_label = "Concentration (ppm)" + fig = go.Figure() + + for sensor_idx, sensor_key in enumerate(sensor_object.keys()): + plot_idx = np.array(sensor_object.sensor_index == sensor_idx) + + x_values = sensor_object[sensor_key].time + y_values = y_values_overall[plot_idx, burn_in:] + + color_idx = sensor_idx % len(sensor_object.color_map) + color = sensor_object.color_map[color_idx] + + fig = plot_quantiles_from_array( + fig=fig, x_values=x_values, y_values=y_values, quantiles=[10, 90], color=color, name=sensor_key + ) + + if isinstance(background_model, TemporalBackground): + fig = plot_quantiles_from_array( + fig=fig, + x_values=background_model.time, + y_values=background_model.bg, + quantiles=[10, 90], + color="rgb(186, 186, 186)", + name="Background", + ) + + fig.for_each_trace( + lambda trace: ( + trace.update(showlegend=True, name="Background") if trace.name == "Median for Background" else () + ), + ) + + fig = sensor_object.plot_timeseries(fig=fig, color_map=sensor_object.color_map, mode="markers") + + fig.add_annotation( + x=1, + y=1.1, + yref="paper", + xref="paper", + xanchor="left", + yanchor="top", + font={"size": 12, "color": "#000000"}, + align="left", + showarrow=False, + borderwidth=2, + borderpad=10, + bgcolor="#ffffff", + bordercolor="#000000", + opacity=0.8, + text=( + "Point: Real observation
<br> Line: Predicted Value<br>
Shading: " + "Quantiles 10-90" + ), + ) + + if self.layout is not None: + fig.update_layout(template=self.layout) + + fig.update_layout(title=title_text) + fig.update_xaxes(title_standoff=20, automargin=True, title_text=x_label) + fig.update_yaxes(title_standoff=20, automargin=True, title_text=y_label) + + self.figure_dict[dict_key] = fig + + def plot_emission_rate_estimates(self, source_model_object, y_axis_type="linear", **kwargs): + """Plot the emission rate estimates source model object against MCMC iteration. + + Based on the inputs it plots the results of the mcmc analysis, being the estimated emission rate values for + each source location together with the total emissions estimate, which is the sum over all source locations. + + The created figure is stored in the figure_dict attribute. + + After the loop over all sources we add an empty trace to have the legend entry and desired legend group + behaviour. + + Args: + source_model_object (SourceModel): Source model object which contains the estimated emission rate estimates. + y_axis_type (str, optional): String to indicate whether the y-axis should be linear of log scale. + **kwargs (dict): Additional key word arguments, e.g. burn_in, dict_key, used in some specific plots but not + applicable to all. + + """ + total_emissions = np.nansum(source_model_object.emission_rate, axis=0) + x_values = np.array(range(total_emissions.size)) + + burn_in = kwargs.pop("burn_in", 0) + + dict_key = "estimated_values_plot" + title_text = "Estimated Values of Sources With Respect to MCMC Iterations" + x_label = "MCMC Iteration Number" + y_label = "Estimated Emission
Values (kg/hr)" + + fig = go.Figure() + + fig = plot_single_scatter( + fig=fig, + x_values=x_values, + y_values=total_emissions, + color="rgb(239, 85, 59)", + name="Total Site Emissions", + burn_in=burn_in, + show_legend=True, + ) + + for source_idx in range(source_model_object.emission_rate.shape[0]): + y_values = source_model_object.emission_rate[source_idx, :] + + fig = plot_single_scatter( + fig=fig, + x_values=x_values, + y_values=y_values, + color="rgb(102, 197, 204)", + name=f"Source {source_idx}", + burn_in=burn_in, + show_legend=False, + legend_group="Source traces", + ) + + fig = plot_single_scatter( + fig=fig, + x_values=np.array([None]), + y_values=np.array([None]), + color="rgb(102, 197, 204)", + name="Source traces", + burn_in=0, + show_legend=True, + ) + + if burn_in > 0: + fig.add_vline( + x=burn_in, line_width=3, line_dash="dash", line_color="black", annotation_text=f"\tBurn in: {burn_in}" + ) + + if self.layout is not None: + fig.update_layout(template=self.layout) + + fig.add_annotation( + x=1.05, + y=1.05, + yref="paper", + xref="paper", + xanchor="left", + yanchor="top", + align="left", + font={"size": 12, "color": "#000000"}, + showarrow=False, + borderwidth=2, + borderpad=10, + bgcolor="#ffffff", + bordercolor="#000000", + opacity=0.8, + text=( + "Total Site Emissions are
<br> the sum of all estimated<br>
 " + "emission rates at a given<br>
iteration number." + ), + ) + + fig.update_layout(title=title_text) + fig.update_xaxes(title_standoff=20, automargin=True, title_text=x_label) + fig.update_yaxes(title_standoff=20, automargin=True, title_text=y_label) + if y_axis_type == "log": + fig.update_yaxes(type="log") + dict_key = "log_estimated_values_plot" + elif y_axis_type != "linear": + raise ValueError(f"Only linear or log y axis type is allowed, {y_axis_type} was currently specified.") + + self.figure_dict[dict_key] = fig + + def create_empty_mapbox_figure(self, dict_key: str = "map_plot") -> None: + """Creating an empty mapbox figure to use when you want to add additional traces on a map. + + Args: + dict_key (str, optional): String key for figure dictionary + + """ + self.figure_dict[dict_key] = go.Figure( + data=go.Scattermapbox(), + layout={ + "mapbox_style": "carto-positron", + "mapbox_center_lat": 0, + "mapbox_center_lon": 0, + "mapbox_zoom": 0, + "mapbox_accesstoken": self.mapbox_token, + }, + ) + + def plot_values_on_map( + self, dict_key: str, coordinates: LLA, values: np.ndarray, aggregate_function: Callable = np.sum, **kwargs + ): + """Plot values on a map based on coordinates. + + Args: + dict_key (str): Sting key to use in the figure dictionary + coordinates (LLA): LLA coordinates to use in plotting the values on the map + values (np.ndarray): Numpy array of values consistent with coordinates to plot on the map + aggregate_function (Callable, optional): Function which to apply on the data in each hexagonal bin to + aggregate the data and visualise the result. + **kwargs (dict): Additional keyword arguments for plotting behaviour (opacity, map_color_scale, num_hexagons, + show_positions) + + """ + map_color_scale = kwargs.pop("map_color_scale", "YlOrRd") + num_hexagons = kwargs.pop("num_hexagons", None) + opacity = kwargs.pop("opacity", 0.8) + show_positions = kwargs.pop("show_positions", False) + + latitude_check, _ = is_regularly_spaced(coordinates.latitude) + longitude_check, _ = is_regularly_spaced(coordinates.longitude) + if latitude_check and longitude_check: + self.create_empty_mapbox_figure(dict_key=dict_key) + trace = plot_regular_grid( + coordinates=coordinates, + values=values, + opacity=opacity, + map_color_scale=map_color_scale, + tolerance=1e-7, + unit="", + ) + self.figure_dict[dict_key].add_trace(trace) + else: + fig = plot_hexagonal_grid( + coordinates=coordinates, + values=values, + opacity=opacity, + map_color_scale=map_color_scale, + num_hexagons=num_hexagons, + show_positions=show_positions, + aggregate_function=aggregate_function, + ) + fig.update_layout(mapbox_accesstoken=self.mapbox_token, mapbox_style="carto-positron") + self.figure_dict[dict_key] = fig + + center_longitude = np.mean(coordinates.longitude) + center_latitude = np.mean(coordinates.latitude) + self.figure_dict[dict_key].update_layout( + mapbox={"zoom": 10, "center": {"lon": center_longitude, "lat": center_latitude}} + ) + + if self.layout is not None: + self.figure_dict[dict_key].update_layout(template=self.layout) + + def plot_quantification_results_on_map( + self, + model_object: "ELQModel", + bin_size_x: float = 1, + bin_size_y: float = 1, + normalized_count_limit: float = 0.005, + burn_in: int = 0, + show_summary_results: bool = True, + ): + """Placeholder for the quantification plots.""" + nof_iterations = model_object.n_iter + ref_latitude = model_object.components["source"].dispersion_model.source_map.location.ref_latitude + ref_longitude = 
model_object.components["source"].dispersion_model.source_map.location.ref_longitude + ref_altitude = model_object.components["source"].dispersion_model.source_map.location.ref_altitude + datetime_min_string = model_object.sensor_object.time.min().strftime("%d-%b-%Y, %H:%M:%S") + datetime_max_string = model_object.sensor_object.time.max().strftime("%d-%b-%Y, %H:%M:%S") + + all_source_locations = model_object.mcmc.store["z_src"] + min_x = np.nanmin(all_source_locations[0, :, :]) + max_x = np.nanmax(all_source_locations[0, :, :]) + min_y = np.nanmin(all_source_locations[1, :, :]) + max_y = np.nanmax(all_source_locations[1, :, :]) + + bin_min_x = np.floor(min_x - 0.1) + bin_max_x = np.ceil(max_x + 0.1) + bin_min_y = np.floor(min_y - 0.1) + bin_max_y = np.ceil(max_y + 0.1) + bin_min_iteration = burn_in + 0.5 + bin_max_iteration = nof_iterations + 0.5 + + max_nof_sources = all_source_locations.shape[1] + + x_edges = np.arange(start=bin_min_x, stop=bin_max_x + bin_size_x, step=bin_size_x) + y_edges = np.arange(start=bin_min_y, stop=bin_max_y + bin_size_y, step=bin_size_y) + iteration_edges = np.arange(start=bin_min_iteration, stop=bin_max_iteration + bin_size_y, step=1) + + result_x_vals = all_source_locations[0, :, :].flatten() + result_y_vals = all_source_locations[1, :, :].flatten() + result_z_vals = all_source_locations[2, :, :].flatten() + # 1-indexing for iterations effectively + result_iteration_vals = np.array(range(nof_iterations)).reshape(1, -1) + 1 + result_iteration_vals = np.tile(result_iteration_vals, (max_nof_sources, 1)).flatten() + results_estimates = model_object.mcmc.store["s"].flatten() + + result_weighted, _ = np.histogramdd( + sample=np.array([result_x_vals, result_y_vals, result_iteration_vals]).T, + bins=[x_edges, y_edges, iteration_edges], + weights=results_estimates, + density=False, + ) + + count_result, edges_result = np.histogramdd( + sample=np.array([result_x_vals, result_y_vals, result_iteration_vals]).T, + bins=[x_edges, y_edges, iteration_edges], + density=False, + ) + + enu_x = edges_result[0] + enu_x = enu_x[:-1] + np.diff(enu_x) / 2 + enu_y = edges_result[1] + enu_y = enu_y[:-1] + np.diff(enu_y) / 2 + + enu_x, enu_y = np.meshgrid(enu_x, enu_y, indexing="ij") + + enu_object_full_grid = ENU(ref_latitude=ref_latitude, ref_longitude=ref_longitude, ref_altitude=ref_altitude) + enu_object_full_grid.east = enu_x.flatten() + enu_object_full_grid.north = enu_y.flatten() + enu_object_full_grid.up = np.zeros_like(enu_object_full_grid.north) + lla_object_full_grid = enu_object_full_grid.to_lla() + + _, gridsize_lat = is_regularly_spaced(lla_object_full_grid.latitude, tolerance=1e-6) + _, gridsize_lon = is_regularly_spaced(lla_object_full_grid.longitude, tolerance=1e-6) + + overall_count = np.sum(count_result, axis=2) + normalized_count = overall_count / (nof_iterations - burn_in) + + count_boolean = normalized_count >= normalized_count_limit + + enu_object = ENU(ref_latitude=ref_latitude, ref_longitude=ref_longitude, ref_altitude=ref_altitude) + enu_object.east = enu_x[count_boolean].flatten() + enu_object.north = enu_y[count_boolean].flatten() + enu_object.up = np.zeros_like(enu_object.north) + lla_object = enu_object.to_lla() + + polygons = [ + geometry.box( + lla_object.longitude[idx] - gridsize_lon / 2, + lla_object.latitude[idx] - gridsize_lat / 2, + lla_object.longitude[idx] + gridsize_lon / 2, + lla_object.latitude[idx] + gridsize_lat / 2, + ) + for idx in range(lla_object.nof_observations) + ] + + if show_summary_results: + summary_trace = 
self.create_summary_trace( + result_iteration_vals=result_iteration_vals, + burn_in=burn_in, + result_x_vals=result_x_vals, + result_y_vals=result_y_vals, + result_z_vals=result_z_vals, + results_estimates=results_estimates, + count_boolean=count_boolean, + x_edges=x_edges, + y_edges=y_edges, + nof_iterations=nof_iterations, + ref_latitude=ref_latitude, + ref_longitude=ref_longitude, + ref_altitude=ref_altitude, + ) + + self.create_empty_mapbox_figure(dict_key="count_map") + trace = plot_polygons_on_map( + polygons=polygons, + values=normalized_count[count_boolean].flatten(), + opacity=0.8, + name="normalized_count", + colorbar={"title": "Normalized Count", "orientation": "h"}, + map_color_scale="Bluered", + ) + self.figure_dict["count_map"].add_trace(trace) + self.figure_dict["count_map"].update_layout( + mapbox_accesstoken=self.mapbox_token, + mapbox_style="carto-positron", + mapbox={"zoom": 15, "center": {"lon": ref_longitude, "lat": ref_latitude}}, + title=f"Source location probability " + f"(>={normalized_count_limit}) for " + f"{datetime_min_string} to {datetime_max_string}", + font_family="Futura", + font_size=15, + ) + model_object.sensor_object.plot_sensor_location(self.figure_dict["count_map"]) + self.figure_dict["count_map"].update_traces(showlegend=False) + + adjusted_result_weights = result_weighted.copy() + adjusted_result_weights[adjusted_result_weights == 0] = np.nan + + median_of_all_emissions = np.nanmedian(adjusted_result_weights, axis=2) + + self.create_empty_mapbox_figure(dict_key="median_map") + + trace = plot_polygons_on_map( + polygons=polygons, + values=median_of_all_emissions[count_boolean].flatten(), + opacity=0.8, + name="median_emission", + colorbar={"title": "Median Emission", "orientation": "h"}, + map_color_scale="Bluered", + ) + self.figure_dict["median_map"].add_trace(trace) + self.figure_dict["median_map"].update_layout( + mapbox_accesstoken=self.mapbox_token, + mapbox_style="carto-positron", + mapbox={"zoom": 15, "center": {"lon": ref_longitude, "lat": ref_latitude}}, + title=f"Median emission rate estimate for {datetime_min_string} to {datetime_max_string}", + font_family="Futura", + font_size=15, + ) + model_object.sensor_object.plot_sensor_location(self.figure_dict["median_map"]) + self.figure_dict["median_map"].update_traces(showlegend=False) + + iqr_of_all_emissions = np.nanquantile(a=adjusted_result_weights, q=0.75, axis=2) - np.nanquantile( + a=adjusted_result_weights, q=0.25, axis=2 + ) + self.create_empty_mapbox_figure(dict_key="iqr_map") + + trace = plot_polygons_on_map( + polygons=polygons, + values=iqr_of_all_emissions[count_boolean].flatten(), + opacity=0.8, + name="iqr_emission", + colorbar={"title": "IQR", "orientation": "h"}, + map_color_scale="Bluered", + ) + self.figure_dict["iqr_map"].add_trace(trace) + self.figure_dict["iqr_map"].update_layout( + mapbox_accesstoken=self.mapbox_token, + mapbox_style="carto-positron", + mapbox={"zoom": 15, "center": {"lon": ref_longitude, "lat": ref_latitude}}, + title=f"Inter Quartile range (25%-75%) of emission rate " + f"estimate for {datetime_min_string} to {datetime_max_string}", + font_family="Futura", + font_size=15, + ) + model_object.sensor_object.plot_sensor_location(self.figure_dict["iqr_map"]) + self.figure_dict["iqr_map"].update_traces(showlegend=False) + + if show_summary_results: + self.figure_dict["count_map"].add_trace(summary_trace) + self.figure_dict["count_map"].update_traces(showlegend=True) + self.figure_dict["median_map"].add_trace(summary_trace) + 
self.figure_dict["median_map"].update_traces(showlegend=True) + self.figure_dict["iqr_map"].add_trace(summary_trace) + self.figure_dict["iqr_map"].update_traces(showlegend=True) + + def plot_coverage( + self, + coordinates: LLA, + couplings: np.ndarray, + threshold_function: Callable = np.max, + coverage_threshold: float = 6, + opacity: float = 0.8, + map_color_scale="jet", + ): + """Creates a coverage plot using the coverage function from Gaussian Plume. + + Args: + coordinates (LLA object): A LLA coordinate object containing a set of locations. + couplings (np.array): The calculated values of coupling (The 'A matrix') for a set of wind data. + threshold_function (Callable, optional): Callable function which returns some single value that defines the + maximum or 'threshold' coupling. Examples: np.quantile(q=0.9), + np.max, np.mean. Defaults to np.max. + coverage_threshold (float, optional): The threshold value of the estimated emission rate which is + considered to be within the coverage. Defaults to 6 kg/hr. + opacity (float): The opacity of the grid cells when they are plotted. + map_color_scale (str): The string which defines which plotly colour scale should be used when plotting + the values. + + """ + coverage_values = GaussianPlume(source_map=None).compute_coverage( + couplings=couplings, threshold_function=threshold_function, coverage_threshold=coverage_threshold + ) + self.plot_values_on_map( + dict_key="coverage_map", + coordinates=coordinates, + values=coverage_values, + aggregate_function=np.max, + opacity=opacity, + map_color_scale=map_color_scale, + ) + + @staticmethod + def create_summary_trace( + result_x_vals: np.ndarray, + result_y_vals: np.ndarray, + result_z_vals: np.ndarray, + results_estimates: np.ndarray, + result_iteration_vals: np.ndarray, + count_boolean: np.ndarray, + x_edges: np.ndarray, + y_edges: np.ndarray, + nof_iterations: int, + burn_in: int, + ref_latitude: float, + ref_longitude: float, + ref_altitude: float, + ) -> go.Scattermapbox: + """Helper function to create the summary information to plot on top of map type plots. + + We identify all blobs of estimates which appear close together on the map by looking at connected pixels in the + count_boolean array. Next we find the summary statistics for all estimates in that blob like overall median and + IQR estimate, mean location and the likelihood of that blob. + + When multiple sources are present in the same blob at the same iteration we first sum those emission rate + estimates before taking the median. + + The summary statistics are also printed out on screen. + + Args: + result_x_vals (np.ndarray): X-coordinate of estimates, flattened array of (n_sources_max * nof_iterations,). + result_y_vals (np.ndarray): Y-coordinate of estimates, flattened array of (n_sources_max * nof_iterations,). + result_z_vals (np.ndarray): Z-coordinate of estimates, flattened array of (n_sources_max * nof_iterations,). + results_estimates (np.ndarray): Emission rate estimates, flattened array of + (n_sources_max * nof_iterations,). + result_iteration_vals (np.ndarray): Iteration number corresponding each estimated value, flattened array + of (n_sources_max * nof_iterations,). + count_boolean (np.ndarray): Boolean array which indicates if likelihood of pixel is over threshold. + x_edges (np.ndarray): Pixel edges x-coordinates. + y_edges (np.ndarray): Pixel edges y-coordinates. + nof_iterations (int): Number of iterations used in MCMC. + burn_in (int): Burn-in used in MCMC. 
+ ref_latitude (float): Reference latitude in degrees of ENU coordinate system. + ref_longitude (float): Reference longitude in degrees of ENU coordinate system. + ref_altitude (float): Reference altitude in meters of ENU coordinate system. + + Returns: + summary_trace (go.Scattermapbox): Trace with summary information to plot on top of map type plots. + + """ + labeled_array, num_features = label(input=count_boolean, structure=np.ones((3, 3))) + + burn_in_bool = result_iteration_vals > burn_in + nan_x_vals = np.isnan(result_x_vals) + nan_y_vals = np.isnan(result_y_vals) + nan_z_vals = np.isnan(result_z_vals) + no_nan_idx = np.logical_not(np.logical_or(np.logical_or(nan_x_vals, nan_y_vals), nan_z_vals)) + no_nan_and_burn_in_bool = np.logical_and(no_nan_idx, burn_in_bool) + result_x_vals_no_nan = result_x_vals[no_nan_and_burn_in_bool] + result_y_vals_no_nan = result_y_vals[no_nan_and_burn_in_bool] + result_z_vals_no_nan = result_z_vals[no_nan_and_burn_in_bool] + results_estimates_no_nan = results_estimates[no_nan_and_burn_in_bool] + result_iteration_vals_no_nan = result_iteration_vals[no_nan_and_burn_in_bool] + + x_idx = np.digitize(result_x_vals_no_nan, x_edges, right=False) - 1 + y_idx = np.digitize(result_y_vals_no_nan, y_edges, right=False) - 1 + bin_numbers = np.ravel_multi_index((x_idx, y_idx), labeled_array.shape) + + bin_numbers_per_label = [ + np.ravel_multi_index(np.nonzero(labeled_array == value), labeled_array.shape) + for value in np.array(range(num_features)) + 1 + ] + + summary_result = pd.DataFrame() + for label_idx, curr_bins in enumerate(bin_numbers_per_label): + boolean_for_result = np.isin(bin_numbers, curr_bins) + mean_x = np.mean(result_x_vals_no_nan[boolean_for_result]) + mean_y = np.mean(result_y_vals_no_nan[boolean_for_result]) + mean_z = np.mean(result_z_vals_no_nan[boolean_for_result]) + + unique_iteration_vals, indices, counts = np.unique( + result_iteration_vals_no_nan[boolean_for_result], return_inverse=True, return_counts=True + ) + nof_iterations_present = unique_iteration_vals.size + blob_likelihood = nof_iterations_present / (nof_iterations - burn_in) + single_idx = np.argwhere(counts == 1) + results_estimates_for_blob = results_estimates_no_nan[boolean_for_result] + temp_estimate_result = results_estimates_for_blob[indices[single_idx.flatten()]] + multiple_idx = np.argwhere(counts > 1) + for single_idx in multiple_idx: + temp_val = np.sum(results_estimates_for_blob[indices == single_idx]) + temp_estimate_result = np.append(temp_estimate_result, temp_val) + + median_estimate = np.median(temp_estimate_result) + iqr_estimate = np.nanquantile(a=temp_estimate_result, q=0.75) - np.nanquantile( + a=temp_estimate_result, q=0.25 + ) + lower_bound = np.nanquantile(a=temp_estimate_result, q=0.025) + upper_bound = np.nanquantile(a=temp_estimate_result, q=0.975) + enu_object = ENU(ref_latitude=ref_latitude, ref_longitude=ref_longitude, ref_altitude=ref_altitude) + enu_object.east = mean_x + enu_object.north = mean_y + enu_object.up = mean_z + lla_object = enu_object.to_lla() + + summary_result.loc[label_idx, "latitude"] = lla_object.latitude + summary_result.loc[label_idx, "longitude"] = lla_object.longitude + summary_result.loc[label_idx, "altitude"] = lla_object.altitude + summary_result.loc[label_idx, "height"] = mean_z + summary_result.loc[label_idx, "median_estimate"] = median_estimate + summary_result.loc[label_idx, "quantile_025"] = lower_bound + summary_result.loc[label_idx, "quantile_975"] = upper_bound + summary_result.loc[label_idx, "iqr_estimate"] = 
iqr_estimate + summary_result.loc[label_idx, "absolute_count_iterations"] = nof_iterations_present + summary_result.loc[label_idx, "blob_likelihood"] = blob_likelihood + + summary_text_values = [ + f"Source ID: {value}
" + f"(Lon, Lat, Alt) ([deg], [deg], [m]):
" + f"({summary_result.longitude[value]:.7f}, " + f"{summary_result.latitude[value]:.7f}, {summary_result.altitude[value]:.3f})
" + f"Height: {summary_result.height[value]:.3f} [m]
" + f"Median emission rate: {summary_result.median_estimate[value]:.4f} [kg/hr]
" + f"2.5% quantile: {summary_result.quantile_025[value]:.3f} [kg/hr]
" + f"97.5% quantile: {summary_result.quantile_975[value]:.3f} [kg/hr]
" + f"IQR: {summary_result.iqr_estimate[value]:.4f} [kg/hr]
" + f"Blob present during: " + f"{summary_result.absolute_count_iterations[value]:.0f} iterations
" + f"Blob likelihood: {summary_result.blob_likelihood[value]:.5f}
" + for value in summary_result.index + ] + + summary_trace = go.Scattermapbox( + lat=summary_result.latitude, + lon=summary_result.longitude, + mode="markers", + marker=go.scattermapbox.Marker(size=14, color="black"), + text=summary_text_values, + name="Summary", + hoverinfo="text", + ) + + summary_result.index.name = "source_ID" + summary_result = summary_result.astype({"absolute_count_iterations": "int"}) + print("Summary results:") + print(summary_result.to_string(float_format=lambda x: "%.7f" % x)) + + return summary_trace + + +def is_regularly_spaced(array: np.ndarray, tolerance: float = 0.01, return_delta: bool = True): + """Determines whether an input array is regularly spaced, within some (absolute) tolerance. + + Gets the large differences (defined by tolerance) in the array, and sees whether all of them are within 5% of one + another. + + Args: + array (np.ndarray): Input array to be analysed. + tolerance (float, optional): Absolute value above which the difference between values is considered significant. + Defaults to 0.01. + return_delta (bool, optional): Whether to return the value of the regular grid spacing. Defaults to True. + + Returns: + (bool): Whether or not the grid is regularly spaced. + (float): The value of the regular grid spacing. + + """ + unique_vals = np.unique(array) + diff_unique_vals = np.diff(unique_vals) + diff_big = diff_unique_vals[diff_unique_vals > tolerance] + + boolean = np.all([np.isclose(diff_big[i], diff_big[i + 1], rtol=0.05) for i in range(len(diff_big) - 1)]) + + if return_delta: + return boolean, np.mean(diff_big) + + return boolean, None diff --git a/src/pyelq/preprocessing.py b/src/pyelq/preprocessing.py new file mode 100644 index 0000000..895cf2f --- /dev/null +++ b/src/pyelq/preprocessing.py @@ -0,0 +1,262 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Class for performing preprocessing on the loaded data.""" + +from copy import deepcopy +from dataclasses import dataclass +from typing import Union + +import numpy as np +import pandas as pd + +from pyelq.meteorology import Meteorology, MeteorologyGroup +from pyelq.sensor.sensor import Sensor, SensorGroup +from pyelq.support_functions.spatio_temporal_interpolation import temporal_resampling + + +@dataclass +class Preprocessor: + """Class which implements generic functionality for pre-processing of sensor and meteorology information. + + Attributes: + time_bin_edges (pd.arrays.DatetimeArray): edges of the time bins to be used for smoothing/interpolation. + sensor_object (SensorGroup): sensor group object containing raw data. + met_object (Meteorology): met object containing raw data. + aggregate_function (str): function to be used for aggregation of data. Defaults to mean. + sensor_fields (list): standard list of sensor attributes that we wish to regularize and/or filter. + met_fields (list): standard list of meteorology attributes that we wish to regularize/filter. + + """ + + time_bin_edges: pd.arrays.DatetimeArray + sensor_object: SensorGroup + met_object: Union[Meteorology, MeteorologyGroup] + aggregate_function: str = "mean" + sensor_fields = ["time", "concentration", "source_on"] + met_fields = [ + "time", + "wind_direction", + "wind_speed", + "pressure", + "temperature", + "u_component", + "v_component", + "w_component", + "wind_turbulence_horizontal", + "wind_turbulence_vertical", + ] + + def __post_init__(self) -> None: + """Initialise the class. 
+ + Attaching the sensor and meteorology objects as attributes, and running initial regularization and NaN filtering + steps. + + Before running the regularization & NaN filtering, the function ensures that u_component and v_component are + present as fields on met_object. The post-smoothing wind speed and direction are then calculated from the + smoothed u and v components, to eliminate the need to take means of directions when binning. + + The sensor and meteorology group objects attached to the class will have identical numbers of data points per + device, identical time stamps, and be free of NaNs. + + """ + self.met_object.calculate_uv_from_wind_speed_direction() + + self.regularize_data() + self.met_object.calculate_wind_direction_from_uv() + self.met_object.calculate_wind_speed_from_uv() + self.filter_nans() + + def regularize_data(self) -> None: + """Smoothing or interpolation of data onto a common set of time points. + + Function which takes in sensor and meteorology objects containing raw data (on original time points), and + smooths or interpolates these onto a common set of time points. + + When a SensorGroup object is supplied, the function will return a SensorGroup object with the same number of + sensors. When a MeteorologyGroup object is supplied, the function will return a MeteorologyGroup object with the + same number of objects. When a Meteorology object is supplied, the function will return a MeteorologyGroup + object with the same number of objects as there is sensors in the SensorGroup object. The individual Meteorology + objects will be identical. + + Assumes that sensor_object and met_object attributes contain the RAW data, on the original time stamps, as + loaded from file/API using the relevant data access class. + + After the function has been run, the sensor and meteorology group objects attached to the class as attributes + will have identical time stamps, but may still contain NaNs. + + """ + sensor_out = deepcopy(self.sensor_object) + for sns_new, sns_old in zip(sensor_out.values(), self.sensor_object.values()): + for field in self.sensor_fields: + if (field != "time") and (getattr(sns_old, field) is not None): + time_out, resampled_values = temporal_resampling( + sns_old.time, getattr(sns_old, field), self.time_bin_edges, self.aggregate_function + ) + setattr(sns_new, field, resampled_values) + sns_new.time = time_out + + met_out = MeteorologyGroup() + if isinstance(self.met_object, Meteorology): + single_met_object = self.interpolate_single_met_object(met_in_object=self.met_object) + for key in sensor_out.keys(): + met_out[key] = single_met_object + else: + for key, temp_met_object in self.met_object.items(): + met_out[key] = self.interpolate_single_met_object(met_in_object=temp_met_object) + + self.sensor_object = sensor_out + self.met_object = met_out + + def filter_nans(self) -> None: + """Filter out data points where any of the specified sensor or meteorology fields has a NaN value. + + Assumes that sensor_object and met_object attributes have first been passed through the regularize_data + function, and thus have fields on aligned time grids. + + Function first works through all sensor and meteorology fields and finds indices of all times where there is a + NaN value in any field. Then, it uses the resulting index to filter all fields. + + The result of this function is that the sensor_object and met_object attributes of the class are updated, any + NaN values having been removed. 
+ + """ + for sns_key, met_key in zip(self.sensor_object, self.met_object): + sns_in = self.sensor_object[sns_key] + met_in = self.met_object[met_key] + filter_index = np.ones(sns_in.nof_observations, dtype=bool) + for field in self.sensor_fields: + if (field != "time") and (getattr(sns_in, field) is not None): + filter_index = np.logical_and(filter_index, np.logical_not(np.isnan(getattr(sns_in, field)))) + for field in self.met_fields: + if (field != "time") and (getattr(met_in, field) is not None): + filter_index = np.logical_and(filter_index, np.logical_not(np.isnan(getattr(met_in, field)))) + + self.sensor_object[sns_key] = self.filter_object_fields(sns_in, self.sensor_fields, filter_index) + self.met_object[met_key] = self.filter_object_fields(met_in, self.met_fields, filter_index) + + def filter_on_met(self, filter_variable: list, lower_limit: list = None, upper_limit: list = None) -> None: + """Filter the supplied data on given properties of the meteorological data. + + Assumes that the SensorGroup and MeteorologyGroup objects attached as attributes have corresponding values (one + per sensor device), and have attributes that have been pre-smoothed/interpolated onto a common time grid per + device. + + The result of this function is that the sensor_object and met_object attributes are updated with the filtered + versions. + + Args: + filter_variable (list of str): list of meteorology variables that we wish to use for filtering. + lower_limit (list of float): list of lower limits associated with the variables in filter_variables. + Defaults to None. + upper_limit (list of float): list of upper limits associated with the variables in filter_variables. + Defaults to None. + + """ + if lower_limit is None: + lower_limit = [-np.infty] * len(filter_variable) + if upper_limit is None: + upper_limit = [np.infty] * len(filter_variable) + + for vrb, low, high in zip(filter_variable, lower_limit, upper_limit): + for sns_key, met_key in zip(self.sensor_object, self.met_object): + sns_in = self.sensor_object[sns_key] + met_in = self.met_object[met_key] + index_keep = np.logical_and(getattr(met_in, vrb) >= low, getattr(met_in, vrb) <= high) + self.sensor_object[sns_key] = self.filter_object_fields(sns_in, self.sensor_fields, index_keep) + self.met_object[met_key] = self.filter_object_fields(met_in, self.met_fields, index_keep) + + def block_data( + self, time_edges: pd.arrays.DatetimeArray, data_object: Union[SensorGroup, MeteorologyGroup] + ) -> list: + """Break the supplied data group objects into time-blocked chunks. + + Returning a list of sensor and meteorology group objects per time chunk. + + If there is no data for a given device in a particular period, then that device is simply dropped from the group + object in that block. + + Either a SensorGroup or a MeteorologyGroup object can be supplied, and the list of blocked objects returned will + be of the same type. + + Args: + time_edges (pd.Arrays.DatetimeArray): [(n_period + 1) x 1] array of edges of the time bins to be used for + dividing the data into blocks. + data_object (SensorGroup or MeteorologyGroup): data object containing either or meteorological data, to be + divided into blocks. + + Returns: + data_list (list): list of [n_period x 1] data objects, each list element being either a SensorGroup or + MeteorologyGroup object (depending on the input) containing the data for the corresponding period. 
+ + """ + data_list = [] + nof_periods = len(time_edges) - 1 + if isinstance(data_object, SensorGroup): + field_list = self.sensor_fields + elif isinstance(data_object, MeteorologyGroup): + field_list = self.met_fields + else: + raise TypeError("Data input must be either a SensorGroup or MeteorologyGroup.") + + for k in range(nof_periods): + data_list.append(type(data_object)()) + for key, dat in data_object.items(): + idx_time = (dat.time >= time_edges[k]) & (dat.time <= time_edges[k + 1]) + if np.any(idx_time): + data_list[-1][key] = deepcopy(dat) + data_list[-1][key] = self.filter_object_fields(data_list[-1][key], field_list, idx_time) + return data_list + + @staticmethod + def filter_object_fields( + data_object: Union[Sensor, Meteorology], fields: list, index: np.ndarray + ) -> Union[Sensor, Meteorology]: + """Apply a filter index to all the fields in a given data object. + + Can be used for either a Sensor or Meteorology object. + + Args: + data_object (Union[Sensor, Meteorology]): sensor or meteorology object (corresponding to a single device) + for which fields are to be filtered. + fields (list): list of field names to be filtered. + index (np.ndarray): filter index. + + Returns: + Union[Sensor, Meteorology]: filtered data object. + + """ + return_object = deepcopy(data_object) + for field in fields: + if getattr(return_object, field) is not None: + setattr(return_object, field, getattr(return_object, field)[index]) + return return_object + + def interpolate_single_met_object(self, met_in_object: Meteorology) -> Meteorology: + """Interpolate a single Meteorology object onto the time grid of the class. + + Args: + met_in_object (Meteorology): Meteorology object to be interpolated onto the time grid of the class. + + Returns: + met_out_object (Meteorology): interpolated Meteorology object. + + """ + met_out_object = Meteorology() + time_out = None + for field in self.met_fields: + if (field != "time") and (getattr(met_in_object, field) is not None): + time_out, resampled_values = temporal_resampling( + met_in_object.time, + getattr(met_in_object, field), + self.time_bin_edges, + self.aggregate_function, + ) + setattr(met_out_object, field, resampled_values) + + if time_out is not None: + met_out_object.time = time_out + + return met_out_object diff --git a/src/pyelq/sensor/__init__.py b/src/pyelq/sensor/__init__.py new file mode 100644 index 0000000..731dcc4 --- /dev/null +++ b/src/pyelq/sensor/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 +"""Sensor Module.""" +__all__ = ["satellite", "beam", "sensor"] diff --git a/src/pyelq/sensor/beam.py b/src/pyelq/sensor/beam.py new file mode 100644 index 0000000..61c7519 --- /dev/null +++ b/src/pyelq/sensor/beam.py @@ -0,0 +1,55 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Beam module. + +Subclass of Sensor. Used for beam sensors + +""" + +from dataclasses import dataclass + +import numpy as np + +from pyelq.sensor.sensor import Sensor + + +@dataclass +class Beam(Sensor): + """Defines Beam sensor class. + + Location attribute from superclass is assumed to be a Coordinate class object containing 2 locations, the first of + the sensor and the second of the retro. 
+ + Attributes: + n_beam_knots (int, optional): Number of beam knots to evaluate along a single beam + + """ + + n_beam_knots: int = 50 + + @property + def midpoint(self) -> np.ndarray: + """np.ndarray: Midpoint of the beam.""" + return np.mean(self.location.to_array(), axis=0) + + def make_beam_knots(self, ref_latitude, ref_longitude, ref_altitude=0) -> np.ndarray: + """Create beam knot locations. + + Creates beam knot locations based on location attribute and n_beam_knot attribute. + Results in an array of beam knot locations of shape [n_beam_knots x 3]. Have to provide a reference point in + order to create the beam knots in a local frame, spaced in meters + + Args: + ref_latitude (float): Reference latitude in degrees + ref_longitude (float): Reference longitude in degrees + ref_altitude (float, optional): Reference altitude in meters + + """ + temp_location = self.location.to_enu( + ref_latitude=ref_latitude, ref_longitude=ref_longitude, ref_altitude=ref_altitude + ).to_array() + beam_knot_array = np.linspace(temp_location[0, :], temp_location[1, :], num=self.n_beam_knots, endpoint=True) + return beam_knot_array diff --git a/src/pyelq/sensor/satellite.py b/src/pyelq/sensor/satellite.py new file mode 100644 index 0000000..a4a0808 --- /dev/null +++ b/src/pyelq/sensor/satellite.py @@ -0,0 +1,59 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Satellite module. + +Subclass of Sensor. Mainly used to accommodate satellite sensor TROPOMI. See: +http://www.tropomi.eu/data-products/methane +: http: //www.tropomi.eu/data-products/methane and http://www.tropomi.eu/data-products/nitrogen-dioxide + +""" + +from dataclasses import dataclass, field + +import numpy as np + +from pyelq.sensor.sensor import Sensor + + +@dataclass +class Satellite(Sensor): + """Defines Satellite sensor class. + + Attributes: + qa_value (np.ndarray, optional): Array containing quality values associated with the observations. + precision (np.ndarray, optional): Array containing precision values associated with the observations. + precision_kernel (np.ndarray, optional): Array containing precision kernel values associated with the + observations. + ground_pixel (np.ndarray, optional): Array containing ground pixels values associated with the observations. + Ground pixels are indicating the dimension perpendicular to the flight direction. + scanline (np.ndarray, optional): Array containing scanline values associated with the observations. + Scanlines are indicating the dimension in the direction of flight. + orbit (np.ndarray, optional): Array containing orbit values associated with the observations. + pixel_bounds (np.ndarray, optional): Array containing Polygon features which define the pixel bounds. + + """ + + qa_value: np.ndarray = field(init=False) + precision: np.ndarray = field(init=False) + precision_kernel: np.ndarray = field(init=False) + ground_pixel: np.ndarray = field(init=False) + scanline: np.ndarray = field(init=False) + orbit: np.ndarray = field(init=False, default=None) + pixel_bounds: np.ndarray = field(init=False) + + def get_orbits(self) -> np.ndarray: + """Gets the unique orbits which are present in the data. + + Raises: + ValueError: When orbits attribute is None + + Returns: + np.ndarray: Unique orbits present in the data. 
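# Illustrative sketch of Satellite.get_orbits defined above (not part of the pyELQ
# diff); the orbit numbers are invented.
import numpy as np

satellite = Satellite()
satellite.orbit = np.array([17001, 17001, 17002, 17002, 17003])
unique_orbits = satellite.get_orbits()  # -> array([17001, 17002, 17003])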
+ + """ + if self.orbit is None: + raise ValueError("Orbits attribute is None") + return np.unique(self.orbit) diff --git a/src/pyelq/sensor/sensor.py b/src/pyelq/sensor/sensor.py new file mode 100644 index 0000000..f6a2ba8 --- /dev/null +++ b/src/pyelq/sensor/sensor.py @@ -0,0 +1,241 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Sensor module. + +The superclass for the sensor classes. This module provides the higher level Sensor and SensorGroup classes. The Sensor +class is a single sensor, the SensorGroup is a dictionary of Sensors. The SensorGroup class is created to deal with the +properties over all sensors together. + +""" + +from copy import deepcopy +from dataclasses import dataclass, field + +import numpy as np +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +from pandas.arrays import DatetimeArray + +from pyelq.coordinate_system import ECEF, ENU, LLA, Coordinate + + +@dataclass +class Sensor: + """Defines the properties and methods of the sensor class. + + Attributes: + label (str, optional): String label for sensor + time (pandas.arrays.DatetimeArray, optional): Array containing time values associated with concentration + reading + location (Coordinate, optional): Coordinate object specifying the observation locations + concentration (np.ndarray, optional): Array containing concentration values associated with time reading + source_on (np.ndarray, optional): Array of size nof_observations containing boolean values indicating + whether a source is on or off for each observation, i.e. we are assuming the sensor can/can't see a source + + """ + + label: str = field(init=False) + time: DatetimeArray = field(init=False, default=None) + location: Coordinate = field(init=False) + concentration: np.ndarray = field(default_factory=lambda: np.array([])) + source_on: np.ndarray = field(init=False, default=None) + + @property + def nof_observations(self) -> int: + """Int: Number of observations contained in concentration array.""" + return self.concentration.size + + def plot_sensor_location(self, fig: go.Figure(), color=None) -> go.Figure: + """Plotting the sensor location. + + Args: + fig (go.Figure): Plotly figure object to add the trace to + color (`optional`): When specified, the color to be used + + Returns: + fig (go.Figure): Plotly figure object with sensor location trace added to it + + """ + lla_object = self.location.to_lla() + + marker_dict = {"size": 10, "opacity": 0.8} + if color is not None: + marker_dict["color"] = color + + fig.add_trace( + go.Scattermapbox( + mode="markers+lines", + lat=np.array(lla_object.latitude), + lon=np.array(lla_object.longitude), + marker=marker_dict, + line={"width": 3}, + name=self.label, + ) + ) + return fig + + def plot_timeseries(self, fig: go.Figure(), color=None, mode: str = "markers") -> go.Figure: + """Timeseries plot of the sensor concentration observations. + + Args: + fig (go.Figure): Plotly figure object to add the trace to + color (`optional`): When specified, the color to be used + mode (str, optional): Mode used for plotting, i.e. 
markers, lines or markers+lines + + Returns: + fig (go.Figure): Plotly figure object with sensor concentration timeseries trace added to it + + """ + marker_dict = {"size": 5, "opacity": 1} + if color is not None: + marker_dict["color"] = color + + fig.add_trace( + go.Scatter( + x=self.time, + y=self.concentration.flatten(), + mode=mode, + marker=marker_dict, + name=self.label, + legendgroup=self.label, + ) + ) + + return fig + + +@dataclass +class SensorGroup(dict): + """A dictionary containing multiple Sensors. + + This class is used when we want to combine a collection of sensors and be able to store/access overall properties. + + Attributes: + color_map (list, optional): Default colormap to use for plotting + + """ + + color_map: list = field(default_factory=list, init=False) + + def __post_init__(self): + self.color_map = px.colors.qualitative.Pastel + + @property + def nof_observations(self) -> int: + """Int: The total number of observations across all the sensors.""" + return int(np.sum([sensor.nof_observations for sensor in self.values()], axis=None)) + + @property + def concentration(self) -> np.ndarray: + """np.ndarray: Column vector of concentration values across all sensors, unwrapped per sensor.""" + return np.concatenate([sensor.concentration.flatten() for sensor in self.values()], axis=0) + + @property + def time(self) -> pd.arrays.DatetimeArray: + """DatetimeArray: Column vector of time values across all sensors.""" + return pd.arrays.DatetimeArray(np.concatenate([sensor.time for sensor in self.values()])) + + @property + def location(self) -> Coordinate: + """Coordinate: Coordinate object containing observation locations from all sensors in the group.""" + location_object = deepcopy(list(self.values())[0].location) + if isinstance(location_object, ENU): + attr_list = ["east", "north", "up"] + elif isinstance(location_object, LLA): + attr_list = ["latitude", "longitude", "altitude"] + elif isinstance(location_object, ECEF): + attr_list = ["x", "y", "z"] + else: + raise TypeError( + f"Location object should be either ENU, LLA or ECEF, while currently it is{type(location_object)}" + ) + for attr in attr_list: + setattr( + location_object, + attr, + np.concatenate([np.array(getattr(sensor.location, attr), ndmin=1) for sensor in self.values()], axis=0), + ) + return location_object + + @property + def sensor_index(self) -> np.ndarray: + """np.ndarray: Column vector of integer indices linking concentration observation to a particular sensor.""" + return np.concatenate( + [np.ones(sensor.nof_observations, dtype=int) * i for i, sensor in enumerate(self.values())] + ) + + @property + def source_on(self) -> np.ndarray: + """Column vector of booleans indicating whether sources are expected to be on, unwrapped over sensors. + + Assumes source is on when None is specified for a specific sensor. + + Returns: + np.ndarray: Source on attribute, unwrapped over sensors. 
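# Illustrative sketch of assembling a SensorGroup and reading its stacked properties,
# as described above (not part of the pyELQ diff); labels, times and concentrations are
# invented, and location is left unset for brevity.
import numpy as np
import pandas as pd

sensor_a = Sensor()
sensor_a.label = "device_a"
sensor_a.time = pd.date_range("2024-01-01", periods=3, freq="1min").array
sensor_a.concentration = np.array([2.0, 2.1, 2.3])

sensor_b = Sensor()
sensor_b.label = "device_b"
sensor_b.time = pd.date_range("2024-01-01", periods=2, freq="1min").array
sensor_b.concentration = np.array([1.9, 2.4])

group = SensorGroup()
group.add_sensor(sensor_a)
group.add_sensor(sensor_b)

print(group.nof_sensors)       # 2
print(group.nof_observations)  # 5
print(group.concentration)     # [2.  2.1 2.3 1.9 2.4], unwrapped per sensor
print(group.sensor_index)      # [0 0 0 1 1]
print(group.source_on)         # all True, since source_on was left as None for both sensors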
+ + """ + overall_idx = np.array([]) + for curr_key in list(self.keys()): + if self[curr_key].source_on is None: + temp_idx = np.ones(self[curr_key].nof_observations).astype(bool) + else: + temp_idx = self[curr_key].source_on + + overall_idx = np.concatenate([overall_idx, temp_idx]) + return overall_idx.astype(bool) + + @property + def nof_sensors(self) -> int: + """Int: Number of sensors contained in the SensorGroup.""" + return len(self) + + def add_sensor(self, sensor: Sensor): + """Add a sensor to the SensorGroup.""" + self[sensor.label] = sensor + + def plot_sensor_location(self, fig: go.Figure, color_map: list = None) -> go.Figure: + """Plotting of the locations of all sensors in the SensorGroup. + + Args: + fig (go.Figure): Plotly figure object to add the trace to + color_map (list, optional): When specified, the colormap to be used, plotting will cycle through + the colors + + Returns: + fig (go.Figure): Plotly figure object with sensor location traces added to it + + """ + if color_map is None: + color_map = self.color_map + + for i, sensor in enumerate(self.values()): + color_idx = i % len(color_map) + fig = sensor.plot_sensor_location(fig, color=color_map[color_idx]) + + return fig + + def plot_timeseries(self, fig: go.Figure, color_map: list = None, mode: str = "markers") -> go.Figure: + """Plotting of the concentration timeseries of all sensors in the SensorGroup. + + Args: + fig (go.Figure): Plotly figure object to add the trace to + color_map (list, optional): When specified, the colormap to be used, plotting will cycle through + the colors + mode (str, optional): Mode used for plotting, i.e. markers, lines or markers+lines + + Returns: + fig (go.Figure): Plotly figure object with sensor concentration time series traces added to it + + """ + if color_map is None: + color_map = self.color_map + + for i, sensor in enumerate(self.values()): + color_idx = i % len(color_map) + fig = sensor.plot_timeseries(fig, color=color_map[color_idx], mode=mode) + + return fig diff --git a/src/pyelq/source_map.py b/src/pyelq/source_map.py new file mode 100644 index 0000000..368eaa6 --- /dev/null +++ b/src/pyelq/source_map.py @@ -0,0 +1,115 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""SourceMap module. + +The class for the source maps used in pyELQ + +""" +from dataclasses import dataclass, field +from typing import Union + +import numpy as np + +from pyelq.coordinate_system import Coordinate, make_latin_hypercube +from pyelq.sensor.sensor import Sensor + + +@dataclass +class SourceMap: + """Defines SourceMap class. 
+ + Attributes: + location (Coordinate, optional): Coordinate object specifying the potential source locations + prior_value (np.ndarray, optional): Array with prior values for each source + inclusion_idx (np.ndarray, optional): Array of lists containing indices of the observations of a + corresponding sensor_object which are within the inclusion_radius of that particular source + inclusion_n_obs (list, optional): Array containing number of observations of a sensor_object within + radius for each source + + """ + + location: Coordinate = field(init=False, default=None) + prior_value: np.ndarray = None + inclusion_idx: np.ndarray = field(init=False, default=None) + inclusion_n_obs: np.ndarray = field(init=False, default=None) + + @property + def nof_sources(self) -> int: + """Number of sources.""" + if self.location is None: + return 0 + return self.location.nof_observations + + def calculate_inclusion_idx(self, sensor_object: Sensor, inclusion_radius: Union[int, np.ndarray]) -> None: + """Find observation indices which are within specified radius of each source location. + + This method takes the sensor object and for each source in the source_map object it calculates which + observations are within the specified radius. + When sensor_object location and sourcemap_object location are not of the same type, simply convert both to ECEF + and calculate inclusion indices accordingly. + The result is an array of lists which are the indices of the observations in sensor_object which are within the + specified radius. Result is stored in the corresponding attribute. + Also calculating number of observations in radius per source and storing result as a list in inclusion_n_obs + attribute + When a location attribute is in LLA we convert to ECEF for the inclusion radius to make sense + + Args: + sensor_object (Sensor): Sensor object containing location information on the observations under + consideration + inclusion_radius (Union[float, np.ndarray], optional): Inclusion radius in [m] radius from source + for which we take observations into account + + """ + sensor_kd_tree = sensor_object.location.to_ecef().create_tree() + source_points = self.location.to_ecef().to_array() + + inclusion_idx = sensor_kd_tree.query_ball_point(source_points, inclusion_radius) + idx_array = np.array(inclusion_idx, dtype=object) + self.inclusion_idx = idx_array + self.inclusion_n_obs = np.array([len(value) for value in self.inclusion_idx]) + + def generate_sources( + self, + coordinate_object: Coordinate, + sourcemap_limits: np.ndarray, + sourcemap_type: str = "central", + nof_sources: int = 5, + grid_shape: Union[tuple, np.ndarray] = (5, 5, 1), + ) -> None: + """Generates source locations based on specified inputs. + + The result gets stored in the location attribute + + In grid_sphere we scale the latitude and longitude from -90/90 and -180/180 to 0/1 for the use in temp_lat_rad + and temp_lon_rad + + Args: + coordinate_object (Coordinate): Empty coordinate object which specifies the coordinate class to populate + location with + sourcemap_limits (np.ndarray): Limits of the sourcemap on which to generate the sources of size [dim x 2] + if dim == 2 we assume the third dimension will be zeros. 
Assuming the units of the limits are defined in + the desired coordinate system + sourcemap_type (str, optional): Type of sourcemap to generate: central == 1 central source, + hypercube == nof_sources through a Latin Hypercube design, grid == grid of shape grid_shape + filled with sources, grid_sphere == grid of shape grid_shape taking into account a spherical spacing + nof_sources (int, optional): Number of sources to generate (used in 'hypercube' case) + grid_shape: (tuple, optional): Number of sources to generate in each dimension, total number of + sources will be the product of the entries of this tuple (used in 'grid' and 'grid_sphere' case) + + """ + sourcemap_dimension = sourcemap_limits.shape[0] + if sourcemap_type == "central": + array = sourcemap_limits.mean(axis=1).reshape(1, sourcemap_dimension) + elif sourcemap_type == "hypercube": + array = make_latin_hypercube(bounds=sourcemap_limits, nof_samples=nof_sources) + elif sourcemap_type == "grid": + array = coordinate_object.make_grid(bounds=sourcemap_limits, grid_type="rectangular", shape=grid_shape) + elif sourcemap_type == "grid_sphere": + array = coordinate_object.make_grid(bounds=sourcemap_limits, grid_type="spherical", shape=grid_shape) + else: + raise NotImplementedError("Please provide a valid sourcemap type") + coordinate_object.from_array(array=array) + self.location = coordinate_object diff --git a/src/pyelq/support_functions/__init__.py b/src/pyelq/support_functions/__init__.py new file mode 100644 index 0000000..043f324 --- /dev/null +++ b/src/pyelq/support_functions/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 +"""Support Functions Module.""" +__all__ = ["spatio_temporal_interpolation"] diff --git a/src/pyelq/support_functions/spatio_temporal_interpolation.py b/src/pyelq/support_functions/spatio_temporal_interpolation.py new file mode 100644 index 0000000..f0fa58f --- /dev/null +++ b/src/pyelq/support_functions/spatio_temporal_interpolation.py @@ -0,0 +1,229 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Spatio-temporal interpolation module. + +Support function to perform interpolation in various ways + +""" +import warnings +from typing import Tuple, Union + +import numpy as np +import pandas as pd +from scipy.interpolate import griddata + + +def interpolate( + location_in: np.ndarray = None, + time_in: Union[np.ndarray, pd.arrays.DatetimeArray] = None, + values_in: np.ndarray = None, + location_out: np.ndarray = None, + time_out: Union[np.ndarray, pd.arrays.DatetimeArray] = None, + **kwargs, +) -> np.ndarray: + """Interpolates data based on input. + + Interpolation using scipy.griddata function. Which in turn uses linear barycentric interpolation. + + It is assumed that the shape of location_in, time_in and values_in is consistent + + When time_out has the same size as number of rows of location_out, it is assumed these are aligned and be treated as + consistent, hence the output will be a column vector. + If this is not the case an interpolation will be performed for all combinations of rows in location out with times + of time_out and output wil be shaped as [nof_location_values x dimension] + + If location_out == None, we only perform temporal (1D) interpolation. 
+
+ If time_out == None, we only perform spatial interpolation.
+
+ If linear interpolation is not possible for spatial or spatiotemporal interpolation, we use nearest neighbor
+ interpolation and a warning will be displayed.
+
+ Args:
+ location_in (np.ndarray): Array of size [nof_values x dimension] with locations to interpolate from
+ time_in (Union[np.ndarray, pd.arrays.DatetimeArray]): Array of size [nof_values x 1] with timestamps or some
+ form of time values (seconds) to interpolate from
+ values_in (np.ndarray): Array of size [nof_values x 1] with values to interpolate from
+ location_out (np.ndarray): Array of size [nof_location_values x dimension] with locations to interpolate to
+ time_out (Union[np.ndarray, pd.arrays.DatetimeArray]): Array of size [nof_time_values x 1] with
+ timestamps or some form of time values (seconds) to interpolate to
+ **kwargs (dict): Other keyword arguments which get passed into the griddata interpolation function
+
+ Returns:
+ result (np.ndarray): Array of size [nof_location_values x nof_time_values] with interpolated values
+
+ """
+ _sense_check_interpolate_inputs(
+ location_in=location_in, time_in=time_in, values_in=values_in, location_out=location_out, time_out=time_out
+ )
+
+ if (
+ time_out is not None
+ and isinstance(time_out, pd.arrays.DatetimeArray)
+ and isinstance(time_in, pd.arrays.DatetimeArray)
+ ):
+ min_time_out = np.amin(time_out)
+ time_out = (time_out - min_time_out).total_seconds()
+ time_in = (time_in - min_time_out).total_seconds()
+
+ if location_out is None:
+ return _griddata(points_in=time_in, values=values_in, points_out=time_out, **kwargs)
+
+ if time_out is None:
+ return _griddata(points_in=location_in, values=values_in, points_out=location_out, **kwargs)
+
+ if location_in.shape[0] != time_in.size:
+ raise ValueError("Location and time do not have consistent sizes")
+
+ if location_out.shape[0] != time_out.size:
+ location_temp = np.tile(location_out, (time_out.size, 1))
+ time_temp = np.repeat(time_out.squeeze(), location_out.shape[0])
+ out_array = np.column_stack((location_temp, time_temp))
+ else:
+ out_array = np.column_stack((location_out, time_out))
+
+ in_array = np.column_stack((location_in, time_in))
+
+ result = _griddata(points_in=in_array, values=values_in, points_out=out_array, **kwargs)
+
+ if location_out.shape[0] != time_out.size:
+ result = result.reshape((location_out.shape[0], time_out.size), order="C")
+
+ return result
+
+
+def _sense_check_interpolate_inputs(
+ location_in: np.ndarray,
+ time_in: Union[np.ndarray, pd.arrays.DatetimeArray],
+ values_in: np.ndarray,
+ location_out: np.ndarray,
+ time_out: Union[np.ndarray, pd.arrays.DatetimeArray],
+):
+ """Helper function to sense check inputs and raise errors when applicable.
+
+ Args:
+ location_in (np.ndarray): Array of size [nof_values x dimension] with locations to interpolate from
+ time_in (Union[np.ndarray, pd.arrays.DatetimeArray]): Array of size [nof_values x 1] with timestamps or some
+ form of time values (seconds) to interpolate from
+ values_in (np.ndarray): Array of size [nof_values x 1] with values to interpolate from
+ location_out (np.ndarray): Array of size [nof_location_values x dimension] with locations to interpolate to
+ time_out (Union[np.ndarray, pd.arrays.DatetimeArray]): Array of size [nof_time_values x 1] with
+ timestamps or some form of time values (seconds) to interpolate to
+
+ Raises:
+ ValueError: When inputs do not match up.
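+
+ For example (illustrative only, not part of the original docstring), a call such as
+ _sense_check_interpolate_inputs(location_in=None, time_in=None, values_in=np.ones((3, 1)),
+ location_out=np.zeros((1, 3)), time_out=None)
+ would be expected to raise a ValueError, since an output location is requested without any input locations.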
+
+
+ """
+ if location_out is not None and location_in is None:
+ raise ValueError("Cannot specify output location without input location")
+ if time_out is not None and time_in is None:
+ raise ValueError("Cannot specify output time without input time")
+ if values_in is None:
+ raise ValueError("Must provide values_in")
+ if location_out is None and time_out is None:
+ raise ValueError("location_out or time_out not specified. Need to specify somewhere to interpolate to")
+
+
+def _griddata(points_in: np.ndarray, values: np.ndarray, points_out: np.ndarray, **kwargs):
+ """Wrapper function to handle special cases around the gridded interpolation.
+
+ Will try the nearest neighbour method when there are so few points that the spatial cases fail.
+
+ Syntax like scipy.interpolate.griddata.
+
+ Args:
+ points_in (np.ndarray): 2-D ndarray of floats with shape (n, D), or length D tuple of 1-D
+ nd-arrays with shape (n,). Data point coordinates.
+ values (np.ndarray): ndarray of float or complex, shape (n,). Data values.
+ points_out (np.ndarray): 2-D ndarray of floats with shape (m, D), or length D tuple of nd-arrays
+ broadcastable to the same shape. Points at which to interpolate data.
+
+ Returns:
+ ndarray: Array of interpolated values.
+
+ """
+ if values.size == 1:
+ return np.ones((points_out.shape[0], 1)) * values
+
+ try:
+ return griddata(points=points_in, values=values.flatten(), xi=points_out, **kwargs)
+ except RuntimeError:
+ warnings.warn(
+ "Warning: linear interpolation did not succeed, most likely too few input points (<5), "
+ "trying again with method==nearest"
+ )
+ if "method" in kwargs:
+ del kwargs["method"]
+ return griddata(points=points_in, values=values, xi=points_out, method="nearest", **kwargs)
+
+
+def temporal_resampling(
+ time_in: Union[np.ndarray, pd.arrays.DatetimeArray],
+ values_in: np.ndarray,
+ time_bin_edges: Union[np.ndarray, pd.arrays.DatetimeArray],
+ aggregate_function: str = "mean",
+ side: str = "center",
+) -> Tuple[Union[np.ndarray, pd.arrays.DatetimeArray], np.ndarray]:
+ """Resamples data into a set of time bins.
+
+ Checks which values of time_in are within 2 consecutive values of time_bin_edges and performs the aggregate
+ function on the corresponding values from values_in. time_in values outside the time_bin_edges are ignored.
+ Empty bins will be assigned a 'NaN' value.
+
+ When 'time_in' is a sequence of time stamps, a DatetimeArray should be used. Otherwise, a np.ndarray should be used.
+
+ Args:
+ time_in (Union[np.ndarray, pd.arrays.DatetimeArray]): A vector of times which correspond to values_in.
+ values_in (np.ndarray): A vector of the values to be resampled.
+ time_bin_edges (Union[np.ndarray, pd.arrays.DatetimeArray]): A vector of times which define the edges of the
+ bins into which the data will be resampled.
+ aggregate_function (str, optional): The function which is used to aggregate the data after it has been
+ sorted into bins. Defaults to mean.
+ side (str, optional): Which side of the time bins should be used to generate times_out. Possible values are:
+ 'left', 'center', and 'right'. Defaults to 'center'.
+
+ Returns:
+ time_out (Union[np.ndarray, pd.arrays.DatetimeArray]): Vector-like object containing the times of the resampled
+ values, consistent with the time_in dtype and the side input argument.
+ values_out (np.ndarray): A vector of resampled values, according to the time bins and the aggregate function.
+
+ Raises:
+ ValueError: If any of the input arguments are not of the correct type or shape, this error is raised.
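+
+ Examples:
+ A minimal illustrative sketch (values chosen purely for demonstration, not taken from the original
+ docstring): resampling three values into two bins with the default mean aggregation and bin-centre output
+ times, temporal_resampling(np.array([1.0, 2.0, 3.0]), np.array([10.0, 20.0, 30.0]), np.array([0.5, 1.5, 3.5])),
+ would be expected to return time_out of approximately [1.0, 2.5] and values_out of approximately [10.0, 25.0].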
+
+ """
+ if not isinstance(time_bin_edges, type(time_in)) or values_in.size != time_in.size:
+ raise ValueError("Arguments 'time_in', 'time_bin_edges' and/or 'values_in' are not of consistent type or size.")
+
+ if not isinstance(aggregate_function, str):
+ raise ValueError("The supplied 'aggregate_function' is not a string.")
+
+ if side == "center":
+ time_out = np.diff(time_bin_edges) / 2 + time_bin_edges[:-1]
+ elif side == "left":
+ time_out = time_bin_edges[:-1]
+ elif side == "right":
+ time_out = time_bin_edges[1:]
+ else:
+ raise ValueError(f"The 'side' argument must be 'left', 'center', or 'right', but received '{side}'.")
+
+ zero_value = 0
+ if isinstance(time_bin_edges, pd.arrays.DatetimeArray):
+ zero_value = np.array(0).astype("<m8[ns]")
+
+ if not np.all(np.diff(time_bin_edges) > zero_value):
+ raise ValueError("Argument 'time_bin_edges' does not monotonically increase.")
+
+ if np.any(time_in < time_bin_edges[0]) or np.any(time_in > time_bin_edges[-1]):
+ warnings.warn("Values in time_in are outside of range of time_bin_edges. These values will be ignored.")
+
+ index = np.searchsorted(time_bin_edges, time_in, side="left")
+ grouped_vals = pd.Series(values_in).groupby(index).agg(aggregate_function)
+ grouped_vals = grouped_vals.drop(index=[0, time_bin_edges.size], errors="ignore").sort_index()
+
+ values_out = np.full(time_out.shape, np.nan)
+ values_out[grouped_vals.index - 1] = grouped_vals.to_numpy()
+
+ return time_out, values_out
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..24163f1
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1,17 @@
+# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+"""Main test module."""
+__all__ = [
+ "component",
+ "sensor",
+ "support_functions",
+ "conftest",
+ "test_coordinate_system",
+ "test_dlm",
+ "test_gas_species",
+ "test_gaussian_plume",
+ "test_meteorology",
+ "test_preprocessing",
+ "test_source_map",
+]
diff --git a/tests/component/__init__.py b/tests/component/__init__.py
new file mode 100644
index 0000000..c593460
--- /dev/null
+++ b/tests/component/__init__.py
@@ -0,0 +1,5 @@
+# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+"""Component test module."""
+__all__ = ["test_background_model", "test_error_model", "test_offset_model", "test_source_model"]
diff --git a/tests/component/test_background_model.py b/tests/component/test_background_model.py
new file mode 100644
index 0000000..bd43e1d
--- /dev/null
+++ b/tests/component/test_background_model.py
@@ -0,0 +1,72 @@
+# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved.
+# +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for background models.""" + +import numpy as np +import pytest +from openmcmc.distribution.distribution import Gamma +from openmcmc.distribution.location_scale import Normal +from openmcmc.model import Model as mcmcModel +from openmcmc.sampler.sampler import NormalGamma, NormalNormal +from scipy import sparse + +from pyelq.component.background import SpatioTemporalBackground, TemporalBackground +from tests.conftest import initialise_sampler + + +@pytest.fixture( + name="background_model", + params=[TemporalBackground(), SpatioTemporalBackground()], + ids=["Temporal", "Spatiotemporal"], +) +def fix_background_model(request, sensor_group, met_group, gas_species): + """Fix the background models to be tested.""" + background_model = request.param + background_model.update_precision = True + background_model.initialise(sensor_object=sensor_group, meteorology=met_group, gas_species=gas_species) + if isinstance(background_model, SpatioTemporalBackground): + background_model.spatial_dependence = True + return background_model + + +def test_background_init(background_model): + """Check that the background object initialises with properties that make sense.""" + assert np.allclose(np.sum(background_model.basis_matrix, axis=1), np.ones(background_model.n_obs)) + + +def test_make_state(background_model, sensor_group): + """Check that the state is constructed properly.""" + state = background_model.make_state(state={}) + n_param = background_model.n_parameter + n_obs = sensor_group.nof_observations + + assert state["B_bg"].shape == (n_obs, n_param) + assert sparse.issparse(state["B_bg"]) + + assert state["P_bg"].shape == (n_param, n_param) + if n_param > 1: + assert sparse.issparse(state["P_bg"]) + else: + assert isinstance(state["P_bg"], np.ndarray) + + assert state["bg"].shape == (n_param, 1) + assert state["mu_bg"].shape == (n_param, 1) + + +def test_make_model(background_model): + """Check that the model is constructed as expected.""" + model = mcmcModel(background_model.make_model(model=[])) + + assert isinstance(model["bg"], Normal) + if background_model.update_precision: + assert isinstance(model["lambda_bg"], Gamma) + + +def test_make_sampler(background_model): + """Check that the sampler is constructed as expected.""" + sampler_object = initialise_sampler(background_model) + assert isinstance(sampler_object[0], NormalNormal) + if background_model.update_precision: + assert isinstance(sampler_object[1], NormalGamma) diff --git a/tests/component/test_error_model.py b/tests/component/test_error_model.py new file mode 100644 index 0000000..3acac31 --- /dev/null +++ b/tests/component/test_error_model.py @@ -0,0 +1,97 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for the error model.""" + +import numpy as np +import pytest +from openmcmc import parameter +from openmcmc.distribution.distribution import Gamma +from openmcmc.distribution.location_scale import Normal +from openmcmc.model import Model as mcmcModel +from openmcmc.sampler.sampler import NormalGamma +from scipy import sparse + +from pyelq.component.error_model import ByRelease, BySensor, ErrorModel +from pyelq.sensor.sensor import SensorGroup + + +@pytest.fixture(name="error_model", params=[BySensor, ByRelease], ids=["BySensor", "ByRelease"]) +def fix_error_model(request, sensor_group, met_group, gas_species): + """Set up the error model based on the sensor, met and gas fixtures.""" + call_fun = request.param + error_model = call_fun() + error_model.initialise(sensor_object=sensor_group, meteorology=met_group, gas_species=gas_species) + return error_model + + +def test_make_state(error_model: ErrorModel, sensor_group: SensorGroup): + """Test the initialisation of the state. + + Tests: + 1) that the parameters are the correct size for the particular type of error model. + 2) that the assignment index has one element for every observation. + 3) that every precision parameter in the set is used at least once. + + """ + state = error_model.make_state(state=None) + n_obs = sensor_group.nof_observations + n_sensor = len(sensor_group) + if isinstance(error_model, BySensor): + n_param = n_sensor + elif isinstance(error_model, ByRelease): + n_param = 2 * n_sensor + + assert state["tau"].shape == (n_param,) + assert state["a_tau"].shape == (n_param,) + assert state["b_tau"].shape == (n_param,) + assert state["precision_index"].shape == (n_obs,) + if n_obs > n_sensor: + assert np.allclose(np.unique(state["precision_index"]), np.arange(n_param)) + + +def test_make_model(error_model: ErrorModel): + """Test the construction of the model object. + + Tests: + 1) that a Gamma distribution is added to the model with the correct parameters + 2) that the predictor for the data distribution has the correct form. + + """ + model = error_model.make_model(model=None) + assert isinstance(model[0], Gamma) + assert model[0].response == "tau" + + assert isinstance(error_model.precision_parameter, parameter.MixtureParameterMatrix) + + +def test_precision_predictor(error_model: ErrorModel): + """Test that the precision predictor gives expected values. + + Tests: + 1) that the precision predictor gives a sparse diagonal matrix with expected values on the diagonal. + 2) that when we assign parameter index to state["tau"], we recover the precision index on the diagonal. + + """ + state = error_model.make_state(state=None) + state["tau"] = np.arange(state["tau"].shape[0]) + precision_matrix = error_model.precision_parameter.predictor(state) + + assert sparse.issparse(precision_matrix) + assert np.array_equal(precision_matrix.toarray(), np.diag(np.diag(precision_matrix.toarray()))) + + assert np.allclose(np.diag(precision_matrix.toarray()), error_model.precision_index) + + +def test_make_sampler(error_model: ErrorModel): + """Test the construction of the sampler object. + + Tests: + 1) that the sampler object created for the precisions is of NormalGamma type. 
+ + """ + model = [Normal(response="y", mean="mu", precision=error_model.precision_parameter)] + model = error_model.make_model(model=model) + sampler_object = error_model.make_sampler(model=mcmcModel(model), sampler_list=None) + assert isinstance(sampler_object[0], NormalGamma) diff --git a/tests/component/test_offset_model.py b/tests/component/test_offset_model.py new file mode 100644 index 0000000..b783cda --- /dev/null +++ b/tests/component/test_offset_model.py @@ -0,0 +1,77 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for the offset model.""" + +import numpy as np +import pytest +from openmcmc.distribution.location_scale import Normal +from openmcmc.sampler.sampler import NormalNormal +from scipy import sparse + +from pyelq.component.offset import PerSensor +from pyelq.sensor.sensor import SensorGroup +from tests.conftest import initialise_sampler + + +@pytest.fixture(name="offset_model", params=[PerSensor()], ids=["PerSensor"]) +def fix_offset_model(request, sensor_group, met_group, gas_species): + """Set up the specific error model, based on sensor, met and gas fixtures.""" + offset_model = request.param + offset_model.update_precision = True + offset_model.initialise(sensor_object=sensor_group, meteorology=met_group, gas_species=gas_species) + return offset_model + + +def test_make_state(offset_model: PerSensor, sensor_group: SensorGroup): + """Test the function which initialises the state. + + Tests: + 1) that the parameters have the correct size. + 2) that the basis and precision matrix are both sparse. + 3) that the allocation basis allocates each parameter to the correct number of observations. + + """ + state = offset_model.make_state(state=None) + n_obs = sensor_group.nof_observations + n_sensor = len(sensor_group) + n_param = n_sensor - 1 + + assert state["mu_d"].shape == (n_param, 1) + assert state["B_d"].shape == (n_obs, n_param) + assert state["P_d"].shape == (n_param, n_param) + assert isinstance(state["B_d"], sparse.csc_matrix) + assert isinstance(state["P_d"], sparse.csc_matrix) + assert isinstance(state["lambda_d"], float) + if offset_model.update_precision: + assert isinstance(state["a_lam_d"], float) + assert isinstance(state["b_lam_d"], float) + + sum_basis = np.array(np.sum(state["B_d"], axis=0)).flatten() + for k, sns in enumerate(sensor_group.values()): + if k > 0: + assert sns.nof_observations == sum_basis[k - 1] + + +def test_make_model(offset_model: PerSensor): + """Test the construction of the model object. + + Tests: + 1) that a normal distribution is added to the model with the correct parameters. + + """ + model = offset_model.make_model(model=None) + assert isinstance(model[0], Normal) + assert model[0].response == "d" + + +def test_make_sampler(offset_model: PerSensor): + """Test the construction of the sampler object. + + Tests: + 1) that the sampler is a conjugate NormalNormal object. + + """ + sampler_object = initialise_sampler(offset_model) + assert isinstance(sampler_object[0], NormalNormal) diff --git a/tests/component/test_source_model.py b/tests/component/test_source_model.py new file mode 100644 index 0000000..1e0507e --- /dev/null +++ b/tests/component/test_source_model.py @@ -0,0 +1,209 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for the source model.""" + +from copy import deepcopy + +import numpy as np +import pytest +from openmcmc import parameter +from openmcmc.distribution.distribution import Gamma +from openmcmc.distribution.location_scale import Normal as mcmcNormal +from openmcmc.sampler.sampler import NormalNormal + +from pyelq.component.source_model import Normal, NormalSlabAndSpike, SourceModel +from tests.conftest import initialise_sampler + + +@pytest.fixture( + name="source_model", + params=[(Normal, False), (NormalSlabAndSpike, False), (Normal, True), (NormalSlabAndSpike, True)], + ids=["Normal", "Normal_SlabAndSpike", "Normal_RJ", "Normal_SlabAndSpike_RJ"], +) +def fix_source_model(request, sensor_group, met_group, gas_species, dispersion_model, site_limits): + """Set up the source model based on all previous fixtures.""" + call_fun, rj_flag = request.param + source_model = call_fun() + source_model.dispersion_model = dispersion_model + source_model.update_precision = True + source_model.reversible_jump = rj_flag + source_model.site_limits = site_limits + source_model.initialise(sensor_object=sensor_group, meteorology=met_group, gas_species=gas_species) + return source_model + + +@pytest.fixture(name="fix_coupling_matrix") +def fix_coupling_matrix(monkeypatch): + """Mock source_model.update_coupling_column to simply return a column of ones.""" + + def mock_update_coupling_column(self, state, update_column): + state["A"][:, update_column] = np.ones((state["A"].shape[0],)) + return state + + monkeypatch.setattr(SourceModel, "update_coupling_column", mock_update_coupling_column) + + +def test_make_state(source_model, sensor_group): + """Test the make_state() function. + + Tests that the parameters stored in the state have the correct sizes based on the inputs. + + """ + state = source_model.make_state(state={}) + n_obs = sensor_group.nof_observations + n_source = source_model.dispersion_model.source_map.nof_sources + + assert state["A"].shape == (n_obs, n_source) + assert state["s"].shape == (n_source, 1) + assert state["alloc_s"].shape == (n_source, 1) + if isinstance(source_model, NormalSlabAndSpike): + prior_param_shape = (2, 1) + elif isinstance(source_model, Normal): + prior_param_shape = (1,) + assert state["lambda_s"].shape == prior_param_shape + assert state["mu_s"].shape == prior_param_shape + + +def test_make_model(source_model): + """Test the make_model() function. 
+ + Tests the following aspects of the model + + """ + model = source_model.make_model(model=[]) + + if isinstance(source_model, Normal): + assert model[0].response == "s" + assert isinstance(model[0], mcmcNormal) + assert isinstance(model[0].mean, parameter.MixtureParameterVector) + assert isinstance(model[0].precision, parameter.MixtureParameterMatrix) + elif isinstance(source_model, NormalSlabAndSpike): + assert model[1].response == "s" + assert isinstance(model[1], mcmcNormal) + assert isinstance(model[1].mean, parameter.MixtureParameterVector) + assert isinstance(model[1].precision, parameter.MixtureParameterMatrix) + if source_model.update_precision: + if source_model.reversible_jump: + assert model[-3].response == "lambda_s" + assert isinstance(model[-3], Gamma) + else: + assert model[-1].response == "lambda_s" + assert isinstance(model[-1], Gamma) + + +def test_make_sampler(source_model): + """Test the construction of the sampler.""" + sampler_object = initialise_sampler(source_model) + assert isinstance(sampler_object[0], NormalNormal) + + +def test_birth_function(source_model): + """Test the birth_function implementation, and some aspects of the reversible jump sampler. + + Runs source_model.birth_function on the initialised state and + checks the following: + 1. That the coupling matrix in the proposed state has one + additional column. + 2. That the new coupling column has been appended to the + right-hand side of the matrix. + 3. That log(p(current|proposed)) is 0 after this step. + + Following this, runs ReversibleJump.matched_birth_transition and + checks the following properties of the result: + 4. That there is one extra element appended to the source + vector in the state. + 5. That the existing elements of the source vector in the + state remain unchanged. + + """ + if not source_model.reversible_jump: + return + current_state = source_model.make_state(state={}) + current_state["A"] = np.random.random_sample(size=current_state["A"].shape) + + prop_state = deepcopy(current_state) + prop_state["n_src"] = current_state["n_src"] + 1 + prop_state["z_src"] = np.concatenate((prop_state["z_src"], np.zeros((3, 1))), axis=1) + + prop_state, logp_pr_g_cr, logp_cr_g_pr = source_model.birth_function(current_state, prop_state) + + assert prop_state["A"].shape[1] == (current_state["A"].shape[1] + 1) + assert np.allclose(prop_state["A"][:, :-1], current_state["A"]) + assert logp_cr_g_pr == 0 + + sampler_object = initialise_sampler(source_model) + prop_state, logp_pr_g_cr, logp_cr_g_pr = sampler_object[-1].matched_birth_transition( + current_state, prop_state, logp_pr_g_cr, logp_cr_g_pr + ) + + assert prop_state["s"].shape[0] == (current_state["s"].shape[0] + 1) + assert np.allclose(prop_state["s"][:-1], current_state["s"]) + + +def test_death_function(source_model): + """Test the death_function implementation, and some aspects of the reversible jump sampler. + + Performs the equivalent checks as in the birth case, adapted for the death move. 
+ + """ + if not source_model.reversible_jump: + return + current_state = source_model.make_state(state={}) + if current_state["n_src"] == 0: + return + current_state["A"] = np.random.random_sample(size=current_state["A"].shape) + + prop_state = deepcopy(current_state) + prop_state["n_src"] = current_state["n_src"] - 1 + deletion_index = np.random.randint(low=0, high=current_state["n_src"]) + prop_state["z_src"] = np.delete(prop_state["z_src"], obj=deletion_index, axis=1) + + prop_state, logp_pr_g_cr, logp_cr_g_pr = source_model.death_function(current_state, prop_state, deletion_index) + + assert prop_state["A"].shape[1] == (current_state["A"].shape[1] - 1) + assert np.allclose(prop_state["A"], np.delete(current_state["A"], obj=deletion_index, axis=1)) + assert logp_pr_g_cr == 0 + + sampler_object = initialise_sampler(source_model) + prop_state, logp_pr_g_cr, logp_cr_g_pr = sampler_object[-1].matched_death_transition( + current_state, prop_state, logp_pr_g_cr, logp_cr_g_pr, deletion_index + ) + + assert prop_state["s"].shape[0] == (current_state["s"].shape[0] - 1) + assert np.allclose(np.delete(current_state["s"], obj=deletion_index, axis=0), prop_state["s"]) + + +def test_move_function(source_model): + """Test the move_function, which updates the coupling matrix after a source is relocated by the sampler. + + The source_model.update_coupling_function is mocked so that it always + returns a column of ones. + + Checks the following: + 1. That the size of the coupling matrix is the same before and + after the move. + 2. That the other elements of the coupling matrix are unchanged + by the move_function. + 3. That the column of the coupling matrix corresponding to the + relocated source has changed + + """ + if not source_model.reversible_jump: + return + current_state = source_model.make_state(state={}) + if current_state["n_src"] == 0: + return + current_state["A"] = np.random.random_sample(size=current_state["A"].shape) + + prop_state = deepcopy(current_state) + move_index = np.random.randint(low=0, high=current_state["n_src"]) + prop_state["z_src"][:, move_index] = np.zeros((3,)) + prop_state = source_model.move_function(prop_state, update_column=move_index) + + assert prop_state["A"].shape == current_state["A"].shape + assert np.allclose( + np.delete(current_state["A"], obj=move_index, axis=1), np.delete(prop_state["A"], obj=move_index, axis=1) + ) + assert np.logical_not(np.allclose(current_state["A"][:, move_index], prop_state["A"][:, move_index])) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..bb1cc85 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,133 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 +"""Generic fixtures that can be used for any component tests.""" + +from datetime import datetime, timedelta + +import numpy as np +import pandas as pd +import pytest +from openmcmc.model import Model + +from pyelq.coordinate_system import ENU +from pyelq.dispersion_model.gaussian_plume import GaussianPlume +from pyelq.gas_species import CH4 +from pyelq.meteorology import Meteorology, MeteorologyGroup +from pyelq.sensor.beam import Beam +from pyelq.sensor.sensor import Sensor, SensorGroup +from pyelq.source_map import SourceMap + + +@pytest.fixture(name="ref_longitude", scope="module") +def fix_ref_longitude(): + """Fix the reference longitude to be used in the tests.""" + return 0.0 + + +@pytest.fixture(name="ref_latitude", scope="module") +def fix_ref_latitude(): + """Fix the reference latitude to be used in the tests.""" + return 0.0 + + +@pytest.fixture(name="site_limits", scope="module") +def fix_site_limits(): + """Fix the site limits to be used in the tests.""" + return np.array([[-100, 100], [-100, 100], [0, 5]]) + + +@pytest.fixture( + params=[(1, 1), (1, 3), (100, 1), (100, 3)], + ids=["1_1", "1_3", "100_1", "100_3"], + name="sensor_group", + scope="module", +) +def fix_sensor_group(request, ref_longitude, ref_latitude): + """Create sensor fixture. + + We add n_sensor-1 sensors to the sensor group, and one Beam sensor to make sure we cover both cases. + + """ + [n_time, n_sensor] = request.param + locations = np.concatenate( + (100 * np.random.random_sample(size=(n_sensor, 1)), 100 * np.random.random_sample(size=(n_sensor, 1))), axis=1 + ) + + sensor = SensorGroup() + for k in range(n_sensor - 1): + device_name = "device_" + str(k) + sensor[device_name] = Sensor() + sensor[device_name].time = pd.arrays.DatetimeArray( + pd.date_range(start=datetime.now(), end=datetime.now() + timedelta(hours=1.0), periods=n_time) + ) + sensor[device_name].concentration = np.random.random_sample(size=(n_time,)) + sensor[device_name].location = ENU( + east=locations[k, 0], + north=locations[k, 1], + up=5.0, + ref_longitude=ref_longitude, + ref_latitude=ref_latitude, + ref_altitude=0.0, + ).to_lla() + sensor[device_name].source_on = np.random.choice(a=[False, True], size=(n_time,), p=[0.5, 0.5]) + + k = n_sensor - 1 + device_name = "device_" + str(k) + sensor[device_name] = Beam() + sensor[device_name].time = pd.arrays.DatetimeArray( + pd.date_range(start=datetime.now(), end=datetime.now() + timedelta(hours=1.0), periods=n_time) + ) + sensor[device_name].concentration = np.random.random_sample(size=(n_time,)) + sensor[device_name].location = ENU( + east=np.array([0, locations[k, 0]]), + north=np.array([0, locations[k, 1]]), + up=np.array([5.0, 5.0]), + ref_longitude=ref_longitude, + ref_latitude=ref_latitude, + ref_altitude=0.0, + ).to_lla() + sensor[device_name].source_on = np.random.choice(a=[False, True], size=(n_time,), p=[0.5, 0.5]) + return sensor + + +@pytest.fixture(name="met_group", scope="module") +def fix_met_group(sensor_group): + """Create meteorology fixture.""" + met_group = MeteorologyGroup() + for name, sns in sensor_group.items(): + met_group[name] = Meteorology() + met_group[name].time = sns.time + met_group[name].wind_speed = 2.0 + 3.0 * np.random.random_sample(size=met_group[name].time.shape) + met_group[name].wind_direction = 360.0 * np.random.random_sample(size=met_group[name].time.shape) + met_group[name].wind_turbulence_horizontal = 10.0 * np.ones(shape=met_group[name].time.shape) + met_group[name].wind_turbulence_vertical = 10.0 * 
np.ones(shape=met_group[name].time.shape) + met_group[name].temperature = 293.0 * np.ones(shape=met_group[name].time.shape) + met_group[name].pressure = 101.0 * np.ones(shape=met_group[name].time.shape) + met_group[name].calculate_uv_from_wind_speed_direction() + return met_group + + +@pytest.fixture(name="gas_species", scope="module") +def fix_gas_species(): + """Create gas species fixture.""" + return CH4() + + +@pytest.fixture(name="dispersion_model", scope="module") +def fix_dispersion_model(ref_longitude, ref_latitude, site_limits): + """Set up the dispersion model.""" + source_map = SourceMap() + coordinate_object = ENU(ref_latitude=ref_latitude, ref_longitude=ref_longitude, ref_altitude=0.0) + source_map.generate_sources( + coordinate_object=coordinate_object, sourcemap_limits=site_limits, sourcemap_type="hypercube" + ) + dispersion_model = GaussianPlume(source_map=source_map) + return dispersion_model + + +def initialise_sampler(component): + """Helper function to initialise the sampler for any given component.""" + model = component.make_model(model=[]) + sampler_object = component.make_sampler(model=Model(model), sampler_list=None) + return sampler_object diff --git a/tests/sensor/__init__.py b/tests/sensor/__init__.py new file mode 100644 index 0000000..64cf821 --- /dev/null +++ b/tests/sensor/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 +"""Test module for Sensor code.""" +__all__ = ["test_beam", "test_satellite", "test_sensor", "test_sensorgroup"] diff --git a/tests/sensor/test_beam.py b/tests/sensor/test_beam.py new file mode 100644 index 0000000..5e2abe1 --- /dev/null +++ b/tests/sensor/test_beam.py @@ -0,0 +1,63 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Test module for beam sensor class. + +This module provides tests for the beam sensor class in pyELQ. + +""" +import numpy as np + +from pyelq.coordinate_system import ENU +from pyelq.sensor.beam import Beam + + +def test_midpoint(): + """Basic test to check if midpoint is working correctly. + + Setting the absolute tolerance to 1e-6 because of rounding errors in the conversion + + """ + + location = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0) + location.east = np.array([-1, 1]) + location.north = np.array([-1, 1]) + location.up = np.array([-1, 1]) + midpoint = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0) + midpoint.from_array(np.array([[0, 0, 0]])) + + sensor = Beam() + sensor.location = location + test_enu = sensor.midpoint + assert np.allclose(test_enu, midpoint.to_array()) + sensor.location = location.to_lla() + test_lla = sensor.midpoint + assert np.allclose(test_lla, midpoint.to_lla().to_array(), atol=1e-06) + sensor.location = location.to_ecef() + test_ecef = sensor.midpoint + assert np.allclose(test_ecef, midpoint.to_ecef().to_array()) + + +def test_make_beam_knots(): + """Basic test to check if make_beam_knots is working correctly. + + Checking all beam locations are inside bounding box and number of points correct. 
As well ass if they are linearly + spaced + + """ + sensor = Beam() + sensor.location = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0) + sensor.location.east = np.array([-1, 1]) + sensor.location.north = np.array([-2, 0]) + sensor.location.up = np.array([0, 2]) + beam_knot_array = sensor.make_beam_knots(ref_latitude=0, ref_longitude=0, ref_altitude=0) + assert np.all(beam_knot_array[:, 0] >= sensor.location.east[0]) + assert np.all(beam_knot_array[:, 0] <= sensor.location.east[1]) + assert np.all(beam_knot_array[:, 1] >= sensor.location.north[0]) + assert np.all(beam_knot_array[:, 1] <= sensor.location.north[1]) + assert np.all(beam_knot_array[:, 2] >= sensor.location.up[0]) + assert np.all(beam_knot_array[:, 2] <= sensor.location.up[1]) + assert beam_knot_array.shape[0] == sensor.n_beam_knots + assert np.unique(np.round(np.diff(beam_knot_array, axis=0), 10), axis=0).shape[0] == 1 diff --git a/tests/sensor/test_satellite.py b/tests/sensor/test_satellite.py new file mode 100644 index 0000000..0dd62aa --- /dev/null +++ b/tests/sensor/test_satellite.py @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Test module for satellite sensor class. + +This module provides tests for the satellite sensor class in pyELQ. + +""" +import numpy as np +import pytest + +from pyelq.sensor.satellite import Satellite + + +def test_orbits(): + """Basic test to check is Satellite can be instantiated and if it correctly finds the unique orbits.""" + sensor = Satellite() + assert isinstance(sensor, Satellite) + + with pytest.raises(ValueError): + sensor.get_orbits() + + orbits = np.array([1, 2, 3, 4, 5]) + random_integer = np.random.randint(low=1, high=10 + 1) + random_repeat = np.repeat(orbits, random_integer) + sensor.orbit = random_repeat + result = sensor.get_orbits() + assert np.all(result == orbits) diff --git a/tests/sensor/test_sensor.py b/tests/sensor/test_sensor.py new file mode 100644 index 0000000..2f475e0 --- /dev/null +++ b/tests/sensor/test_sensor.py @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Test module for sensor superclass. + +This module provides tests for the sensor superclass in pyELQ + +""" + +import numpy as np +import pytest + +from pyelq.sensor.sensor import Sensor + + +@pytest.mark.parametrize("nof_observations", [0, 1, 10]) +def test_nof_observables(nof_observations: int): + """Basic test to check Sensor class method. + + Args: + nof_observations (int): Number observations + + """ + sensor = Sensor() + if nof_observations > 0: + sensor.concentration = np.random.rand(nof_observations, 1) + + assert sensor.nof_observations == nof_observations diff --git a/tests/sensor/test_sensorgroup.py b/tests/sensor/test_sensorgroup.py new file mode 100644 index 0000000..290405b --- /dev/null +++ b/tests/sensor/test_sensorgroup.py @@ -0,0 +1,101 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Test module for SensorGroup superclass. 
+ +This module provides tests for the SensorGroup superclass in pyELQ + +""" +from copy import deepcopy + +import numpy as np +import pandas as pd +import plotly.graph_objects as go + +from pyelq.coordinate_system import LLA +from pyelq.sensor.sensor import Sensor, SensorGroup + + +def test_sensorgroup(): + """Tests to check all properties of the SensorGroup class have the correct output shapes.""" + nof_sensors = 3 + total_observations = 0 + group = SensorGroup() + for idx in range(nof_sensors): + sensor = Sensor() + nof_observations = np.random.randint(1, 10) + total_observations += nof_observations + sensor.concentration = np.random.rand(nof_observations, 1) + sensor.time = pd.arrays.DatetimeArray(pd.date_range(start="1/1/2022", periods=nof_observations)) + sensor.location = LLA( + latitude=0.01 * np.random.rand(), longitude=0.01 * np.random.rand(), altitude=0.01 * np.random.rand() + ) + sensor.label = str(idx) + group.add_sensor(sensor=sensor) + + assert group.nof_sensors == nof_sensors + assert group.nof_observations == total_observations + assert group.concentration.shape == (total_observations,) + assert group.time.shape == (total_observations,) + assert group.sensor_index.shape == (total_observations,) + + enu_location = group.location.to_enu() + assert enu_location.east.shape == (nof_sensors,) + assert enu_location.north.shape == (nof_sensors,) + assert enu_location.up.shape == (nof_sensors,) + + +def test_plotting(): + """Tests to check if plotting methods provide a plotly figure with the correct amount of traces.""" + nof_sensors = 3 + total_observations = 0 + group = SensorGroup() + for idx in range(nof_sensors): + sensor = Sensor() + nof_observations = np.random.randint(5, 10) + total_observations += nof_observations + sensor.concentration = np.random.rand(nof_observations, 1) + sensor.time = pd.arrays.DatetimeArray(pd.date_range(start="1/1/2022", periods=nof_observations)) + location = LLA() + location.latitude = np.array(idx) + location.longitude = np.array(idx) + sensor.location = location + sensor.label = str(idx) + group.add_sensor(sensor=sensor) + + fig_1 = go.Figure() + fig_1 = group.plot_timeseries(fig_1) + assert isinstance(fig_1, go.Figure) + assert len(fig_1.data) == group.nof_sensors + # fig_1.show(renderer='browser') + + fig_2 = go.Figure() + fig_2 = group.plot_sensor_location(fig_2) + fig_2.update_layout(mapbox={"style": "open-street-map", "center": {"lon": 0, "lat": 0}, "zoom": 7}) + assert isinstance(fig_2, go.Figure) + assert len(fig_2.data) == group.nof_sensors + # fig_2.show(renderer='browser') + + +def test_source_on_attribute(): + """Simple test to check correct concatenation of source_on attribute of SensorGroup.""" + location = LLA() + location.from_array(np.ones((5, 3))) + sensor = Sensor() + sensor.concentration = np.array([1, 2, 3, 4, 5]) + sensor.location = location + sensor.source_on = np.array([True, True, False, False, False]) + sensor.label = "1" + + sensor_2 = deepcopy(sensor) + sensor_2.source_on = None + sensor_2.label = "2" + + sns_group = SensorGroup() + sns_group.add_sensor(sensor) + sns_group.add_sensor(sensor_2) + + correct_results = np.array([True, True, False, False, False, True, True, True, True, True]) + assert np.all(sns_group.source_on == correct_results) diff --git a/tests/support_functions/__init__.py b/tests/support_functions/__init__.py new file mode 100644 index 0000000..ce24bc0 --- /dev/null +++ b/tests/support_functions/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. 
All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 +"""Support functions test module.""" +__all__ = ["test_spatio_temporal_interpolation"] diff --git a/tests/support_functions/test_spatio_temporal_interpolation.py b/tests/support_functions/test_spatio_temporal_interpolation.py new file mode 100644 index 0000000..bf33aa2 --- /dev/null +++ b/tests/support_functions/test_spatio_temporal_interpolation.py @@ -0,0 +1,229 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Test module for spatio-temporal interpolation module.""" +from datetime import datetime, timedelta + +import numpy as np +import pandas as pd +import pytest + +import pyelq.support_functions.spatio_temporal_interpolation as sti + + +def test_error_catching(): + """Test to see if errors are being thrown with incorrect inputs.""" + with pytest.raises(ValueError): + sti.interpolate(location_out=np.array(3)) + with pytest.raises(ValueError): + sti.interpolate(time_out=np.array(3)) + with pytest.raises(ValueError): + sti.interpolate(values_in=None) + with pytest.raises(ValueError): + sti.interpolate(location_out=None, time_out=None, location_in=np.array(3), values_in=np.array(3)) + + +def test_default_returns(): + """Tests if default values are returned Tests if same values are returned when input/output locations/times are the + same.""" + + loc_in = np.array([[0, 0, 0], [1, 1, 1]]) + time_in = pd.arrays.DatetimeArray(pd.date_range(pd.Timestamp.now(), periods=loc_in.shape[0], freq="H"))[:, None] + vals = np.random.random((loc_in.shape[0], 1)) + # check if same input/output locations and time give the same answer + return_vals = sti.interpolate( + location_in=loc_in, time_in=time_in, values_in=vals, location_out=loc_in, time_out=time_in + ) + assert np.all(return_vals == vals) + return_vals = sti.interpolate(location_in=loc_in, time_in=time_in, values_in=vals, location_out=loc_in) + assert np.all(return_vals == vals) + return_vals = sti.interpolate(location_in=loc_in, time_in=time_in, values_in=vals, time_out=time_in) + assert np.all(return_vals == vals) + + +def test_single_value(): + """Tests if all interpolated values are set to the same value when 1 input value is provided.""" + loc_in = np.array([[0, 0, 0], [1, 1, 1]]) + n_obs = loc_in.shape[0] + time_in = pd.arrays.DatetimeArray(pd.date_range(pd.Timestamp.now(), periods=n_obs, freq="H"))[:, None] + vals = np.random.random((loc_in.shape[0], 1)) + + # Check if we get the same output for all values when 1 value is provided + return_vals = sti.interpolate( + location_in=loc_in[[0], :], time_in=time_in[[0]], values_in=vals[[0]], location_out=loc_in, time_out=time_in + ) + assert np.all(return_vals == vals[0]) + assert return_vals.shape == (n_obs, 1) + return_vals = sti.interpolate( + location_in=loc_in[[0], :], time_in=time_in[[0]], values_in=vals[[0]], location_out=loc_in + ) + assert np.all(return_vals == vals[0]) + assert return_vals.shape == (n_obs, 1) + return_vals = sti.interpolate( + location_in=loc_in[[0], :], time_in=time_in[[0]], values_in=vals[[0]], time_out=time_in + ) + assert np.all(return_vals == vals[0]) + assert return_vals.shape == (n_obs, 1) + + +def test_temporal_interpolation(): + """Check interpolation value with simple manually calculated value for temporal interpolation (hence linear + interpolation in 1d) Also checks if we get the same values when an array of integers (representing seconds) is + supplied instead of an array of 
datetimes.""" + periods = 10 + time_in = pd.arrays.DatetimeArray(pd.date_range(pd.Timestamp.now(), periods=periods, freq="s"))[:, None] + time_in_array = np.array(range(periods))[:, None] + vals = np.random.random(time_in.size) + random_index = np.random.randint(0, periods - 1) + random_factor = np.random.random() + return_vals = sti.interpolate( + time_in=time_in, values_in=vals, time_out=time_in[[random_index]] + random_factor * pd.Timedelta(1, unit="sec") + ) + assert np.allclose(return_vals, random_factor * (vals[random_index + 1] - vals[random_index]) + vals[random_index]) + return_vals_array = sti.interpolate( + time_in=time_in_array, values_in=vals, time_out=time_in_array[[random_index]] + random_factor + ) + assert np.allclose(return_vals, return_vals_array) + + +def test_nearest_neighbour(): + """Test to check spatial interpolation when we don't have more than 5 points and hence want to check for nearest + value.""" + loc_in = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]]) + vals = np.random.random(loc_in.shape[0]) + return_vals = sti.interpolate(location_in=loc_in, values_in=vals, location_out=loc_in[0, :] + 1e-6, method="linear") + assert np.all(return_vals == vals[0]) + + +def test_spatial_interpolation(): + """Test spatial interpolation by checking if value interpolated at center of tetrahedron is actually the mean of the + values on the vertices of the tetrahedron.""" + # check spatial interpolation with a cube + loc_in = np.array([[0, 0, 0], [0, 1, 0], [1, 0.5, 0], [0.5, 0.5, 1]]) + vals = np.random.random((loc_in.shape[0], 1)) + return_vals = sti.interpolate( + location_in=loc_in, values_in=vals, location_out=np.mean(loc_in, axis=0, keepdims=True) + ) + assert np.allclose(return_vals, np.mean(vals)) + + +def test_same_value(): + """Test to check if all values are the same we get that value.""" + loc_in = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0], [0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]]) + vals = np.ones((loc_in.shape[0], 1)) + return_vals = sti.interpolate( + location_in=loc_in, values_in=vals, location_out=np.mean(loc_in, axis=0, keepdims=True) + ) + + assert np.all(return_vals == 1) + + +def test_fill_value(): + """Test to check if fill value argument works for point outside of interpolation points.""" + loc_in = np.array([[0, 0, 0], [0, 1, 0], [1, 0.5, 0], [0.5, 0.5, 1]]) + vals = np.random.random((loc_in.shape[0], 1)) + return_vals = sti.interpolate( + location_in=loc_in, values_in=vals, location_out=np.array([[-1, -1, -1]]), fill_value=-99 + ) + assert np.all(return_vals == -99) + + +def test_consistent_shapes(): + """Test if output shapes are consistent with provided input.""" + loc_in = np.array([[0, 0, 0], [1, 1, 1]]) + time_in = pd.arrays.DatetimeArray(pd.date_range(pd.Timestamp.now(), periods=loc_in.shape[0] - 1, freq="H"))[:, None] + vals = np.random.random((loc_in.shape[0], 1)) + with pytest.raises(ValueError): + sti.interpolate(location_in=loc_in, time_in=time_in, values_in=vals, location_out=loc_in, time_out=time_in) + + loc_in = np.array([[0, 0, 0], [0, 1, 0], [1, 0.5, 0], [0.5, 0.5, 1]]) + time_in = pd.arrays.DatetimeArray(pd.date_range(pd.Timestamp.now(), periods=loc_in.shape[0], freq="H"))[:, None] + vals = np.random.random((loc_in.shape[0], 1)) + return_vals = sti.interpolate( + location_in=loc_in, time_in=time_in, values_in=vals, location_out=loc_in, time_out=time_in + ) + assert return_vals.shape == (loc_in.shape[0], 1) + + time_out = time_in[:-1, :] + return_vals = sti.interpolate( + location_in=loc_in, time_in=time_in, 
values_in=vals, location_out=loc_in, time_out=time_out + ) + assert return_vals.shape == (loc_in.shape[0], time_out.size) + + +def test_temporal_resampling(): + """This test function generates a set of 100 synthetic data points from 1st January 2000. + + It then finds what + the correct values_out would be, and afterwards shuffles the data. It uses this to check that temporal_resampling() + can do the following: + 1: Run without error. + 2: Handle any incorrect input arguments including incorrect types and shapes. + 3. Test that the values are correctly resampled in time, regardless of: + A: The settings used + B: The order of the values in time_in + C: Any values outside the time bins. + + """ + n_values_in = 100 + n_time_out = 10 + + values_in = np.array(np.random.rand(n_values_in)) + time_in = [datetime(2000, 1, 1, 0, 0, 1) + timedelta(minutes=i) for i in range(n_values_in)] + time_bin_edges = pd.arrays.DatetimeArray( + pd.to_datetime([datetime(2000, 1, 1) + timedelta(minutes=i * 10) for i in range(n_time_out + 1)]) + ) + + correct_values_out_mean = np.array([np.mean(i) for i in np.split(values_in, n_time_out)]) + correct_values_out_max = np.array([np.max(i) for i in np.split(values_in, n_time_out)]) + correct_values_out_min = np.array([np.min(i) for i in np.split(values_in, n_time_out)]) + + time_bin_edges_non_monotonic = pd.arrays.DatetimeArray( + pd.Series(list(time_bin_edges)[:-1] + [datetime(1999, 1, 1)]) + ) + + time_in = pd.arrays.DatetimeArray(pd.to_datetime(time_in + [datetime(2001, 1, 1)])) + values_in = np.append(values_in, 1000000) + + p = np.random.permutation(len(time_in)) + time_in = time_in[p] + values_in = values_in[p] + + incorrect_arguments_list = [ + [time_in[:3], values_in, time_bin_edges], + [np.array(time_in), values_in, time_bin_edges], + [time_in, values_in, time_bin_edges, "mean", "nonsense_text"], + [time_in, values_in, time_bin_edges, np.mean], + [time_in, values_in, time_bin_edges_non_monotonic], + ] + + for incorrect_arguments in incorrect_arguments_list: + with pytest.raises(ValueError): + sti.temporal_resampling(*incorrect_arguments) + + time_out, values_out = sti.temporal_resampling(time_in, values_in, time_bin_edges, "mean", "center") + correct_time_out = np.diff(time_bin_edges) / 2 + time_bin_edges[:-1] + assert (time_out == correct_time_out).all() + assert np.allclose(values_out, correct_values_out_mean) + + time_out, values_out = sti.temporal_resampling(time_in, values_in, time_bin_edges, "max", "left") + correct_time_out = time_bin_edges[:-1] + assert (time_out == correct_time_out).all() + assert np.allclose(values_out, correct_values_out_max) + + time_out, values_out = sti.temporal_resampling(time_in, values_in, time_bin_edges, "min", "right") + correct_time_out = time_bin_edges[1:] + assert (time_out == correct_time_out).all() + assert np.allclose(values_out, correct_values_out_min) + + +def test_temporal_resampling_empty_bins(): + """This test function test to see if the temporal resampling provides a nan value for an empty bin.""" + time_in = np.array([1, 3]) + values_in = np.array([1, 3]) + time_bin_edges = np.array([0.5, 1.5, 2.5, 3.5]) + correct_values_out = np.array([1, np.nan, 3]) + _, values_out = sti.temporal_resampling(time_in, values_in, time_bin_edges, "mean", "center") + assert np.allclose(values_out, correct_values_out, equal_nan=True) diff --git a/tests/test_coordinate_system.py b/tests/test_coordinate_system.py new file mode 100644 index 0000000..61f38c6 --- /dev/null +++ b/tests/test_coordinate_system.py @@ -0,0 +1,302 @@ +# 
SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +"""Test module for coordinate system functions. + +This module provides tests for the coordinate system + +""" + +import numpy as np +import pytest + +from pyelq.coordinate_system import ECEF, ENU, LLA, make_latin_hypercube + + +@pytest.mark.parametrize("n", [1, 100]) +def test_lla(n): + """Testing conversion functions in LLA class to convert to all other types and check get back to the same place. + + Args: + n (int): Parameter to define size of location vector + + """ + + lat = np.random.rand(n) * 10 + 40 + lon = (np.random.rand(n) - 0.5) * 5 + alt = np.random.rand(n) * 100 + crd = LLA(latitude=lat, longitude=lon, altitude=alt) + + crd2 = crd.to_lla() + crd3 = crd2.to_enu() + crd4 = crd3.to_ecef() + crd5 = crd4.to_lla() + + assert np.all(crd5.latitude == pytest.approx(lat)) + assert np.all(crd5.longitude == pytest.approx(lon)) + assert np.all(crd5.altitude == pytest.approx(alt)) + + # test tree is created successfully + crd.create_tree() + + +@pytest.mark.parametrize("n", [1, 100]) +def test_enu(n): + """Testing conversion functions in ENU class to convert to all other types and check get back to the same place. + + Args: + n (int): Parameter to define size of location vector + + """ + east = np.random.rand(n) * 10000 + north = (np.random.rand(n)) * 5000 + up = np.random.rand(n) * 100 + ref_lat = 50 + ref_lon = 0 + ref_alt = 0 + crd = ENU(ref_longitude=ref_lon, ref_latitude=ref_lat, ref_altitude=ref_alt, east=east, north=north, up=up) + + ref_lat_2 = 54 + ref_lon_2 = 2 + ref_alt_2 = 10 + + crd2 = crd.to_enu() + crd3 = crd2.to_enu(ref_longitude=ref_lon_2, ref_latitude=ref_lat_2, ref_altitude=ref_alt_2) + crd4 = crd3.to_lla() + crd5 = crd4.to_ecef() + crd6 = crd5.to_enu(ref_longitude=ref_lon, ref_latitude=ref_lat, ref_altitude=ref_alt) + + assert np.all(crd6.east == pytest.approx(east)) + assert np.all(crd6.north == pytest.approx(north)) + assert np.all(crd6.up == pytest.approx(up)) + + # test tree is created successfully + crd.create_tree() + + +@pytest.mark.parametrize("n", [1, 100]) +def test_ecef(n): + """Testing conversion functions in ECEF class to convert to all other types and check get back to the same place. + + Args: + n (int): Parameter to define size of location vector + + """ + + x = np.random.rand(n) * 1000 + 4107864.0912067825 + y = np.random.rand(n) * 1000 + z = np.random.rand(n) * 5 + 4862789.037706433 + crd = ECEF(x=x, y=y, z=z) + + crd2 = crd.to_ecef() + crd3 = crd2.to_enu() + crd4 = crd3.to_lla() + crd5 = crd4.to_ecef() + + assert np.all(crd5.x == pytest.approx(x)) + assert np.all(crd5.y == pytest.approx(y)) + assert np.all(crd5.z == pytest.approx(z)) + + # test tree is created successfully + crd.create_tree() + + +@pytest.mark.parametrize("grid_crd", ["to_lla", "to_enu", "to_ecef"]) +@pytest.mark.parametrize("mid_crd", ["to_lla", "to_enu", "to_ecef"]) +@pytest.mark.parametrize("dim", [2, 3]) +def test_interpolate(dim, grid_crd, mid_crd): + """Test interpolation function to/from different coordinate systems. 
+ + Define a box or cube in of which all corners are relative close (so we don't get round-off errors due to the shape + of the Earth) see if the interpolate function gives exactly the midpoint + + Args: + dim (int): dimension for interpolation + grid_crd (str): function for the coordinate transform for grid + mid_crd (str): unction for the coordinate transform for midpoint + + """ + + if dim == 2: + lon, lat = np.meshgrid([10, 10.01], [50, 50.01]) + z = np.array([[0, 1], [0, 1]]) + grid = LLA(longitude=lon, latitude=lat) + mid = LLA(longitude=np.array(10.005), latitude=np.array(50.005)) + else: + z = np.array([[[0, 1], [0, 1]], [[0, 1], [0, 1]]]) + lon, lat, alt = np.meshgrid([10, 10.01], [50, 50.01], [0, 10]) + grid = LLA(longitude=lon, latitude=lat, altitude=alt) + mid = LLA(longitude=np.array(10.005), latitude=np.array(50.005), altitude=np.array(5)) + + conv_func = getattr(grid, grid_crd) + grid = conv_func() + + conv_func = getattr(mid, mid_crd) + mid = conv_func() + + z_interp = grid.interpolate(values=z, locations=mid, dim=dim) + + assert z_interp == pytest.approx(0.5, abs=0.05) + + +def test_interpolate_outside(): + """Test interpolation fill extrapolation. + + Only use dim==2 because the test_interpolate already takes care of dim==3, main reason for this test is to see if + **kwargs are passed on correctly. + + """ + lon, lat = np.meshgrid([10, 14], [50, 51]) + z = np.array([[0, 1], [0, 1]]) + grid = LLA(longitude=lon, latitude=lat) + out = LLA(longitude=np.array(15), latitude=np.array(50.5)) + + z_interp = grid.interpolate(values=z, locations=out, dim=2, fill_value=-99) + + assert z_interp == -99 + + +def test_interpolate_single_values(): + """Test interpolation single input value. + + Test to see if all values are set to the single input value when only 1 input value is provided + + """ + location = LLA() + location.from_array(np.array([[0, 0, 0]])) + + temp_array = np.random.uniform(-30, 30, (5, 3)) + output_location = LLA() + output_location.from_array(temp_array) + + interpolated_values = location.interpolate(values=np.array([39]), locations=output_location) + + assert np.all(interpolated_values == 39) + + +@pytest.mark.parametrize("dim", [2, 3]) +def test_consistency_from_array_to_array(dim): + """Test to_array and from_array methods. 
+ + This test is designed to check for consistency between the to_array and from_array methods by just filling the + attributes with random data and seeing if we get back the same data. + + """ + n_samples = np.random.randint(1, 100) + array = np.random.random((n_samples, dim)) + + lla_object = LLA() + lla_object.from_array(array) + assert np.allclose(lla_object.to_array(dim=dim), array) + + enu_object = ENU(ref_latitude=0, ref_longitude=0, ref_altitude=0) + enu_object.from_array(array) + assert np.allclose(enu_object.to_array(dim=dim), array) + + ecef_object = ECEF() + ecef_object.from_array(array) + assert np.allclose(ecef_object.to_array(dim=dim), array) + + +def test_nof_observations(): + """Test nof_observations calculation.""" + n_samples = np.random.randint(1, 100) + array = np.random.random((n_samples, 3)) + + lla_object = LLA() + assert lla_object.nof_observations == 0 + lla_object.from_array(array) + assert lla_object.nof_observations == n_samples + + enu_object = ENU(ref_latitude=0, ref_longitude=0, ref_altitude=0) + assert enu_object.nof_observations == 0 + enu_object.from_array(array) + assert enu_object.nof_observations == n_samples + + ecef_object = ECEF() + assert ecef_object.nof_observations == 0 + ecef_object.from_array(array) + assert ecef_object.nof_observations == n_samples + + +@pytest.mark.parametrize("grid_type", ["rectangular", "spherical", "test"]) +@pytest.mark.parametrize("dim", [2, 3]) +def test_make_grid(grid_type, dim): + """Test the make_grid method. + + Checks if the NotImplementedError gets raised. Checks for correct number of samples generated. Checks if all + samples are within the specified limits. + + Args: + grid_type (str): Type of grid to generate + dim (int): Dimension of each grid (2 or 3) + + """ + enu_object = ENU(ref_latitude=0, ref_longitude=0, ref_altitude=0) + grid_limits = np.array([[-100, 100], [-100, 100], [-100, 100]]) + grid_limits = grid_limits[:dim, :] + random_shape = np.random.randint(1, 100, size=dim) + + if grid_type in ["rectangular", "spherical"]: + grid = enu_object.make_grid(bounds=grid_limits, grid_type=grid_type, shape=random_shape) + assert grid.shape[0] == random_shape.prod() + + for idx in range(dim): + assert np.all(grid[:, idx] >= grid_limits[idx, 0]) + assert np.all(grid[:, idx] <= grid_limits[idx, 1]) + else: + with pytest.raises(NotImplementedError): + grid = enu_object.make_grid(bounds=grid_limits, grid_type=grid_type, shape=random_shape) + + +@pytest.mark.parametrize("input_system", [LLA, ENU, ECEF]) +@pytest.mark.parametrize("output_system", [LLA, ENU, ECEF]) +def test_to_object_type(input_system, output_system): + """Test the to_object_type method. + + Creates a very basic coordinate system object and checks if it is converted to the right output system. 
+ Also checks if an error gets thrown when applicable + + Args: + input_system (Coordinate): Input coordinate class + output_system (Coordinate): Output coordinate class + + """ + if input_system == ENU: + input_object = input_system(ref_latitude=0, ref_longitude=0, ref_altitude=0) + else: + input_object = input_system() + if output_system == ENU: + output_object = output_system(ref_latitude=0, ref_longitude=0, ref_altitude=0) + else: + output_object = output_system() + + input_object.from_array(np.array([[0, 0, 0]])) + test_object = input_object.to_object_type(output_object) + assert isinstance(test_object, output_system) + + with pytest.raises(TypeError): + test_object = input_object.to_object_type("test") + + +@pytest.mark.parametrize("dim", [2, 3]) +def test_make_latin_hypercube(dim): + """Test the make_latin_hypercube method. + + Checks for correct number of samples generated. Checks if all samples are within the specified limits + + Args: + dim (int): Dimension of the hypercube (2 or 3) + + """ + grid_limits = np.array([[-100, 100], [-100, 100], [-100, 100]]) + grid_limits = grid_limits[:dim, :] + random_number = np.random.randint(1, 100) + array = make_latin_hypercube(bounds=grid_limits, nof_samples=random_number) + + assert array.shape == (random_number, dim) + + for idx in range(dim): + assert np.all(array[:, idx] >= grid_limits[idx, 0]) + assert np.all(array[:, idx] <= grid_limits[idx, 1]) diff --git a/tests/test_dlm.py b/tests/test_dlm.py new file mode 100644 index 0000000..b324d02 --- /dev/null +++ b/tests/test_dlm.py @@ -0,0 +1,698 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Test module for DLM code. + +This module provides various tests for the DLM related code part of pyELQ + +""" + +from typing import Tuple + +import numpy as np +import pytest +from scipy.stats import cramervonmises + +from pyelq.dlm import DLM + + +@pytest.mark.parametrize( + "g_matrix, power", + [(np.array([3], ndmin=2), 3), (np.identity(4), 4), (np.random.default_rng().random(size=(5, 5)), 5)], +) +def test_calculate_g_power(g_matrix: np.ndarray, power: int): + """Test to check calculate_g_power method. + + Uses the numpy matrix power function to compare the output of the implemented method. + + Args: + g_matrix (np.ndarray): Matrix to perform power operation on + power (int): Power to calculate + + """ + model = DLM(g_matrix=g_matrix) + model.calculate_g_power(max_power=power) + numpy_result = np.linalg.matrix_power(g_matrix, power) + dlm_result = model.g_power[:, :, -1].squeeze() + assert np.allclose(numpy_result, dlm_result) + + +@pytest.mark.parametrize("nof_observables, order", [(1, 1), (2, 2), (4, 3)]) +def test_polynomial_f_g(nof_observables: int, order: int): + """Test to check polynomial_f_g method. + + Check if the shapes are consistent and if resulting G matrix of n-th order polynomial DLM has a single unit + eigenvalue of multiplicity n * nof_observables (from Harrison and West Chap 7.1), in particular no zero eigenvalues + + Also checks if 0 is returned for nof_observables and nof_state_parameters when F or G are not set + + Args: + nof_observables (int): Dimension of observation + order (int): Polynomial order (0=constant, 1=linear, 2=quadratic etc.) 
+ + """ + model = DLM() + assert model.nof_observables == 0 + assert model.nof_state_parameters == 0 + model.polynomial_f_g(nof_observables=nof_observables, order=order) + assert model.f_matrix.shape == ((order + 1) * nof_observables, nof_observables) + assert model.g_matrix.shape == ((order + 1) * nof_observables, (order + 1) * nof_observables) + eigenvalues = np.linalg.eigvals(model.g_matrix) + unique_vals, unique_counts = np.unique(eigenvalues, return_counts=True) + assert unique_vals.size == 1 + assert unique_counts[0] == (order + 1) * nof_observables + assert unique_vals[0] != 0 + + +@pytest.mark.parametrize("nof_observables, order", [(1, 1), (2, 2)]) +def test_values_polynomial_f_g(nof_observables: int, order: int): + """Test to check polynomial_f_g method. + + Check if we get exactly the correct F and G matrices for a few order/observation combinations + + Args: + nof_observables (int): Dimension of observation + order (int): Polynomial order (0=constant, 1=linear, 2=quadratic etc.) + + """ + model = DLM() + model.polynomial_f_g(nof_observables=nof_observables, order=order) + if nof_observables == 1 and order == 1: + true_f = np.array([[1], [0]]) + true_g = np.array([[1.0, 1.0], [0.0, 1.0]]) + + elif nof_observables == 2 and order == 2: + true_f = np.array([[1.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [0.0, 0.0], [0.0, 0.0]]) + true_g = np.array( + [ + [1.0, 1.0, 1.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 1.0, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 1.0], + ] + ) + else: + true_f = None + true_g = None + assert np.all(model.f_matrix == true_f) + assert np.all(model.g_matrix == true_g) + + +def kullback_leibler_gaussian( + mean_0: np.ndarray, sigma_0: np.ndarray, mean_1: np.ndarray, sigma_1: np.ndarray +) -> float: + """Kullback-Leibler divergence from N(mean_0, sigma_0) to N(mean_1, sigma_1). + + Helper function to compute the Kullback Leibler divergence + + Duchi, J. "Derivations for Linear Algebra and Optimization" (PDF): 13. + https://stanford.edu/~jduchi/projects/general_notes.pdf#page=13 + + KL( (mean_0, sigma_0) || (mean_1, sigma_1)) = 0.5 * (tr(sigma_1^(-1) @ sigma_0) + + (mean_1 - mean_0).T @ sigma_1^(-1) @ (mean_1 - mean_0) - k + ln(det(sigma_1)/det(sigma_0))), + with k = dimension of multivariate normal distribution + + Args: + mean_0 (np.ndarray): Mean vector of first normal distribution of shape [k x 1] + sigma_0 (np.ndarray): Covariance matrix of first normal distribution of shape [k x k] + mean_1 (np.ndarray): Mean vector of second normal distribution of shape [k x 1] + sigma_1 (np.ndarray): Covariance matrix of second normal distribution of shape [k x k] + + Returns: + float: Kullback-Leibler divergence + + """ + k = mean_0.shape[0] + sigma_1_inv = np.linalg.inv(sigma_1) + diff_mean = mean_1 - mean_0 + statistic = 0.5 * ( + np.trace(sigma_1_inv @ sigma_0) + + diff_mean.T @ sigma_1_inv @ diff_mean + - k + + np.log(np.linalg.det(sigma_1) / np.linalg.det(sigma_0)) + ) + + return statistic.flatten()[0] + + +def create_init_state_and_covariance_matrices( + nof_observables: int, order: int, rho: float +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """Helper function to create initial state and V and W matrices. + + Args: + nof_observables (int): Number of observables + order (int): Order of the polynomial DLM (0==constant, 1==linear, etc.) 
+ rho (float): Correlation parameter to use in creation of W matrix + + Returns: + init_state (np.ndarray): initial state vector of shape [nof_observables * (order + 1) x 1] + v_true (np.ndarray): Observation covariance matrix of shape [nof_observables X nof_observables] + w_true (np.ndarray): State covariance matrix of shape + [nof_observables * (order + 1) X nof_observables * (order + 1)] + + """ + v_true = np.eye(nof_observables) + if order == 0: + w_true = np.eye(nof_observables) * (1 - rho) + np.ones((nof_observables, nof_observables)) * rho + init_state = np.zeros((nof_observables, 1)) + else: + init_state = np.concatenate([1 / 10 ** (np.array(range(order + 1)))] * nof_observables) + w_true = np.diag(init_state) * 0.1 + + return init_state.reshape(-1, 1), v_true, w_true + + +def forecasts_and_simulate_data( + nof_observables: int, order: int, rho: float, forecast_horizon: int +) -> Tuple[np.ndarray, np.ndarray]: + """Helper function to perform a single run of the test to check for consistency between forecasting functions and + simulated data. Checking if the covariance matrices are actually positive definite. Also checks error catching when + not all matrices are set and when forecast steps are set to a number smaller than 1. + + Args: + nof_observables (int): Number of observables + order (int): Order of the polynomial DLM (0==constant, 1==linear, etc.) + rho (float): Correlation parameter to use in creation of W matrix + forecast_horizon (int): Maximum forecast step ahead + + Returns: + statistic_observation_result (np.ndarray): Boolean array of shape [forecast_horizon, ] containing the results + of the test on the observation forecast + statistic_state_result (np.ndarray): Boolean array of shape [forecast_horizon, ] containing the results + of the test on the state forecast + + """ + nof_runs = 1000 + model = DLM() + with pytest.raises(ValueError): + _ = model.simulate_data(init_state=np.array([]), nof_timesteps=1) + model.polynomial_f_g(nof_observables=nof_observables, order=order) + init_state, model.v_matrix, model.w_matrix = create_init_state_and_covariance_matrices(nof_observables, order, rho) + model.calculate_g_power(max_power=forecast_horizon) + + init_cov = np.zeros(model.w_matrix.shape) + a_t, f_t = model.forecast_mean(init_state, forecast_steps=np.array(range(forecast_horizon)) + 1) + r_matrix_t, q_matrix_t = model.forecast_covariance(init_cov, forecast_steps=np.array(range(forecast_horizon)) + 1) + + with pytest.raises(ValueError): + _ = model.forecast_mean(init_state, forecast_steps=-10) + + with pytest.raises(ValueError): + _ = model.forecast_covariance(init_cov, forecast_steps=-10) + + state_realizations = np.zeros((model.nof_state_parameters, forecast_horizon, nof_runs)) + observation_realizations = np.zeros((model.nof_observables, forecast_horizon, nof_runs)) + + for run in range(nof_runs): + state_realizations[:, :, run], observation_realizations[:, :, run] = model.simulate_data( + init_state=init_state, nof_timesteps=forecast_horizon + ) + + statistic_observation_result = np.zeros(forecast_horizon).astype(bool) + statistic_state_result = np.zeros(forecast_horizon).astype(bool) + for forecast_step in range(forecast_horizon): + assert np.linalg.det(q_matrix_t[:, :, forecast_step]) > 0 + assert np.linalg.det(r_matrix_t[:, :, forecast_step]) > 0 + statistic_observation = kullback_leibler_gaussian( + observation_realizations[:, forecast_step, :].mean(axis=1).reshape(-1, 1), + np.cov(observation_realizations[:, forecast_step, :]).reshape(nof_observables, 
nof_observables), + f_t[:, [forecast_step]], + q_matrix_t[:, :, forecast_step], + ) + statistic_observation_result[forecast_step] = statistic_observation < 0.05 + statistic_state = kullback_leibler_gaussian( + state_realizations[:, forecast_step, :].mean(axis=1).reshape(-1, 1), + np.cov(state_realizations[:, forecast_step, :]).reshape( + model.nof_state_parameters, model.nof_state_parameters + ), + a_t[:, [forecast_step]], + r_matrix_t[:, :, forecast_step], + ) + statistic_state_result[forecast_step] = statistic_state < 0.05 + + return statistic_observation_result, statistic_state_result + + +@pytest.mark.parametrize( + "nof_observables, order, rho", [(1, 0, 0.8), (2, 0, 0.8), (1, 1, 0.8), (2, 1, 0.8), (3, 2, 0.8)] +) +def test_forecasts_and_simulate_data(nof_observables: int, order: int, rho: float): + """Function to perform multiple runs of the test to check for consistency between forecast functions and simulated + data. + + Multiple runs are carried out because of the stochastics in the methods under test + Eventually we check if more than half of the runs pass the test which would indicate good working code. + If less than half pass we feel like there is a bug in the code. + + Args: + nof_observables (int): Number of observables + order (int): Order of the polynomial DLM (0==constant, 1==linear, etc.) + rho (float): Correlation parameter to use in creation of W matrix + + """ + nof_tests = 5 + forecast_horizon = 5 + overall_test_observation = np.zeros((forecast_horizon, nof_tests)) + overall_test_state = np.zeros((forecast_horizon, nof_tests)) + for test in range(nof_tests): + overall_test_observation[:, test], overall_test_state[:, test] = forecasts_and_simulate_data( + nof_observables, order, rho, forecast_horizon + ) + + assert np.all(np.count_nonzero(overall_test_observation, axis=1) >= nof_tests / 2) + assert np.all(np.count_nonzero(overall_test_state, axis=1) >= nof_tests / 2) + + +def full_dlm_update_and_mahalanobis_distance( + nof_observables: int, order: int, rho: float, forecast_horizon: int +) -> Tuple[np.ndarray, np.ndarray]: + """Helper function to perform 1 test run to check full DLM update and mahalanobis distance calculation. + + Args: + nof_observables (int): Number of observables + order (int): Order of the polynomial DLM (0==constant, 1==linear, etc.) + rho (float): Correlation parameter to use in creation of W matrix + forecast_horizon (int): Maximum forecast step ahead + + Returns: + overall_test_fail (np.ndarray): Boolean array of shape [1, ] containing the result + of the test using all beams, True = fail, False = pass + per_beam_test_fail (np.ndarray): Boolean array of shape [nof_observables, ] containing the results + of the tests for each individual beam, True = fail, False = pass + + We are using the Cramer Von Mises test. 
The Mahalanobis distance should follow the chi2 distribution with the number + of degrees of freedom as specified in the args input argument + + """ + n_time = 100 + + model = DLM() + model.polynomial_f_g(nof_observables=nof_observables, order=order) + init_state, model.v_matrix, model.w_matrix = create_init_state_and_covariance_matrices(nof_observables, order, rho) + model.calculate_g_power(max_power=forecast_horizon) + + _, observations = model.simulate_data(init_state=init_state, nof_timesteps=n_time) + + cov_state = np.zeros(model.w_matrix.shape) + dlm_state = np.empty((model.nof_state_parameters, n_time + 1)) + dlm_state[:, 0] = init_state.flatten() + mhd_overall = np.empty(n_time) + mhd_per_beam = np.empty((model.nof_observables, n_time)) + mhd_overall[:] = np.nan + mhd_per_beam[:] = np.nan + + new_state_ignore, new_cov_ignore, _ = model.dlm_full_update( + observations[:, [0]], dlm_state[:, [0]], cov_state, mode="ignore" + ) + + true_state_forecast, _ = model.forecast_mean(dlm_state[:, [0]], forecast_steps=1) + true_cov_forecast, _ = model.forecast_covariance(cov_state, forecast_steps=1) + assert np.all(new_state_ignore == true_state_forecast) + assert np.all(new_cov_ignore == true_cov_forecast) + + with pytest.raises(TypeError): + _ = model.dlm_full_update(observations[:, [0]], dlm_state[:, [0]], cov_state, mode="error") + + with pytest.raises(AttributeError): + _ = model.calculate_mahalanobis_distance( + observations[:, :forecast_horizon], dlm_state[:, [0]], cov_state, forecast_steps=-10 + ) + with pytest.raises(AttributeError): + _ = model.calculate_mahalanobis_distance( + observations[:, : (forecast_horizon + 2)], dlm_state[:, [0]], cov_state, forecast_steps=forecast_horizon + ) + + with pytest.raises(AttributeError): + _ = model.calculate_mahalanobis_distance( + observations[:, :forecast_horizon], dlm_state[:, :3], cov_state, forecast_steps=forecast_horizon + ) + + for i in range(n_time): + dlm_state[:, [i + 1]], cov_state, _ = model.dlm_full_update( + observations[:, [i]], dlm_state[:, [i]], cov_state, mode="learn" + ) + if i + forecast_horizon < n_time: + ( + mhd_overall[i + forecast_horizon], + mhd_per_beam[:, [i + forecast_horizon]], + ) = model.calculate_mahalanobis_distance( + observations[:, i : (i + forecast_horizon)], + dlm_state[:, [i]], + cov_state, + forecast_steps=forecast_horizon, + return_statistics=False, + ) + + temp_mhd = mhd_overall[~np.isnan(mhd_overall)].flatten() + overall_test_result = cramervonmises( + temp_mhd[::forecast_horizon], "chi2", args=(forecast_horizon * model.nof_observables,) + ) + + # plt.figure() + # plt.hist(temp_mhd, density=True, cumulative=True, bins=100, histtype='step', color='k') + # plt.hist(temp_mhd[::forecast_horizon], density=True, cumulative=True, bins=100, histtype='step', color='r') + # x = np.linspace(start=0, stop=np.nanmax(mhd_overall), num=100) + # plt.plot(x, chi2.cdf(x, df=forecast_horizon * model.nof_observables), '-g') + + overall_test_fail = overall_test_result.pvalue < 0.05 + per_beam_test_fail = np.zeros(nof_observables).astype(bool) + + for beam in range(model.nof_observables): + temp_value = mhd_per_beam[beam, :].flatten() + temp_mhd = temp_value[~np.isnan(temp_value)] + test_result_beam = cramervonmises(temp_mhd[::forecast_horizon], "chi2", args=(forecast_horizon,)) + per_beam_test_fail[beam] = test_result_beam.pvalue < 0.05 + + return overall_test_fail, per_beam_test_fail + + +@pytest.mark.parametrize( + "nof_observables, order, rho, forecast_horizon", + [ + (1, 0, 0.8, 1), + (1, 1, 0.8, 1), + (2, 0, 0.8, 1), + 
(1, 0, 0.8, 10), + (2, 1, 0.8, 10), + (2, 0, 0.8, 10), + (3, 2, 0.8, 10), + ], +) +def test_full_dlm_update_and_mahalanobis_distance(nof_observables, order, rho, forecast_horizon): + """Function to perform multiple runs of the test to check full DLM update and mahalanobis distance calculation. + + Multiple runs are carried out because of the stochastics in the methods under test. + Eventually we check if more than half of the runs pass the test which would indicate good working code. + If less than half pass we feel like there is a bug in the code. + + Args: + nof_observables (int): Number of observables + order (int): Order of the polynomial DLM (0==constant, 1==linear, etc.) + rho (float): Correlation parameter to use in creation of W matrix + forecast_horizon (int): Maximum forecast step ahead + + """ + nof_tests = 5 + overall_test = np.zeros(nof_tests) + per_beam_test = np.zeros((nof_observables, nof_tests)) + for run in range(nof_tests): + overall_test[run], per_beam_test[:, run] = full_dlm_update_and_mahalanobis_distance( + nof_observables, order, rho, forecast_horizon + ) + + assert np.count_nonzero(overall_test) <= nof_tests / 2 + assert np.all(np.count_nonzero(per_beam_test, axis=1) <= nof_tests / 2) + + +@pytest.mark.parametrize("nof_observables, order, forecast_horizon", [(2, 0, 10), (2, 1, 10), (2, 2, 10)]) +def test_missing_value_mahalanobis_distance(nof_observables, order, forecast_horizon): + """Function to test if missing values in the observations are handled correctly. + + The functions should return a nan value in the one step ahead error where applicable. + We create 2 identical beams and remove data for a few timesteps of 1 of the beams. The mahalanobis distance should + be lower for the beam with missing data because effectively that error has been set to 0 in the processing and also + the number of degrees of freedom should be lower too. For the Mahalanobis distance check we add 1 to the start idx + as the first entry should still be the same but due to machine precision it might give a different value which + doesn't pass the test. Also, we subtract 2 from the end index for the same reason and to ensure the forecast + horizon 'covers' the missing data period and the test is actually valid. + Due to the stochastic nature of the process we can't really perform a good test to check validity of the chi2 + statistic, but visual inspection of plots has shown that it gives a sensible output. + + Args: + nof_observables (int): Number of observables + order (int): Order of the polynomial DLM (0==constant, 1==linear, etc.) 
+ forecast_horizon (int): Maximum forecast step ahead + + """ + n_time = 100 + start_idx_missing = 50 + end_idx_missing = 55 + + model = DLM() + model.polynomial_f_g(nof_observables=nof_observables, order=order) + init_state, model.v_matrix, model.w_matrix = create_init_state_and_covariance_matrices( + nof_observables, order, rho=0.8 + ) + model.calculate_g_power(max_power=forecast_horizon) + + _, observations = model.simulate_data(init_state=init_state, nof_timesteps=n_time) + observations[1, :] = observations[0, :].copy() + observations[1, start_idx_missing:end_idx_missing] = np.nan + + cov_state = np.zeros(model.w_matrix.shape) + dlm_state = np.empty((model.nof_state_parameters, n_time + 1)) + dlm_state[:, 0] = init_state.flatten() + dlm_state[int(model.nof_state_parameters / 2) :, 0] = dlm_state[: int(model.nof_state_parameters / 2), 0] + error = np.zeros((model.nof_observables, n_time)) + mhd_overall = np.empty(n_time) + mhd_per_beam = np.empty((model.nof_observables, n_time)) + mhd_overall[:] = np.nan + mhd_per_beam[:] = np.nan + + dof_overall = np.zeros(n_time) + dof_per_beam = np.zeros((model.nof_observables, n_time)) + chi2_overall = np.zeros(n_time) + chi2_per_beam = np.zeros((model.nof_observables, n_time)) + + for i in range(n_time): + dlm_state[:, [i + 1]], cov_state, error[:, [i]] = model.dlm_full_update( + observations[:, [i]], dlm_state[:, [i]], cov_state, mode="learn" + ) + if i + forecast_horizon < n_time: + ( + mhd_overall[i + forecast_horizon], + mhd_per_beam[:, [i + forecast_horizon]], + dof_overall[i + forecast_horizon], + dof_per_beam[:, [i + forecast_horizon]], + chi2_overall[i + forecast_horizon], + chi2_per_beam[:, [i + forecast_horizon]], + ) = model.calculate_mahalanobis_distance( + observations[:, i : (i + forecast_horizon)], + dlm_state[:, [i]], + cov_state, + forecast_steps=forecast_horizon, + return_statistics=True, + ) + + assert np.all(np.isnan(error[1, start_idx_missing:end_idx_missing])) + assert np.all( + mhd_per_beam[1, start_idx_missing + 1 : end_idx_missing + forecast_horizon - 2] + <= mhd_per_beam[0, start_idx_missing + 1 : end_idx_missing + forecast_horizon - 2] + ) + assert np.all( + dof_per_beam[1, start_idx_missing : end_idx_missing + forecast_horizon - 2] + <= dof_per_beam[0, start_idx_missing : end_idx_missing + forecast_horizon - 2] + ) + + +@pytest.mark.parametrize("nof_observables, order, forecast_horizon", [(2, 0, 10), (2, 1, 10), (2, 2, 10)]) +def test_missing_value_updating(nof_observables, order, forecast_horizon): + """Function to test if updating of the dlm works correctly when missing values in the observations are present. + + When no observation is present we should set the posterior equal to the prior for that variable, so we check if the + state evolves accordingly. + Checking if variance of observation estimate which has missing data is monotonically increasing over missing data + period. + Finally, checking if the implementation is correct by running a dlm model without any nan values and comparing the + state values of interest on equality to ensure the nan updating does not affect the non-nan values. + + Args: + nof_observables (int): Number of observables + order (int): Order of the polynomial DLM (0==constant, 1==linear, etc.) 
+ forecast_horizon (int): Maximum forecast step ahead + + """ + n_time = 100 + start_idx_missing = 50 + end_idx_missing = 55 + + model = DLM() + model.polynomial_f_g(nof_observables=nof_observables, order=order) + init_state, model.v_matrix, model.w_matrix = create_init_state_and_covariance_matrices( + nof_observables, order, rho=0 + ) + model.calculate_g_power(max_power=forecast_horizon) + + _, observations = model.simulate_data(init_state=init_state, nof_timesteps=n_time) + observations[1, :] = observations[0, :].copy() + observations_no_nan = observations.copy() + observations[1, start_idx_missing:end_idx_missing] = np.nan + + cov_state = np.zeros((model.w_matrix.shape[0], model.w_matrix.shape[1], n_time + 1)) + dlm_state = np.empty((model.nof_state_parameters, n_time + 1)) + dlm_state[:, 0] = init_state.flatten() + dlm_state[int(model.nof_state_parameters / 2) :, 0] = dlm_state[: int(model.nof_state_parameters / 2), 0] + error = np.zeros((model.nof_observables, n_time)) + + cov_state_no_nan = cov_state.copy() + dlm_state_no_nan = dlm_state.copy() + error_no_nan = error.copy() + + for i in range(n_time): + dlm_state[:, [i + 1]], cov_state[:, :, i + 1], error[:, [i]] = model.dlm_full_update( + observations[:, [i]], dlm_state[:, [i]], cov_state[:, :, i], mode="learn" + ) + dlm_state_no_nan[:, [i + 1]], cov_state_no_nan[:, :, i + 1], error_no_nan[:, [i]] = model.dlm_full_update( + observations_no_nan[:, [i]], dlm_state_no_nan[:, [i]], cov_state_no_nan[:, :, i], mode="learn" + ) + + for idx in range(end_idx_missing - start_idx_missing): + temp_prior = model.g_matrix @ dlm_state[:, start_idx_missing + idx] + temp_posterior = dlm_state[:, start_idx_missing + idx + 1] + assert np.allclose( + temp_prior[int(model.nof_state_parameters / 2) :], temp_posterior[int(model.nof_state_parameters / 2) :] + ) + + variance_observations = np.zeros((model.nof_observables, n_time + 1)) + for idx in range(n_time + 1): + temp_matrix = model.f_matrix.T @ cov_state[:, :, idx] @ model.f_matrix + variance_observations[:, idx] = np.diag(temp_matrix) + difference = np.diff(variance_observations[1, :]) + + assert np.all(difference[start_idx_missing:end_idx_missing] > 0) + + state_idx = int(model.nof_state_parameters / 2) + assert np.allclose(dlm_state[:state_idx, :], dlm_state_no_nan[:state_idx, :]) + assert np.allclose(cov_state[:state_idx, :state_idx, :], cov_state_no_nan[:state_idx, :state_idx, :]) + assert np.allclose(error[0, :], error_no_nan[0, :]) + + +@pytest.mark.parametrize("nof_observables, order, forecast_horizon", [(2, 0, 10), (2, 1, 10), (2, 2, 10)]) +def test_full_covariance_matrix(nof_observables, order, forecast_horizon): + """Function to test if we correctly construct the full covariance matrix. + + Compares the forecast covariance matrix calculated using the power formula from book with the 'standard' + method of calculating it recursively. Note that the observation variance contribution to the diagonal blocks seems + to be missing from the power formula version, this has been accounted for here. + + Args: + nof_observables (int): Number of observables + order (int): Order of the polynomial DLM (0==constant, 1==linear, etc.) 
+ forecast_horizon (int): Maximum forecast step ahead + + """ + n_time = 20 + + model = DLM() + model.polynomial_f_g(nof_observables=nof_observables, order=order) + init_state, model.v_matrix, model.w_matrix = create_init_state_and_covariance_matrices( + nof_observables, order, rho=0.8 + ) + model.calculate_g_power(max_power=forecast_horizon) + + _, observations = model.simulate_data(init_state=init_state, nof_timesteps=n_time) + + cov_state = np.zeros((model.w_matrix.shape[0], model.w_matrix.shape[1], n_time + 1)) + dlm_state = np.empty((model.nof_state_parameters, n_time + 1)) + dlm_state[:, 0] = init_state.flatten() + dlm_state[int(model.nof_state_parameters / 2) :, 0] = dlm_state[: int(model.nof_state_parameters / 2), 0] + error = np.zeros((model.nof_observables, n_time)) + + for i in range(n_time): + dlm_state[:, [i + 1]], cov_state[:, :, i + 1], error[:, [i]] = model.dlm_full_update( + observations[:, [i]], dlm_state[:, [i]], cov_state[:, :, i], mode="learn" + ) + + r_t_k, q_t_k = model.forecast_covariance( + c_matrix=cov_state[:, :, i], forecast_steps=np.array(range(forecast_horizon)) + 1 + ) + full_cov_model = model.create_full_covariance(r_t_k=r_t_k, q_t_k=q_t_k, forecast_steps=forecast_horizon) + full_cov_test = np.zeros(full_cov_model.shape) + + base_idx = np.array(range(model.nof_observables)) * forecast_horizon + + for k in np.array(range(forecast_horizon)) + 1: + for v in range(forecast_horizon - k + 1): + matrix_idx = np.ix_(base_idx + k - 1 + v, base_idx + k - 1) + matrix_idx_transpose = np.ix_(base_idx + k - 1, base_idx + k - 1 + v) + value = model.f_matrix.T @ model.g_power[:, :, v] @ r_t_k[:, :, k - 1] @ model.f_matrix + if v == 0: + value = value + model.v_matrix + full_cov_test[matrix_idx] = value + full_cov_test[matrix_idx_transpose] = value + + assert np.allclose(full_cov_model, full_cov_test) + + +def test_dlm_full_update_example(): + """Testing if implementation gives same output as the KURIT example from Harrison and West. + + See table 2.1 on page 41. + We use an uncorrelated duplication of the 1D input to check if the matrix multiplication works well. We needed + to round some values in between steps in order to replicate the results from the book. 2 minor things then remain: + - in dlm_state we changed the check value from 142.6 (from the example) to 142.7 (6th entry) which is the + answer we are getting. + - in cov_state we changed the test value from 20 to 21 for the last element for a similar reason. + Given the rest is giving exactly the same results we are confident this works. Our assumption is that the numbers + presented in the book table are rounded before being propagated to the next stage of the calculation, giving rise + to the differences. 
+ + """ + n_time = 9 + + model = DLM() + model.f_matrix = np.array([[1, 0], [0, 1]]) + model.g_matrix = np.array([[1, 0], [0, 1]]) + model.calculate_g_power(max_power=1) + model.v_matrix = np.array([[100, 0], [0, 100]]) + model.w_matrix = np.array([[5, 0], [0, 5]]) + + cov_state = np.zeros((model.w_matrix.shape[0], model.w_matrix.shape[1], n_time + 1)) + cov_state[:, :, 0] = np.array([[400, 0], [0, 400]]) + + dlm_state = np.empty((model.nof_state_parameters, n_time + 1)) + dlm_state[:, [0]] = np.array([[130], [130]]) + + observations = np.array( + [[150, 136, 143, 154, 135, 148, 128, 149, 146], [150, 136, 143, 154, 135, 148, 128, 149, 146]] + ) + + error = np.zeros((model.nof_observables, n_time)) + r_t_k = np.zeros((model.nof_state_parameters, model.nof_state_parameters, n_time)) + q_t_k = np.zeros((model.nof_observables, model.nof_observables, n_time)) + + for i in range(n_time): + r_t_k[:, :, [i]], q_t_k[:, :, [i]] = model.forecast_covariance(c_matrix=cov_state[:, :, i], forecast_steps=1) + + dlm_state[:, [i + 1]], cov_state[:, :, i + 1], error[:, [i]] = model.dlm_full_update( + observations[:, [i]], dlm_state[:, [i]], cov_state[:, :, i], mode="learn" + ) + + dlm_state[:, [i + 1]] = np.round(dlm_state[:, [i + 1]], decimals=1) + cov_state[:, :, i + 1] = np.round(cov_state[:, :, i + 1], decimals=0) + error[:, [i]] = np.round(error[:, [i]], decimals=1) + + dlm_state = np.round(dlm_state, decimals=1) + q_t_k = np.round(q_t_k, decimals=0) + r_t_k = np.round(r_t_k, decimals=0) + error = np.round(error, decimals=1) + cov_state = np.round(cov_state, decimals=0) + + adaptive_coefficient_0 = np.round(r_t_k[0, 0, :] / q_t_k[0, 0, :], decimals=2) + adaptive_coefficient_1 = np.round(r_t_k[1, 1, :] / q_t_k[1, 1, :], decimals=2) + + assert np.allclose(q_t_k[0, 0, :], np.array([505, 185, 151, 139, 133, 130, 128, 127, 126])) + assert np.allclose( + dlm_state[0, :], np.array([130.0, 146.0, 141.4, 141.9, 145.3, 142.7, 143.9, 140.4, 142.2, 143.0]) + ) + assert np.allclose(adaptive_coefficient_0, np.array([0.80, 0.46, 0.34, 0.28, 0.25, 0.23, 0.22, 0.21, 0.21])) + assert np.allclose(error[0, :], np.array([20.0, -10.0, 1.6, 12.1, -10.3, 5.3, -15.9, 8.6, 3.8])) + assert np.allclose(cov_state[0, 0, :], np.array([400, 80, 46, 34, 28, 25, 23, 22, 21, 21])) + + assert np.allclose(error[0, :], error[1, :]) + assert np.allclose(dlm_state[0, :], dlm_state[1, :]) + assert np.allclose(cov_state[0, 0, :], cov_state[1, 1, :]) + assert np.all(cov_state[0, 1, :] == 0) + assert np.all(cov_state[1, 0, :] == 0) + assert np.allclose(adaptive_coefficient_0, adaptive_coefficient_1) + assert np.allclose(r_t_k[0, 0, :], r_t_k[1, 1, :]) + assert np.all(r_t_k[0, 1, :] == 0) + assert np.all(r_t_k[1, 0, :] == 0) + assert np.allclose(q_t_k[0, 0, :], q_t_k[1, 1, :]) + assert np.all(q_t_k[0, 1, :] == 0) + assert np.all(q_t_k[1, 0, :] == 0) diff --git a/tests/test_gas_species.py b/tests/test_gas_species.py new file mode 100644 index 0000000..827d27b --- /dev/null +++ b/tests/test_gas_species.py @@ -0,0 +1,97 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Test module for gas species superclass. 
 + +This module provides tests for the gas species superclass in pyELQ + +""" + + +import numpy as np +import pytest + +from pyelq.gas_species import C2H6, C3H8, CH4, CO2, NO2 + + +@pytest.mark.parametrize("gas_species", [CH4, C2H6, C3H8, CO2, NO2]) +def test_consistency_emission_rate(gas_species): + """Basic test to check consistency in gas species methods. + + Checks density conversions to/from kg/hr and m^3 /s to check that they match up. + Checks with default (STP) temp and pressure and then with randomly perturbed temperature and pressure. + + Args: + gas_species (pyelq.gas_species): gas species type to check + + """ + emission_m3s_start = np.random.rand(5, 1) * 10 + emission_kghr_start = np.random.rand(5, 1) * 1000 + alternate_temperature = 273.15 * (np.random.rand(1) + 0.5) + alternate_pressure = 100 * (np.random.rand(1) + 0.5) + gas_object = gas_species() + + kghr_intermediate = gas_object.convert_emission_m3s_to_kghr(emission_m3s_start) + m3s_result = gas_object.convert_emission_kghr_to_m3s(kghr_intermediate) + assert np.allclose(emission_m3s_start, m3s_result) + + kghr_intermediate = gas_object.convert_emission_m3s_to_kghr( + emission_m3s_start, temperature=alternate_temperature, pressure=alternate_pressure + ) + m3s_result = gas_object.convert_emission_kghr_to_m3s( + kghr_intermediate, temperature=alternate_temperature, pressure=alternate_pressure + ) + assert np.allclose(emission_m3s_start, m3s_result) + + m3s_intermediate = gas_object.convert_emission_kghr_to_m3s(emission_kghr_start) + kghr_result = gas_object.convert_emission_m3s_to_kghr(m3s_intermediate) + assert np.allclose(emission_kghr_start, kghr_result) + + m3s_intermediate = gas_object.convert_emission_kghr_to_m3s( + emission_kghr_start, temperature=alternate_temperature, pressure=alternate_pressure + ) + kghr_result = gas_object.convert_emission_m3s_to_kghr( + m3s_intermediate, temperature=alternate_temperature, pressure=alternate_pressure + ) + assert np.allclose(emission_kghr_start, kghr_result) + + +@pytest.mark.parametrize( + "gas_species, temperature, density", + [ + (CH4, 293.15, 0.668), + (CH4, 273.15, 0.717), + (C2H6, 273.15, 1.3547), + (C3H8, 293.15, 1.8988), + (C3H8, 303.15, 1.8316), + (CO2, 293.15, 1.842), + (CO2, 273.15, 1.977), + (NO2, 273.15, 2.05), + ], +) +def test_density_calculation(gas_species, temperature, density): + """Test density calculation against known values for a set of gases https://www.engineeringtoolbox.com/gas-density- + d_158.html https://encyclopedia.airliquide.com/ethane#properties + https://encyclopedia.airliquide.com/propane#properties https://www.thermopedia.com/content/980/ + + Assumes atmospheric pressure of 101.325 kPa + + Args: + gas_species (pyelq.gas_species): gas species type to check + temperature (float): temperature + density (float): true density from reference. + + """ + gas_object = gas_species() + result = gas_object.gas_density(temperature=temperature, pressure=101.325) + assert np.isclose(result, density, rtol=1e-2) + + +@pytest.mark.parametrize("gas_species", [CH4, C2H6, C3H8, CO2, NO2]) +def test_name_and_formula(gas_species): + """Test to see if name and formula give back a string output.""" + gas_object = gas_species() + assert isinstance(gas_object.name, str) + assert isinstance(gas_object.formula, str) diff --git a/tests/test_gaussian_plume.py b/tests/test_gaussian_plume.py new file mode 100644 index 0000000..97ac66b --- /dev/null +++ b/tests/test_gaussian_plume.py @@ -0,0 +1,610 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Test module for gaussian plume module. + +This module provides various tests for the Gaussian plume related code part of pyELQ + +""" + +from copy import deepcopy + +import numpy as np +import pandas as pd +import pytest + +from pyelq.coordinate_system import ENU +from pyelq.dispersion_model.gaussian_plume import GaussianPlume +from pyelq.gas_species import CH4 +from pyelq.meteorology import Meteorology, MeteorologyGroup +from pyelq.sensor.beam import Beam +from pyelq.sensor.satellite import Satellite +from pyelq.sensor.sensor import Sensor, SensorGroup +from pyelq.source_map import SourceMap + + +@pytest.fixture(name="met_object") +def fixture_met_object(): + """Fixture to define a meteorology object.""" + location = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0) + loc_in = np.array([[-1, -1, -1], [0, 0, 0], [1, 1, 1]]) + location.from_array(loc_in) + time = pd.arrays.DatetimeArray( + pd.date_range(pd.Timestamp.fromisoformat("2022-01-01 00:00:00"), periods=loc_in.shape[0], freq="s") + )[:, None] + met_object = Meteorology() + met_object.location = location + met_object.time = time + met_object.u_component = np.random.randint(low=1, high=5, size=time.shape) + met_object.v_component = np.random.randint(low=1, high=5, size=time.shape) + met_object.calculate_wind_direction_from_uv() + met_object.calculate_wind_speed_from_uv() + met_object.temperature = np.random.randint(low=270, high=275, size=time.shape) + met_object.pressure = np.random.randint(low=99, high=103, size=time.shape) + met_object.wind_turbulence_horizontal = 5 + 10 * np.random.random(size=time.shape) + met_object.wind_turbulence_vertical = 5 + 10 * np.random.random(size=time.shape) + return met_object + + +@pytest.fixture(name="met_object_single") +def fixture_met_object_single(): + """Fixture to define a meteorology object with a single observation.""" + location = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0) + loc_in = np.array([[0, 0, 0]]) + location.from_array(loc_in) + time = pd.arrays.DatetimeArray( + pd.date_range(pd.Timestamp.fromisoformat("2022-01-01 00:00:00"), periods=loc_in.shape[0], freq="s") + )[:, None] + met_object = Meteorology() + met_object.location = location + met_object.time = time + met_object.u_component = np.random.randint(low=1, high=5, size=time.shape) + met_object.v_component = np.random.randint(low=1, high=5, size=time.shape) + met_object.calculate_wind_direction_from_uv() + met_object.calculate_wind_speed_from_uv() + met_object.temperature = np.random.randint(low=270, high=275, size=time.shape) + met_object.pressure = np.random.randint(low=99, high=103, size=time.shape) + met_object.wind_turbulence_horizontal = 5 * np.ones(time.shape) + met_object.wind_turbulence_vertical = 5 * np.ones(time.shape) + return met_object + + +@pytest.fixture(name="sensor_object") +def fixture_sensor_object(): + """Fixture to define a generic sensor object.""" + sensor_object = Sensor() + location = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0) + location.from_array(np.array([[25, 0, 0]])) + sensor_object.location = location + time = pd.arrays.DatetimeArray( + pd.date_range(pd.Timestamp.fromisoformat("2022-01-01 00:00:00"), periods=5, freq="ns") + )[:, None] + sensor_object.time = time + sensor_object.concentration = np.zeros(time.size) + sensor_object.label = "Generic" + return sensor_object + + +@pytest.fixture(name="drone_object") +def fixture_drone_object(): + """Fixture to define a drone sensor object.""" + sensor_object = 
Sensor() + location = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0) + loc_in = np.array([[0, 50, 0], [25, 25, 0], [50, 0, 0]]) + location.from_array(loc_in) + sensor_object.location = location + time = pd.arrays.DatetimeArray( + pd.date_range(pd.Timestamp.fromisoformat("2022-01-01 00:00:00"), periods=loc_in.shape[0], freq="s") + )[:, None] + sensor_object.time = time + sensor_object.concentration = np.zeros(time.size) + sensor_object.label = "Generic" + return sensor_object + + +@pytest.fixture(name="beam_object") +def fixture_beam_object(): + """Fixture to define a beam sensor object.""" + beam_location = ENU(ref_latitude=0, ref_longitude=0, ref_altitude=0) + beam_location.from_array(np.array([[24.99, 0, 0], [25.01, 0, 0]])) + beam_object = Beam() + beam_object.location = beam_location + time = pd.arrays.DatetimeArray( + pd.date_range(pd.Timestamp.fromisoformat("2022-01-01 00:00:00"), periods=4, freq="ns") + )[:, None] + beam_object.time = time + beam_object.concentration = np.zeros(time.size) + beam_object.label = "Beam" + return beam_object + + +@pytest.fixture(name="satellite_object") +def fixture_satellite_object(): + """Fixture to define a satellite sensor object.""" + satellite_location = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0) + temp_array = np.array( + [[-25, 25, 0], [0, 25, 0], [25, 25, 0], [25, 0, 0], [25, -25, 0], [0, -25, 0], [-25, -25, 0], [-25, 0, 0]] + ) + satellite_location.from_array(temp_array) + satellite_object = Satellite() + satellite_object.location = satellite_location + time = None + satellite_object.time = time + satellite_object.concentration = np.zeros(temp_array.shape[0]) + satellite_object.label = "Satellite" + return satellite_object + + +@pytest.mark.parametrize("sourcemap_type", ["central", "hypercube"]) +def test_compute_coupling_array(sourcemap_type): + """Test to check compute_coupling_array method. + + Tests two configurations: + 1- Places a sensor on the upwind edge of the possible source domain, computes + the couplings to randomly-generated sources, and then checks that the coupling values + are all 0. + 2- Generates a random number of sensor locations, at random downwind locations, then checks + that the computed coupling array has the correct shape, that all values are >=0, and that + values for which the raw coupling is less than the minimum contribution have been correctly + set to 0. 
+ + """ + location = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0) + source_object = SourceMap() + source_object.generate_sources( + location, sourcemap_type=sourcemap_type, nof_sources=3, sourcemap_limits=np.array([[-1, 1], [-1, 1], [-1, 1]]) + ) + plume_object = GaussianPlume(source_map=source_object) + coupling_array = plume_object.compute_coupling_array( + sensor_x=np.array([-1]), + sensor_y=np.array([0]), + sensor_z=np.array([0]), + source_z=np.array([0]), + wind_speed=np.array([0]), + theta=np.array([0]), + wind_turbulence_horizontal=np.array([5]), + wind_turbulence_vertical=np.array([5]), + gas_density=1, + ) + assert np.all(coupling_array == 0) + + random_shape = np.random.randint(1, 5, 3) + coupling_array = plume_object.compute_coupling_array( + sensor_x=np.random.random(random_shape) * 6 - 3, + sensor_y=np.random.random(random_shape) * 6 - 3, + sensor_z=np.random.random(random_shape) * 6 - 3, + source_z=np.array(0), + wind_speed=np.random.randint(1, 5), + theta=np.random.random(1) * 2 * np.pi, + wind_turbulence_horizontal=np.array([5]), + wind_turbulence_vertical=np.array([5]), + gas_density=1, + ) + assert np.all(coupling_array.shape == random_shape) + assert np.all(coupling_array >= 0) + assert not np.any(np.logical_and(coupling_array > 0, coupling_array < plume_object.minimum_contribution)) + + +@pytest.mark.parametrize("sourcemap_type", ["central", "hypercube"]) +@pytest.mark.parametrize("coordinate_type", ["ENU", "LLA"]) +def test_compute_coupling_single_sensor_non_satellite( + sourcemap_type, coordinate_type, met_object, sensor_object, beam_object, met_object_single, drone_object +): + """Test to check compute_coupling_single_sensor method for a non satellite sensor. + + Coordinate_type is varied from ENU to LLA to check the way coupling handles the coordinate conversions. + + Takes in a generic sensor object and a beam sensor object. The observation locations for the generic sensor object + are defined at the mid-points of the beam sensors. Performs the following tests: 1- Checks the shape of the + coupling array produced (in both the general sensor and beam cases). 2- Compares the beam sensor coupling values + with point sensor values evaluated at the midpoints of the beams, checks that they are close (for the first + four time points). 3- Checks that inputting a sensor with multiple location values and a single meteorological + observation gives the correct shape, and calculates the right values used to check the drone type sensor + case. 
+ + """ + location = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0) + source_object = SourceMap() + source_object.generate_sources( + location, + sourcemap_type=sourcemap_type, + nof_sources=3, + sourcemap_limits=np.array([[-100, 100], [-100, 100], [-100, 100]]), + ) + + if coordinate_type == "LLA": + source_object.location = source_object.location.to_lla() + + plume_object = GaussianPlume(source_map=source_object) + plume_object.minimum_contribution = 1e-15 + + coupling = plume_object.compute_coupling_single_sensor( + sensor_object=sensor_object, meteorology=met_object, gas_object=None + ) + assert coupling.shape == (sensor_object.nof_observations, source_object.nof_sources) + + coupling_beam = plume_object.compute_coupling_single_sensor( + sensor_object=beam_object, meteorology=met_object, gas_object=None + ) + + assert coupling_beam.shape == (beam_object.nof_observations, source_object.nof_sources) + assert np.allclose(coupling_beam, coupling[:-1, :]) + + coupling_drone = plume_object.compute_coupling_single_sensor( + sensor_object=drone_object, meteorology=met_object_single, gas_object=None + ) + + assert coupling_drone.shape == (drone_object.nof_observations, source_object.nof_sources) + + drone_location = drone_object.location.to_array() + for idx in range(drone_object.nof_observations): + drone_object_single = deepcopy(drone_object) + + drone_object_single.time = drone_object.time[idx, :] + drone_object_single.concentration = drone_object.concentration[[idx]] + drone_object_single.location.from_array(drone_location[[idx], :]) + + coupling_drone_single = plume_object.compute_coupling_single_sensor( + sensor_object=drone_object_single, meteorology=met_object_single, gas_object=None + ) + assert np.allclose(coupling_drone[idx, :], coupling_drone_single) + + +@pytest.mark.parametrize("sourcemap_type", ["central", "hypercube"]) +@pytest.mark.parametrize("inclusion_radius", [26, 1]) +def test_compute_coupling_single_sensor_satellite(sourcemap_type, inclusion_radius, met_object, satellite_object): + """Test to check compute_coupling_single_sensor method for a satellite sensor. + + Checks for consistent shapes, i.e. length of list output and shapes of array with and without usage of + inclusion_idx. 
+ + """ + location = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0) + source_object = SourceMap() + source_object.generate_sources( + location, + sourcemap_type=sourcemap_type, + nof_sources=3, + sourcemap_limits=np.array([[-100, 100], [-100, 100], [-100, 100]]), + ) + + plume_object = GaussianPlume(source_map=source_object) + plume_object.minimum_contribution = 1e-15 + + coupling_sat = plume_object.compute_coupling_single_sensor( + sensor_object=satellite_object, meteorology=met_object, gas_object=None + ) + + assert len(coupling_sat) == source_object.nof_sources + assert np.all([value.shape == (satellite_object.nof_observations, 1) for value in coupling_sat]) + + source_object.calculate_inclusion_idx(sensor_object=satellite_object, inclusion_radius=inclusion_radius) + coupling_sat_subset = plume_object.compute_coupling_single_sensor( + sensor_object=satellite_object, meteorology=met_object, gas_object=None + ) + assert len(coupling_sat_subset) == source_object.nof_sources + assert np.all( + [ + coupling_sat_subset[value].size == source_object.inclusion_n_obs[value] + for value in range(source_object.nof_sources) + ] + ) + + +@pytest.mark.parametrize("sourcemap_type", ["central", "hypercube"]) +def test_coupling_non_stp(sourcemap_type, met_object, sensor_object): + """Test to check if magnitude of coupling value changes correctly when using a non-Standard Pressure and + Temperature. + + Performs two tests: + 1- The temperature is decreased to 100 degrees below the default: we check that the coupling + values all decrease as expected (due to higher density). + 2- The pressure is decreased to 80 kPa below the default: we check that the coupling values + all increase as expected (due to lower density). + + """ + location = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0) + source_object = SourceMap() + source_object.generate_sources( + location, + sourcemap_type=sourcemap_type, + nof_sources=3, + sourcemap_limits=np.array([[-100, 100], [-100, 100], [-100, 100]]), + ) + + plume_object = GaussianPlume(source_map=source_object) + plume_object.minimum_contribution = 1e-15 + + gas_species = CH4() + met_object.temperature = np.array(273.15) + met_object.pressure = np.array(101.325) + coupling_stp = plume_object.compute_coupling_single_sensor( + sensor_object=sensor_object, meteorology=met_object, gas_object=gas_species + ) + + met_object.temperature = np.array(273.15 - 100) + met_object.pressure = None + coupling_low_temp = plume_object.compute_coupling_single_sensor( + sensor_object=sensor_object, meteorology=met_object, gas_object=gas_species + ) + assert np.all(coupling_stp >= coupling_low_temp) + + met_object.temperature = None + met_object.pressure = np.array(101.325 - 80) + coupling_low_pressure = plume_object.compute_coupling_single_sensor( + sensor_object=sensor_object, meteorology=met_object, gas_object=gas_species + ) + + assert np.all(coupling_stp <= coupling_low_pressure) + + +def test_not_implemented_error(): + """Simple test to check if correct error is thrown.""" + with pytest.raises(NotImplementedError): + plume_object = GaussianPlume(source_map=SourceMap()) + plume_object.compute_coupling_single_sensor(sensor_object=None, meteorology=Meteorology()) + + +def test_compute_coupling(monkeypatch): + """Test the high level function to see if the return is of the correct type.""" + + def mock_coupling(*args, **kwargs): + """Return an empty array instead of computing the actual coupling.""" + return np.array([]) + + monkeypatch.setattr(GaussianPlume, 
"compute_coupling_single_sensor", mock_coupling) + + plume_object = GaussianPlume(source_map=SourceMap()) + coupling_object = plume_object.compute_coupling(sensor_object=Sensor(), meteorology_object=Meteorology()) + assert isinstance(coupling_object, np.ndarray) + + sensor_group = SensorGroup() + sensor = Sensor() + sensor.label = "sensor_1" + sensor_group.add_sensor(sensor) + sensor = Sensor() + sensor.label = "sensor_2" + sensor_group.add_sensor(sensor) + coupling_object = plume_object.compute_coupling(sensor_object=sensor_group, meteorology_object=Meteorology()) + assert isinstance(coupling_object, dict) + assert np.all(coupling_object.keys() == sensor_group.keys()) + assert np.all([isinstance(value, np.ndarray) for value in coupling_object.values()]) + + coupling_object = plume_object.compute_coupling( + sensor_object=sensor_group, meteorology_object=Meteorology(), output_stacked=True + ) + assert isinstance(coupling_object, np.ndarray) + + object_1 = Meteorology() + object_1.label = "sensor_1" + object_2 = Meteorology() + object_2.label = "sensor_2" + group_object = MeteorologyGroup() + group_object.add_object(object_1) + group_object.add_object(object_2) + coupling_object = plume_object.compute_coupling(sensor_object=sensor_group, meteorology_object=group_object) + assert isinstance(coupling_object, dict) + assert np.all(coupling_object.keys() == sensor_group.keys()) + assert np.all([isinstance(value, np.ndarray) for value in coupling_object.values()]) + + with pytest.raises(TypeError): + plume_object.compute_coupling(sensor_object=None, meteorology_object=Meteorology()) + + with pytest.raises(TypeError): + plume_object.compute_coupling(sensor_object=sensor, meteorology_object=group_object) + + +@pytest.mark.parametrize("sourcemap_type", ["central", "hypercube"]) +def test_interpolate_meteorology(sourcemap_type, met_object, sensor_object, satellite_object, beam_object): + """Test to check interpolate_meteorology method. + + Tests as follows: + 1- For each sensor type, check that the meteorology interpolation returns the correct + number of values. + 2- Checks that when the horizontal wind turbulence on the met object is a single value, the + interpolate function correctly returns the same value for all entries. + 3- In the generic sensor case only checks when a field (pressure) is set to None, the + interpolate function correctly returns None. + + Note that the specific values returned are not checked, it is assumes that this is being tested + for the underlying interpolation function. 
+ + """ + location = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0) + source_object = SourceMap() + source_object.generate_sources( + location, + sourcemap_type=sourcemap_type, + nof_sources=3, + sourcemap_limits=np.array([[-100, 100], [-100, 100], [-100, 100]]), + ) + + plume_object = GaussianPlume(source_map=source_object) + + for temp_sensor in [beam_object, sensor_object]: + return_values = plume_object.interpolate_meteorology( + meteorology=met_object, variable_name="u_component", sensor_object=temp_sensor + ) + assert return_values.shape == (temp_sensor.nof_observations, 1) + + return_values = plume_object.interpolate_meteorology( + meteorology=met_object, variable_name="wind_turbulence_horizontal", sensor_object=temp_sensor + ) + assert return_values.shape == (temp_sensor.nof_observations, 1) + if met_object.wind_turbulence_horizontal.size == 1: + assert np.all(return_values == met_object.wind_turbulence_horizontal) + + return_values = plume_object.interpolate_meteorology( + meteorology=met_object, variable_name="u_component", sensor_object=satellite_object + ) + assert return_values.shape == (1, plume_object.source_map.nof_sources) + + return_values = plume_object.interpolate_meteorology( + meteorology=met_object, variable_name="wind_turbulence_horizontal", sensor_object=satellite_object + ) + assert return_values.shape == (1, plume_object.source_map.nof_sources) + if met_object.wind_turbulence_horizontal.size == 1: + assert np.all(return_values == met_object.wind_turbulence_horizontal) + + met_object.pressure = None + return_values = plume_object.interpolate_meteorology( + meteorology=met_object, variable_name="pressure", sensor_object=sensor_object + ) + assert return_values is None + + +def test_interpolate_all_meteorology(met_object, sensor_object): + """Checks interpolate_all_meteorology for correct output when run_interpolation flag is set to False.""" + plume_object = GaussianPlume(source_map=SourceMap()) + ( + gas_density, + u_interpolated, + v_interpolated, + wind_turbulence_horizontal, + wind_turbulence_vertical, + ) = plume_object.interpolate_all_meteorology( + sensor_object=sensor_object, meteorology=met_object, gas_object=CH4(), run_interpolation=False + ) + assert np.all(gas_density == CH4().gas_density(temperature=met_object.temperature, pressure=met_object.pressure)) + assert np.all(u_interpolated == met_object.u_component) + assert np.all(v_interpolated == met_object.v_component) + assert np.all(wind_turbulence_horizontal == met_object.wind_turbulence_horizontal) + assert np.all(wind_turbulence_vertical == met_object.wind_turbulence_vertical) + + +@pytest.mark.parametrize("sourcemap_type", ["central", "hypercube"]) +def test_calculate_gas_density(sourcemap_type, met_object, sensor_object, satellite_object, beam_object): + """Test to check calculate_gas_density method. + + The following tests are performed (all performed for all sensor types): 1- Checks that when the temperature and + pressure values are fixed for all time (at standard temperature and pressure), the interpolation returns the + density at STP for all times. 2- Checks that the returned vector has the correct shape. 3- Checks + that when the pressure and temperature are set to None, a vector of ones (of the correct shape) is returned + by the interpolate function. 
+
+    """
+    location = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0)
+    source_object = SourceMap()
+    source_object.generate_sources(
+        location,
+        sourcemap_type=sourcemap_type,
+        nof_sources=3,
+        sourcemap_limits=np.array([[-100, 100], [-100, 100], [-100, 100]]),
+    )
+
+    met_object.temperature = np.ones_like(met_object.temperature) * 273.15
+    met_object.pressure = np.ones_like(met_object.pressure) * 101.325
+
+    gas_species = CH4()
+    plume_object = GaussianPlume(source_map=source_object)
+    return_values = plume_object.calculate_gas_density(
+        meteorology=met_object, sensor_object=sensor_object, gas_object=gas_species
+    )
+    assert np.all(return_values == gas_species.gas_density())
+    assert return_values.shape == (sensor_object.nof_observations, 1)
+
+    return_values = plume_object.calculate_gas_density(
+        meteorology=met_object, sensor_object=beam_object, gas_object=gas_species
+    )
+    assert np.all(return_values == gas_species.gas_density())
+    assert return_values.shape == (beam_object.nof_observations, 1)
+
+    return_values = plume_object.calculate_gas_density(
+        meteorology=met_object, sensor_object=satellite_object, gas_object=gas_species
+    )
+    assert np.all(return_values == gas_species.gas_density())
+    assert return_values.shape == (1, plume_object.source_map.nof_sources)
+
+    met_object.pressure = None
+    met_object.temperature = None
+    return_values = plume_object.calculate_gas_density(
+        meteorology=met_object, sensor_object=sensor_object, gas_object=None
+    )
+    assert np.all(return_values == 1)
+    assert return_values.shape == (sensor_object.nof_observations, 1)
+
+    return_values = plume_object.calculate_gas_density(
+        meteorology=met_object, sensor_object=beam_object, gas_object=None
+    )
+    assert np.all(return_values == 1)
+    assert return_values.shape == (beam_object.nof_observations, 1)
+
+    return_values = plume_object.calculate_gas_density(
+        meteorology=met_object, sensor_object=satellite_object, gas_object=None
+    )
+    assert np.all(return_values == 1)
+    assert return_values.shape == (1, plume_object.source_map.nof_sources)
+
+
+def test_source_on_switch(met_object, sensor_object):
+    """Test to check the implementation of the source_on attribute.
+
+    When the source_on switch is False for an observation, that source is treated as switched off, so the coupling
+    for that observation should be 0. We also check that the corresponding values were nonzero before the switch was
+    applied, so we can be sure the switch itself is what sets them to 0.
+
+    """
+    location = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0)
+    source_object = SourceMap()
+    source_object.generate_sources(
+        location,
+        sourcemap_type="central",
+        nof_sources=1,
+        sourcemap_limits=np.array([[-100, 100], [-100, 100], [-100, 100]]),
+    )
+
+    plume_object = GaussianPlume(source_map=source_object)
+    coupling = plume_object.compute_coupling_single_sensor(
+        sensor_object=sensor_object, meteorology=met_object, gas_object=None
+    )
+    change_point = int(np.floor(sensor_object.nof_observations / 2))
+    switch = np.ones(sensor_object.nof_observations)
+    switch[change_point:] = 0
+    sensor_object.source_on = switch.astype(bool)
+
+    coupling_switch = plume_object.compute_coupling_single_sensor(
+        sensor_object=sensor_object, meteorology=met_object, gas_object=None
+    )
+
+    assert np.all(coupling_switch[change_point:] == 0) and np.any(coupling[change_point:] > 0)
+
+
+def test_compute_coverage():
+    """Test to check whether the compute_coverage function can correctly determine which sources are, or are not,
+    within the coverage.
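+
+    As an illustration only, the couplings argument is an array with (in this test) one row per observation and one
+    column per source, and the thresholding behaviour can be adjusted through the keyword arguments, e.g.
+    (assuming the plume_object constructed below):
+
+        couplings = np.array([[1, 0], [0, 0], [0, 0], [1, 0]])
+        coverage = plume_object.compute_coverage(couplings)  # expected: array([True, False])
+        coverage = plume_object.compute_coverage(couplings, threshold_function=np.mean, coverage_threshold=6)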
+ + We define some coupling where there are two sources, and one source is coupled half of the time. We then check that + all the inputs work as intended. + + """ + location = ENU(ref_longitude=0, ref_latitude=0, ref_altitude=0) + source_object = SourceMap() + source_object.location = location + source_object.location.east = np.array([-10, 10]) + source_object.location.north = np.array([25, 25]) + source_object.location.up = np.array([0, 0]) + + plume_object = GaussianPlume(source_map=source_object) + + couplings = np.array( + [ + [1, 0], + [0, 0], + [0, 0], + [1, 0], + ] + ) + + coverage = plume_object.compute_coverage(couplings) + assert np.all(np.equal(coverage, np.array([True, False]))) + + coverage = plume_object.compute_coverage(couplings, coverage_threshold=0.3) + assert np.all(np.equal(coverage, np.array([False, False]))) + + coverage = plume_object.compute_coverage(couplings, threshold_function=np.mean, coverage_threshold=0.3) + assert np.all(np.equal(coverage, np.array([False, False]))) + + coverage = plume_object.compute_coverage(couplings, threshold_function=np.mean, coverage_threshold=6) + assert np.all(np.equal(coverage, np.array([True, False]))) diff --git a/tests/test_meteorology.py b/tests/test_meteorology.py new file mode 100644 index 0000000..6330e01 --- /dev/null +++ b/tests/test_meteorology.py @@ -0,0 +1,225 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Test module for meteorology superclass. + +This module provides tests for the meteorology superclass in pyELQ + +""" + +import datetime as dt + +import numpy as np +import pandas as pd +import pytest + +from pyelq.coordinate_system import LLA +from pyelq.meteorology import Meteorology, MeteorologyGroup + + +@pytest.mark.parametrize( + "u_component, v_component, truth", + [ + (0, 1, 1), + (np.sqrt(0.5), np.sqrt(0.5), 1), + (1, 0, 1), + (np.sqrt(0.5), -np.sqrt(0.5), 1), + (0, -1, 1), + (-np.sqrt(0.5), -np.sqrt(0.5), 1), + (-1, 0, 1), + (-np.sqrt(0.5), np.sqrt(0.5), 1), + ], +) +def test_wind_speed(u_component, v_component, truth): + """Basic test to check wind speed calculation from u and v. + + Args: + u_component (float): u component + v_component (float): v component + truth (float): true wind speed + + """ + + met_object = Meteorology() + met_object.u_component = u_component + met_object.v_component = v_component + met_object.calculate_wind_speed_from_uv() + + assert met_object.wind_speed == truth + + +@pytest.mark.parametrize( + "u_component, v_component, truth", + [ + (0, 1, 180), + (np.sqrt(0.5), np.sqrt(0.5), 225), + (1, 0, 270), + (np.sqrt(0.5), -np.sqrt(0.5), 315), + (0, -1, 0), + (-np.sqrt(0.5), -np.sqrt(0.5), 45), + (-1, 0, 90), + (-np.sqrt(0.5), np.sqrt(0.5), 135), + ], +) +def test_wind_direction(u_component, v_component, truth): + """Basic test to check wind direction (from) calculation from u and v. 
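+
+    The truth values follow the meteorological convention in which the wind direction is the direction the wind is
+    blowing from. As an illustration only (a sketch of the convention, not necessarily the implementation used in
+    pyelq.meteorology), the parametrized table can be reproduced with:
+
+        import numpy as np
+
+        wind_direction = np.mod(np.degrees(np.arctan2(-u_component, -v_component)), 360)
+
+    so that, for example, (u, v) = (0, 1) gives 180 degrees and (u, v) = (1, 0) gives 270 degrees.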
+ + Args: + u_component (float): u component + v_component (float): v component + truth (float): true wind direction (from) + + """ + + met_object = Meteorology() + met_object.u_component = u_component + met_object.v_component = v_component + met_object.calculate_wind_direction_from_uv() + + assert met_object.wind_direction == truth + + +def test_nof_observations(): + """Test if nof_observation property works as expected.""" + n_samples = np.random.randint(1, 100) + array = np.random.random((n_samples, 2)) + + lla_object = LLA() + lla_object.from_array(array) + + met_object = Meteorology() + assert met_object.nof_observations == 0 + + met_object.location = lla_object + assert met_object.nof_observations == n_samples + + +@pytest.mark.parametrize( + "wind_speed, wind_direction, u_component, v_component", + [ + (2, 0, 0, -2), + (1, 45, -np.sqrt(0.5), -np.sqrt(0.5)), + (1, 90, -1, 0), + (1, 135, -np.sqrt(0.5), np.sqrt(0.5)), + (1, 180, 0, 1), + (1, 225, np.sqrt(0.5), np.sqrt(0.5)), + (1, 270, 1, 0), + (1, 315, np.sqrt(0.5), -np.sqrt(0.5)), + ], +) +def test_calculate_uv_from_wind_speed_direction(wind_speed, wind_direction, u_component, v_component): + """Basic test to check the calculation of u and v components from wind speed and direction. + + Args: + wind_speed (float): Example wind speed. + wind_direction (float): Example wind direction. + u_component (float): True u value. + v_component (float): True v value. + + """ + + met_object = Meteorology() + met_object.wind_speed = wind_speed + met_object.wind_direction = wind_direction + met_object.calculate_uv_from_wind_speed_direction() + + assert np.isclose(met_object.u_component, u_component) + assert np.isclose(met_object.v_component, v_component) + + +@pytest.mark.parametrize( + "wind_speed, wind_direction, u_component, v_component", + [ + (2, 0, 0, -2), + (1, 45, -np.sqrt(0.5), -np.sqrt(0.5)), + (1, 90, -1, 0), + (1, 135, -np.sqrt(0.5), np.sqrt(0.5)), + (1, 180, 0, 1), + (1, 225, np.sqrt(0.5), np.sqrt(0.5)), + (1, 270, 1, 0), + (1, 315, np.sqrt(0.5), -np.sqrt(0.5)), + ], +) +def test_consistency_of_functions(wind_speed, wind_direction, u_component, v_component): + """Basic test to check the consistency between the conversion functions of u and v components to/from wind speed and + direction. + + Args: + wind_speed (float): Example wind speed. + wind_direction (float): Example wind direction. + u_component (float): True u value. + v_component (float): True v value. 
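+
+    As an illustration only (a sketch of the convention, not necessarily the implementation used in
+    pyelq.meteorology), the u and v truth values follow from the same meteorological convention:
+
+        import numpy as np
+
+        u_component = -wind_speed * np.sin(np.radians(wind_direction))
+        v_component = -wind_speed * np.cos(np.radians(wind_direction))
+
+    Converting u and v back to speed and direction should then recover the original values, which is the round trip
+    this test performs in both directions.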
+ + """ + + met_object = Meteorology() + met_object.wind_speed = wind_speed + met_object.wind_direction = wind_direction + met_object.calculate_uv_from_wind_speed_direction() + met_object.calculate_wind_speed_from_uv() + met_object.calculate_wind_direction_from_uv() + + assert np.isclose(met_object.wind_speed, wind_speed) + assert np.isclose(met_object.wind_direction, wind_direction) + + met_object = Meteorology() + met_object.u_component = u_component + met_object.v_component = v_component + met_object.calculate_wind_speed_from_uv() + met_object.calculate_wind_direction_from_uv() + + met_object.calculate_uv_from_wind_speed_direction() + + assert np.isclose(met_object.u_component, u_component) + assert np.isclose(met_object.v_component, v_component) + + +def test_meteorology_group(): + """Basic function to test MeteorologyGroup functionality, seeing if we can add objects to the group and check if the + group returns the right number of objects as well as if the uv calculation works for a group object.""" + object_1 = Meteorology() + object_1.label = "One" + object_1.wind_speed = np.array([1, 1, 1, 1]) + object_1.wind_direction = np.array([0, 90, 180, 270]) + object_2 = Meteorology() + object_2.label = "Two" + object_2.wind_speed = np.array([1, 1, 1, 1]) + object_2.wind_direction = np.array([0, 90, 180, 270]) + + group_object = MeteorologyGroup() + group_object.add_object(object_1) + group_object.add_object(object_2) + assert group_object.nof_objects == 2 + group_object.calculate_uv_from_wind_speed_direction() + for _, temp_object in group_object.items(): + assert np.allclose(temp_object.u_component, np.array([0, -1, 0, 1])) + assert np.allclose(temp_object.v_component, np.array([-1, 0, 1, 0])) + + +def test_calculate_wind_turbulence_horizontal(): + """Checks that the wind turbulence values are calculated correctly. + + To verify circstd, we define winds as draws from a normal distribution. We then check that the mean of the + calculated turbulence values is within 3 standard deviations of the true value. + + """ + + met = Meteorology() + met.time = pd.arrays.DatetimeArray( + np.array([dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 1)]).astype("datetime64[ns]") + ) + met.wind_direction = np.linspace(0, 360, met.time.shape[0]) + + sigma = 3 + + met.time = pd.arrays.DatetimeArray(pd.date_range(dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 2), freq="5s")) + met.wind_direction = np.random.normal(180, sigma, met.time.shape[0]) + + met.calculate_wind_turbulence_horizontal(window="300s") + + tolerance = 3 * np.std(met.wind_turbulence_horizontal) + mean_turbulence = np.mean(met.wind_turbulence_horizontal) + + assert (mean_turbulence - tolerance) < sigma < (mean_turbulence + tolerance) diff --git a/tests/test_model.py b/tests/test_model.py new file mode 100644 index 0000000..e761802 --- /dev/null +++ b/tests/test_model.py @@ -0,0 +1,113 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for the main ELQModel class.""" + +from copy import deepcopy + +import pytest + +from pyelq.component.background import SpatioTemporalBackground +from pyelq.component.error_model import BySensor +from pyelq.component.offset import PerSensor +from pyelq.component.source_model import Normal, NormalSlabAndSpike +from pyelq.model import ELQModel + + +@pytest.fixture(name="model_default") +def fix_model_default(sensor_group, met_group, gas_species): + """Fix an instance of model based on the sensor, meteorology and gas species provided.""" + model = ELQModel(sensor_object=sensor_group, meteorology=met_group, gas_species=gas_species) + return model + + +@pytest.fixture(params=[None, SpatioTemporalBackground], ids=["none", "spt"], name="background_model") +def fix_background_model(request): + """Fix a particular type of background model.""" + background_model = request.param + if background_model is None: + return None + return background_model() + + +@pytest.fixture(params=[None, Normal, NormalSlabAndSpike], ids=["none", "normal", "normal-ssp"], name="source_model") +def fix_source_model(request): + """Fix a particular type of source model.""" + source_model = request.param + if source_model is None: + return None + return source_model() + + +@pytest.fixture(params=[None, PerSensor], ids=["none", "per-sns"], name="offset_model") +def fix_offset_model(request): + """Fix a particular type of offset model.""" + offset_model = request.param + if offset_model is None: + return None + return offset_model() + + +@pytest.fixture(params=[None, BySensor], ids=["none", "by-sns"], name="error_model") +def fix_error_model(request): + """Fix a particular type of error model. + + We make sure we don't pass None to the model, as this will raise a UserWarning, instead we set it to the default + BySensor model. 
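+
+    As an illustration only, the fixture body below is equivalent to:
+
+        error_model = BySensor() if request.param is None else request.param()
+
+    whereas passing error_model=None straight into ELQModel would trigger the UserWarning mentioned above.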
+ + """ + error_model = request.param + if error_model is None: + return BySensor() + return error_model() + + +@pytest.fixture(name="model") +def fix_model(sensor_group, met_group, gas_species, background_model, source_model, error_model, offset_model): + """Create the ELQModel object using the data/model specifications.""" + if background_model is not None: + background_model.update_precision = True + if offset_model is not None: + offset_model.update_precision = True + if source_model is not None: + source_model.update_precision = True + model = ELQModel( + sensor_object=sensor_group, + meteorology=met_group, + gas_species=gas_species, + background=background_model, + source_model=source_model, + error_model=error_model, + offset_model=offset_model, + ) + model.initialise() + return model + + +def test_default(model_default): + """Test whether the default ELQModel case will initialise (with default component settings).""" + model_default.initialise() + model_default.n_iter = 5 + model_default.to_mcmc() + model_default.run_mcmc() + model_default.from_mcmc() + + +def test_run_mcmc(model): + """Test running a small number of iterations of the MCMC.""" + model.n_iter = 5 + model.to_mcmc() + model.run_mcmc() + model.from_mcmc() + + +def test_mcmc_iterations(model): + """Run one iteration of the MCMC for each of the samplers on the model, and check that the variables stored in the + results dictionary are of the shape expected.""" + model.n_iter = 1 + model.to_mcmc() + original_state = deepcopy(model.mcmc.state) + model.run_mcmc() + for var in model.mcmc.state.keys(): + assert model.mcmc.state[var].shape == original_state[var].shape diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py new file mode 100644 index 0000000..17d9c80 --- /dev/null +++ b/tests/test_preprocessing.py @@ -0,0 +1,210 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for the pre-processing class.""" + +from copy import deepcopy +from datetime import datetime, timedelta + +import numpy as np +import pandas as pd +import pytest + +from pyelq.meteorology import Meteorology, MeteorologyGroup +from pyelq.preprocessing import Preprocessor + + +def get_time_lims(sensor_group): + """Extract the time limits from the sensor group.""" + min_time, max_time = datetime.now(), datetime.now() + for sns in sensor_group.values(): + min_time = np.minimum(min_time, np.min(sns.time)) + max_time = np.maximum(max_time, np.max(sns.time)) + return min_time, max_time + + +@pytest.fixture(name="time_bin_edges") +def fix_time_bin_edges(sensor_group): + """Fix the time bin edges to be used for aggregation.""" + min_time, max_time = get_time_lims(sensor_group=sensor_group) + min_time, max_time = min_time - timedelta(seconds=60), max_time + timedelta(seconds=60) + time_bin_edges = pd.arrays.DatetimeArray(pd.date_range(min_time, max_time, freq="120s")) + return time_bin_edges + + +@pytest.fixture(name="block_times") +def fix_block_times(sensor_group): + """Fix the time bin edges for re-blocking the processed data.""" + min_time, max_time = get_time_lims(sensor_group=sensor_group) + min_time, max_time = min_time - timedelta(hours=1), max_time + timedelta(hours=1) + block_times = pd.arrays.DatetimeArray(pd.date_range(min_time, max_time, freq="1200s")) + return block_times + + +def add_random_nans(data_object, fields, percent_nan): + """Take in a data object (Sensor or Meteorology) and add NaNs in random locations.""" + for field in fields: + idx_nans = np.random.choice( + np.arange(data_object.time.shape[0]), + int(np.floor(data_object.time.shape[0] * percent_nan / 100)), + replace=False, + ) + data_with_nan = getattr(data_object, field) + data_with_nan[idx_nans] = np.nan + setattr(data_object, field, data_with_nan) + return data_object + + +@pytest.fixture(name="sensor_mod", params=[False, True], ids=["no_nans", "sns_nans"]) +def fix_sensor_mod(request, sensor_group): + """Generate versions of the supplied sensor object that do/don't have NaNs.""" + with_nans = request.param + if with_nans: + for sns in sensor_group.values(): + sns = add_random_nans(sns, ["concentration"], percent_nan=5.0) + return sensor_group + + +@pytest.fixture(name="meteorology", params=[False, True], ids=["no_nans", "met_nans"]) +def fix_meteorology(request, sensor_group): + """Fix a meteorology object for the preprocessing test. + + Sets up the wind direction to be between 358 and 2 degrees, so that we can check that the binning in the + preprocessing can recover values in this range. 
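+
+    As an illustration only (a sketch, not necessarily how the Preprocessor aggregates wind directions), a naive
+    arithmetic mean of directions drawn from this range would land near 180 degrees, whereas a circular mean
+    recovers a value near 0/360:
+
+        import numpy as np
+
+        directions = np.mod(358.0 + 4.0 * np.random.random_sample(1000), 360)
+        naive_mean = directions.mean()  # close to 180 degrees, clearly wrong
+        radians = np.radians(directions)
+        circular_mean = np.mod(np.degrees(np.arctan2(np.sin(radians).mean(), np.cos(radians).mean())), 360)
+        # circular_mean is close to 0 or 360 degrees, as expected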
+ + """ + with_nans = request.param + min_time, max_time = get_time_lims(sensor_group=sensor_group) + meteorology = Meteorology() + meteorology.time = pd.arrays.DatetimeArray(pd.date_range(min_time, max_time, freq="1s")) + meteorology.wind_speed = 1.9 + 0.2 * np.random.random_sample(size=meteorology.time.shape) + meteorology.wind_direction = np.mod(358.0 + 4.0 * np.random.random_sample(size=meteorology.time.shape), 360) + meteorology.wind_turbulence_horizontal = 10.0 * np.ones(shape=meteorology.time.shape) + meteorology.wind_turbulence_vertical = 10.0 * np.ones(shape=meteorology.time.shape) + meteorology.temperature = 293.0 * np.ones(shape=meteorology.time.shape) + meteorology.pressure = 101.0 * np.ones(shape=meteorology.time.shape) + if with_nans: + meteorology = add_random_nans( + meteorology, + [ + "wind_speed", + "wind_direction", + "wind_turbulence_horizontal", + "wind_turbulence_vertical", + "temperature", + "pressure", + ], + percent_nan=5.0, + ) + return meteorology + + +def check_field_values(data_object, field_list): + """Helper function to check whether all the listed fields on a given object are not NaN or Inf. + + Args: + data_object (Union[SensorGroup, MeteorologyGroup]): data object on which to check the fields. + field_list (list): list of fields to check. + + """ + for data in data_object.values(): + for field in field_list: + if (field != "time") and (getattr(data, field) is not None): + assert np.all(np.logical_not(np.isnan(getattr(data, field)))) + assert np.all(np.logical_not(np.isinf(getattr(data, field)))) + + +def test_initialize(sensor_mod, meteorology, time_bin_edges): + """Test that the preprocessing class initialises successfully. + + Using the wrapper construction to test both a single Meteorology input object as well as a MeteorologyGroup. + + """ + wrapper_initialise(sensor_mod, meteorology, time_bin_edges) + met_group = MeteorologyGroup() + for key in sensor_mod.keys(): + temp_object = deepcopy(meteorology) + temp_object.label = key + met_group.add_object(temp_object) + wrapper_initialise(sensor_mod, met_group, time_bin_edges) + + +def wrapper_initialise(sensor_mod_input, meteorology_input, time_bin_edges_input): + """Tests that the preprocessing class initialises successfully, and that the attached attributes have the correct + properties. + + Checks that: + - the time bin edges are correctly stored on the Preprocessor object. + - the same time stamps are assigned to the processed meteorology and sensor objects. + - the wind directions are between 358 and 2 degrees after averaging. + - the wind speeds are all between 1.9 and 2.1 m/s after averaging. + - there are no NaNs or Infs in the fields of the processed object. 
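+
+    As an illustration only (not executed here), a minimal sketch of the API exercised below:
+
+        preprocess = Preprocessor(
+            time_bin_edges=time_bin_edges_input, sensor_object=sensor_mod_input, met_object=meteorology_input
+        )
+        preprocess.filter_on_met(filter_variable=["wind_speed"], lower_limit=[2.0])
+        # After filtering with a lower limit, every remaining wind_speed value on the met objects should be
+        # at or above that limit; upper_limit works analogously.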
+ + """ + preprocess = Preprocessor( + time_bin_edges=time_bin_edges_input, sensor_object=sensor_mod_input, met_object=meteorology_input + ) + + assert np.allclose( + np.array(preprocess.time_bin_edges.to_numpy() - time_bin_edges_input.to_numpy(), dtype=float), + np.zeros(preprocess.time_bin_edges.shape), + ) + + for sns, met in zip(preprocess.sensor_object.values(), preprocess.met_object.values()): + assert np.allclose(np.array(sns.time - met.time, dtype=float), np.zeros(sns.time.shape)) + + for met in preprocess.met_object.values(): + assert np.all( + np.logical_or( + np.logical_and(met.wind_direction >= 358.0, met.wind_direction <= 360.0), + np.logical_and(met.wind_direction >= 0.0, met.wind_direction <= 2.0), + ) + ) + assert np.all(np.logical_and(met.wind_speed >= 1.9, met.wind_speed <= 2.1)) + + check_field_values(data_object=preprocess.sensor_object, field_list=preprocess.sensor_fields) + check_field_values(data_object=preprocess.met_object, field_list=preprocess.met_fields) + + preprocess_limit_high = deepcopy(preprocess) + preprocess_limit_low = deepcopy(preprocess) + limit = 2.0 + preprocess_limit_low.filter_on_met(filter_variable=["wind_speed"], lower_limit=[limit]) + preprocess_limit_high.filter_on_met(filter_variable=["wind_speed"], upper_limit=[limit]) + + for met in preprocess_limit_high.met_object.values(): + assert np.all(met.wind_speed <= limit) + + for met in preprocess_limit_low.met_object.values(): + assert np.all(met.wind_speed >= limit) + + +def test_block_data(sensor_mod, meteorology, time_bin_edges, block_times): + """Test that the data blocking functionality returns expected results. + + Checks that: + - the field values after blocking do not contain any NaNs or Infs. + _ that empty SensorGroup and MeteorologyGroup objects are returned in the list elements for any time blocks + which lie entirely outside the time range of the data. + + """ + preprocess = Preprocessor(time_bin_edges=time_bin_edges, sensor_object=sensor_mod, met_object=meteorology) + + with pytest.raises(TypeError): + preprocess.block_data(block_times, data_object="bad_argument") + + sensor_list = preprocess.block_data(block_times, preprocess.sensor_object) + met_list = preprocess.block_data(block_times, preprocess.met_object) + + for sns in sensor_list: + check_field_values(data_object=sns, field_list=preprocess.sensor_fields) + for met in met_list: + check_field_values(data_object=met, field_list=preprocess.met_fields) + + min_time, max_time = get_time_lims(sensor_mod) + for k in range(len(block_times) - 1): + if ((block_times[k] < min_time) and (block_times[k + 1] < min_time)) or ( + (block_times[k] > max_time) and (block_times[k + 1] > max_time) + ): + assert not list(sensor_list[k].keys()) + assert not list(met_list[k].keys()) diff --git a/tests/test_source_map.py b/tests/test_source_map.py new file mode 100644 index 0000000..e59c624 --- /dev/null +++ b/tests/test_source_map.py @@ -0,0 +1,136 @@ +# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# -*- coding: utf-8 -*- +"""Test module for source map class. + +This module provides tests for the source map class in pyELQ. 
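+
+As an illustration only (not executed here), the typical pattern exercised in these tests:
+
+    import numpy as np
+
+    from pyelq.coordinate_system import ENU
+    from pyelq.source_map import SourceMap
+
+    source_map = SourceMap()
+    source_map.generate_sources(
+        coordinate_object=ENU(ref_latitude=0, ref_longitude=0, ref_altitude=0),
+        sourcemap_limits=np.array([[-100, 100], [-100, 100], [-100, 100]]),
+        sourcemap_type="hypercube",
+        nof_sources=10,
+    )
+    # source_map.nof_sources should now be 10 and every generated location should fall within the limits.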
+
+"""
+import numpy as np
+import pytest
+
+from pyelq import coordinate_system
+from pyelq.coordinate_system import ENU, LLA
+from pyelq.sensor.sensor import Sensor
+from pyelq.source_map import SourceMap
+
+
+def test_n_sources():
+    """Test if nof_sources property works as expected."""
+    n_samples = np.random.randint(1, 100)
+    array = np.random.random((n_samples, 2))
+
+    lla_object = LLA()
+    lla_object.from_array(array)
+
+    source_object = SourceMap()
+    assert source_object.nof_sources == 0
+
+    source_object.location = lla_object
+    assert source_object.nof_sources == n_samples
+
+
+@pytest.mark.parametrize("sourcemap_type", ["central", "hypercube", "grid", "grid_sphere", "error"])
+@pytest.mark.parametrize("dim", [2, 3])
+def test_generate_sources(sourcemap_type, dim):
+    """Test the generate_sources method.
+
+    Checks that a NotImplementedError gets raised for an unknown source map type. Checks for the correct number of
+    sources generated. Checks that all sources are within the specified limits.
+
+    Args:
+        sourcemap_type (str): Type of source map to generate ("central", "hypercube", "grid", "grid_sphere" or
+            "error").
+        dim (int): Dimension of each source location (2 or 3).
+
+    """
+    source_object = SourceMap()
+    enu_object = ENU(ref_latitude=0, ref_longitude=0, ref_altitude=0)
+    sourcemap_limits = np.array([[-100, 100], [-100, 100], [-100, 100]])
+    sourcemap_limits = sourcemap_limits[:dim, :]
+
+    if sourcemap_type in ["central", "hypercube", "grid", "grid_sphere"]:
+        if sourcemap_type in ["central", "hypercube"]:
+            random_integer = np.random.randint(1, 100)
+            source_object.generate_sources(
+                coordinate_object=enu_object,
+                sourcemap_limits=sourcemap_limits,
+                sourcemap_type=sourcemap_type,
+                nof_sources=random_integer,
+            )
+            if sourcemap_type == "central":
+                assert source_object.nof_sources == 1
+            else:
+                assert source_object.nof_sources == random_integer
+        elif sourcemap_type in ["grid", "grid_sphere"]:
+            random_shape = np.random.randint(1, 100, size=dim)
+            source_object.generate_sources(
+                coordinate_object=enu_object,
+                sourcemap_limits=sourcemap_limits,
+                sourcemap_type=sourcemap_type,
+                grid_shape=random_shape,
+            )
+            assert source_object.nof_sources == random_shape.prod()
+
+        array_object = source_object.location.to_array()
+        for idx in range(dim):
+            assert np.all(array_object[:, idx] >= sourcemap_limits[idx, 0])
+            assert np.all(array_object[:, idx] <= sourcemap_limits[idx, 1])
+    else:
+        with pytest.raises(NotImplementedError):
+            source_object.generate_sources(
+                coordinate_object=enu_object, sourcemap_limits=sourcemap_limits, sourcemap_type=sourcemap_type
+            )
+
+
+@pytest.mark.parametrize("source_coordinate_system", ["LLA", "ENU", "ECEF"])
+@pytest.mark.parametrize("sensor_coordinate_system", ["LLA", "ENU", "ECEF"])
+def test_calculate_inclusion_idx(source_coordinate_system, sensor_coordinate_system):
+    """Test the calculate_inclusion_idx method.
+
+    Defines a source map of 2 sources that are far apart, with one cluster of observations close to each source,
+    and checks that inclusion_idx and inclusion_n_obs are correct for both sources.
+
+    The source and sensor locations are first calculated in an ENU system, then converted to the desired coordinate
+    system before the actual calculation is performed.
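+
+    As an illustration only (a sketch of the concept, not necessarily the implementation), the inclusion indices
+    could be obtained as:
+
+        import numpy as np
+
+        distances = np.linalg.norm(sensor_enu_array[None, :, :] - source_enu_array[:, None, :], axis=-1)
+        inclusion_idx = [np.flatnonzero(row <= inclusion_radius) for row in distances]
+        inclusion_n_obs = np.array([idx.size for idx in inclusion_idx])
+
+    where source_enu_array and sensor_enu_array are the (nof_sources, 3) and (nof_observations, 3) ENU coordinate
+    arrays used in this test, and inclusion_radius is 100 as below.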
+ + """ + source_object = SourceMap() + if source_coordinate_system == "ENU": + coordinate_object = getattr(coordinate_system, source_coordinate_system)( + ref_latitude=0, ref_longitude=0, ref_altitude=0 + ) + else: + coordinate_object = getattr(coordinate_system, source_coordinate_system)() + enu_coordinate = ENU(ref_latitude=0, ref_longitude=0, ref_altitude=0) + enu_coordinate.from_array(array=np.array([[0, 0, 0], [80, 80, 80]])) + source_object.location = enu_coordinate.to_object_type(coordinate_object) + + sensor_object = Sensor() + if sensor_coordinate_system == "ENU": + coordinate_object = getattr(coordinate_system, sensor_coordinate_system)( + ref_latitude=0, ref_longitude=0, ref_altitude=0 + ) + else: + coordinate_object = getattr(coordinate_system, sensor_coordinate_system)() + + points_inside = np.random.randint(1, 100) + inside_idx = list(range(points_inside)) + + points_outside = np.random.randint(1, 100) + outside_idx = list(range(points_outside)) + outside_idx = [value + points_inside for value in outside_idx] + + inside_locations = np.random.normal(0, 0.001, (points_inside, 3)) + outside_locations = np.random.normal(80, 0.001, (points_outside, 3)) + array = np.concatenate((inside_locations, outside_locations), axis=0) + + enu_coordinate = ENU(ref_latitude=0, ref_longitude=0, ref_altitude=0) + enu_coordinate.from_array(array) + sensor_object.location = enu_coordinate.to_object_type(coordinate_object) + source_object.calculate_inclusion_idx(sensor_object=sensor_object, inclusion_radius=100) + + assert np.all(source_object.inclusion_n_obs == np.array([points_inside, points_outside])) + assert np.all(source_object.inclusion_idx[0] == inside_idx) + assert np.all(source_object.inclusion_idx[1] == outside_idx)