From 1ad370e7f8ea093b70dec36aeb191f91cd7af9cc Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Sun, 7 Jul 2024 20:05:24 -0300 Subject: [PATCH 1/4] Gallery: Add Censored and Truncated --- docs/examples/censored_distribution.md | 97 +++++++++++++++++++++++++ docs/examples/truncated_distribution.md | 96 ++++++++++++++++++++++++ 2 files changed, 193 insertions(+) create mode 100644 docs/examples/censored_distribution.md create mode 100644 docs/examples/truncated_distribution.md diff --git a/docs/examples/censored_distribution.md b/docs/examples/censored_distribution.md new file mode 100644 index 00000000..8d3805f6 --- /dev/null +++ b/docs/examples/censored_distribution.md @@ -0,0 +1,97 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst +kernelspec: + display_name: Python 3 + language: python + name: python3 +--- +# Censored Distribution + +This is not a distribution per se, but a modifier of univariate distributions. + +A censored distribution arises when the observed data is limited to a certain range, and values outside this range are only known to lie beyond the corresponding bound. For instance, in a study aiming to measure the impact of a drug on mortality rates, it may be known that an individual's age at death is at least 75 years (but may be more). Such a situation could occur if the individual withdrew from the study at age 75, or if the individual is currently alive at the age of 75. Censoring can also happen when a value falls outside the range of a measuring instrument. For example, if a bathroom scale only measures up to 140 kg, and a 160-kg person is weighed, the observer would only know that the individual's weight is at least 140 kg. 
+ + +## Probability Density Function (PDF): + +```{code-cell} +--- +tags: [remove-input] +mystnb: + image: + alt: Censored Distribution PDF +--- + +import arviz as az +from preliz import Normal, Censored +az.style.use('arviz-doc') +Censored(Normal(0, 1), -1, 1).plot_pdf(support=(-4, 4)) +Normal(0, 1).plot_pdf(alpha=0.5) +``` + +## Cumulative Distribution Function (CDF): + +```{code-cell} +--- +tags: [remove-input] +mystnb: + image: + alt: Censored Distribution CDF +--- + +Censored(Normal(0, 1), -1, 1).plot_cdf(support=(-4, 4)) +Normal(0, 1).plot_cdf(alpha=0.5) +``` + + +## Key properties and parameters: + + +**Probability Density Function (PDF):** + +Given a base distribution with cumulative distribution function (CDF) and probability density/mass function (PDF), the PDF of a Censored distribution is: + +$$ +\begin{cases} + 0 & \text{for } x < \text{lower}, \\ + \text{CDF}(lower) & \text{for } x = \text{lower}, \\ + \text{PDF}(x) & \text{for } \text{lower} < x < \text{upper}, \\ + 1-\text{CDF}(upper) & \text {for } x = \text{upper}, \\ + 0 & \text{for } x > \text{upper}, +\end{cases} +$$ + +where `lower` and `upper` are the lower and upper bounds of the censored distribution, respectively. + +**Cumulative Distribution Function (CDF):** + +The CDF of a Censored distribution is: + + +$$ +\begin{cases} + 0 & \text{for } x < \text{lower}, \\ + \text{CDF}(x) & \text{for } \text{lower} \leq x < \text{upper}, \\ + 1 & \text{for } x \geq \text{upper}, +\end{cases} +$$ + +where `lower` and `upper` are the lower and upper bounds of the censored distribution, respectively. + + +```{seealso} +:class: seealso + + +**Related Distributions:** + +- [Truncated](truncated_distribution.md) - In a truncated distribution, values outside the range are not recorded, while in a censored distribution, they are set to the nearest bound. 
+ +``` + +## References + +- Wikipedia - [Censored distribution](https://en.wikipedia.org/wiki/Censoring_(statistics)) diff --git a/docs/examples/truncated_distribution.md b/docs/examples/truncated_distribution.md new file mode 100644 index 00000000..99354e3c --- /dev/null +++ b/docs/examples/truncated_distribution.md @@ -0,0 +1,96 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst +kernelspec: + display_name: Python 3 + language: python + name: python3 +--- +# Truncated Distribution + +This is not a distribution per se, but a modifier of univariate distributions. + +Truncated distributions arise in cases where the ability to record, or even to know about, occurrences is limited to values which lie above or below a given threshold or within a specified range. For example, if the dates of birth of children in a school are examined, these would typically be subject to truncation relative to those of all children in the area given that the school accepts only children in a given age range on a specific date. There would be no information about how many children in the locality had dates of birth before or after the school's cutoff dates if only a direct approach to the school were used to obtain information. 
+ +## Probability Density Function (PDF): + +```{code-cell} +--- +tags: [remove-input] +mystnb: + image: + alt: Truncated Distribution PDF +--- + +import arviz as az +from preliz import Gamma, Truncated +az.style.use('arviz-doc') +Truncated(Gamma(mu=2, sigma=1), 1, 4.5).plot_pdf() +Gamma(mu=2, sigma=1).plot_pdf() +``` + +## Cumulative Distribution Function (CDF): + +```{code-cell} +--- +tags: [remove-input] +mystnb: + image: + alt: Truncated Distribution CDF +--- + +Truncated(Gamma(mu=2, sigma=1), 1, 4.5).plot_cdf() +Gamma(mu=2, sigma=1).plot_cdf() +``` + + +## Key properties and parameters: + + +**Probability Density Function (PDF):** + +Given a base distribution with cumulative distribution function (CDF) and probability density/mass function (PDF), the PDF of a Truncated distribution is: + +$$ +\begin{cases} + 0 & \text{for } x < \text{lower}, \\ + \frac{\text{PDF}(x, dist)}{\text{CDF}(upper, dist) - \text{CDF}(lower, dist)} + & \text{for } \text{lower} <= x <= \text{upper}, \\ + 0 & \text{for } x > \text{upper}, +\end{cases} +$$ + +where `lower` and `upper` are the lower and upper bounds of the truncated distribution, respectively. + +**Cumulative Distribution Function (CDF):** + +The CDF of a Truncated distribution is: + + +$$ +\begin{cases} +0 & \text{if } x < \text{lower} \\ +1 & \text{if } x > \text{upper} \\ +\frac{\text{CDF}(x) - \text{CDF}(\text{lower})}{\text{CDF}(\text{upper}) - \text{CDF}(\text{lower})} & \text{if } \text{lower} \leq x \leq \text{upper} +\end{cases} +$$ + +where `lower` and `upper` are the lower and upper bounds of the truncated distribution, respectively. + + +```{seealso} +:class: seealso + + +**Related Distributions:** + +- [Censored](censored_distribution.md) - In a censored distribution, values outside the range are set to the nearest bound, while in a truncated distribution, they are not recorded. 
+- [TruncatedNormal](truncated_normal_distribution.md) - A truncated normal distribution is a normal distribution that has been restricted to a specific range. + +``` + +## References + +- Wikipedia - [Truncated distribution](https://en.wikipedia.org/wiki/Truncated_distribution) From 4cb1fa3c1325f6675403a57cf3ca2002187768a5 Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Mon, 8 Jul 2024 09:29:24 -0300 Subject: [PATCH 2/4] minor fixes --- docs/examples/censored_distribution.md | 6 +++--- docs/examples/truncated_distribution.md | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/examples/censored_distribution.md b/docs/examples/censored_distribution.md index 8d3805f6..68bd6ca2 100644 --- a/docs/examples/censored_distribution.md +++ b/docs/examples/censored_distribution.md @@ -43,7 +43,7 @@ mystnb: --- Censored(Normal(0, 1), -1, 1).plot_cdf(support=(-4, 4)) -Normal(0, 1).plot_cdf(alpha=0.5) +Normal(0, 1).plot_cdf(alpha=0.5); ``` @@ -57,9 +57,9 @@ Given a base distribution with cumulative distribution function (CDF) and probab $$ \begin{cases} 0 & \text{for } x < \text{lower}, \\ - \text{CDF}(lower) & \text{for } x = \text{lower}, \\ + \text{CDF}(\text{lower}) & \text{for } x = \text{lower}, \\ \text{PDF}(x) & \text{for } \text{lower} < x < \text{upper}, \\ - 1-\text{CDF}(upper) & \text {for } x = \text{upper}, \\ + 1-\text{CDF}(\text{upper}) & \text {for } x = \text{upper}, \\ 0 & \text{for } x > \text{upper}, \end{cases} $$ diff --git a/docs/examples/truncated_distribution.md b/docs/examples/truncated_distribution.md index 99354e3c..4e87a892 100644 --- a/docs/examples/truncated_distribution.md +++ b/docs/examples/truncated_distribution.md @@ -42,7 +42,7 @@ mystnb: --- Truncated(Gamma(mu=2, sigma=1), 1, 4.5).plot_cdf() -Gamma(mu=2, sigma=1).plot_cdf() +Gamma(mu=2, sigma=1).plot_cdf(); ``` @@ -56,7 +56,7 @@ Given a base distribution with cumulative distribution function (CDF) and probab $$ \begin{cases} 0 & \text{for } x < \text{lower}, \\ - 
\frac{\text{PDF}(x, dist)}{\text{CDF}(upper, dist) - \text{CDF}(lower, dist)} + \frac{\text{PDF}(x)}{\text{CDF}(upper) - \text{CDF}(lower)} & \text{for } \text{lower} <= x <= \text{upper}, \\ 0 & \text{for } x > \text{upper}, \end{cases} From a1a06816bf7785ceffb091a767854bd825ebfb94 Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Mon, 8 Jul 2024 09:33:00 -0300 Subject: [PATCH 3/4] minor style fix docstring --- preliz/distributions/censored.py | 10 +++++----- preliz/distributions/truncated.py | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/preliz/distributions/censored.py b/preliz/distributions/censored.py index ab4f1522..176c362c 100644 --- a/preliz/distributions/censored.py +++ b/preliz/distributions/censored.py @@ -19,11 +19,11 @@ class Censored(DistributionTransformer): .. math:: \begin{cases} - 0 & \text{for } x < lower, \\ - \text{CDF}(lower) & \text{for } x = lower, \\ - \text{PDF}(x) & \text{for } lower < x < upper, \\ - 1-\text{CDF}(upper) & \text {for} x = upper, \\ - 0 & \text{for } x > upper, + 0 & \text{for } x < \text{lower}, \\ + \text{CDF}(\text{lower}) & \text{for } x = \text{lower}, \\ + \text{PDF}(x) & \text{for } \text{lower} < x < \text{upper}, \\ + 1-\text{CDF}(\text{upper}) & \text {for } x = \text{upper}, \\ + 0 & \text{for } x > \text{upper}, \end{cases} .. plot:: diff --git a/preliz/distributions/truncated.py b/preliz/distributions/truncated.py index d61c38b6..e2fdd8d8 100644 --- a/preliz/distributions/truncated.py +++ b/preliz/distributions/truncated.py @@ -17,10 +17,10 @@ class Truncated(DistributionTransformer): .. 
math:: \begin{cases} - 0 & \text{for } x < lower, \\ - \frac{\text{PDF}(x, dist)}{\text{CDF}(upper, dist) - \text{CDF}(lower, dist)} - & \text{for } lower <= x <= upper, \\ - 0 & \text{for } x > upper, + 0 & \text{for } x < \text{lower}, \\ + \frac{\text{PDF}(x)}{\text{CDF}(upper) - \text{CDF}(lower)} + & \text{for } \text{lower} <= x <= \text{upper}, \\ + 0 & \text{for } x > \text{upper}, \end{cases} .. plot:: From d499c48a93ed4c9dc4f943bda0f2244ee86475b3 Mon Sep 17 00:00:00 2001 From: Osvaldo A Martin Date: Mon, 8 Jul 2024 14:41:53 -0300 Subject: [PATCH 4/4] Apply suggestions from code review --- docs/examples/censored_distribution.md | 2 +- docs/examples/truncated_distribution.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/examples/censored_distribution.md b/docs/examples/censored_distribution.md index 68bd6ca2..37fda683 100644 --- a/docs/examples/censored_distribution.md +++ b/docs/examples/censored_distribution.md @@ -29,7 +29,7 @@ import arviz as az from preliz import Normal, Censored az.style.use('arviz-doc') Censored(Normal(0, 1), -1, 1).plot_pdf(support=(-4, 4)) -Normal(0, 1).plot_pdf(alpha=0.5) +Normal(0, 1).plot_pdf(alpha=0.5); ``` ## Cumulative Distribution Function (CDF): diff --git a/docs/examples/truncated_distribution.md b/docs/examples/truncated_distribution.md index 4e87a892..dffb34c9 100644 --- a/docs/examples/truncated_distribution.md +++ b/docs/examples/truncated_distribution.md @@ -28,7 +28,7 @@ import arviz as az from preliz import Gamma, Truncated az.style.use('arviz-doc') Truncated(Gamma(mu=2, sigma=1), 1, 4.5).plot_pdf() -Gamma(mu=2, sigma=1).plot_pdf() +Gamma(mu=2, sigma=1).plot_pdf(); ``` ## Cumulative Distribution Function (CDF):