From b457ca06b6f59f40bf5fca64ce09ccc66e78a719 Mon Sep 17 00:00:00 2001 From: Damar Wicaksono Date: Fri, 15 Dec 2023 10:55:29 +0100 Subject: [PATCH] Add support for the exponential univariate dist. The exponential distribution is now supported as a `UnivDist` instance. The test suite and the docs are accordingly updated. This commit should resolve Issue #266. --- CHANGELOG.md | 1 + docs/_toc.yml | 2 + docs/prob-input/available.md | 23 +- .../univariate-distributions/exponential.md | 116 +++++++++ .../univariate_distributions/exponential.py | 224 ++++++++++++++++++ .../univariate_distributions/normal.py | 2 +- src/uqtestfuns/core/prob_input/utils.py | 2 + tests/conftest.py | 3 + .../prob_input/test_univariate_exponential.py | 108 +++++++++ .../core/prob_input/test_univariate_input.py | 3 + 10 files changed, 472 insertions(+), 12 deletions(-) create mode 100644 docs/prob-input/univariate-distributions/exponential.md create mode 100644 src/uqtestfuns/core/prob_input/univariate_distributions/exponential.py create mode 100644 tests/core/prob_input/test_univariate_exponential.py diff --git a/CHANGELOG.md b/CHANGELOG.md index c608680..57c0f7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - The three-dimensional simple portfolio model from Saltelli et al. (2004). - The 20-dimensional polynomial test function from Alemazkoor and Meidani (2018). +- The exponential distribution as a distribution of `UnivDist` instances. ## [0.4.1] - 2023-10-27 diff --git a/docs/_toc.yml b/docs/_toc.yml index beaeb3b..ccd53e8 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -136,6 +136,8 @@ parts: sections: - file: prob-input/univariate-distributions/beta title: Beta + - file: prob-input/univariate-distributions/exponential + title: Exponential - file: prob-input/univariate-distributions/gumbel title: Gumbel (max.) - file: prob-input/univariate-distributions/logitnormal diff --git a/docs/prob-input/available.md b/docs/prob-input/available.md index 2a88f5b..ebe5a2f 100644 --- a/docs/prob-input/available.md +++ b/docs/prob-input/available.md @@ -5,14 +5,15 @@ The table below lists all the available univariate distribution types used to construct ``UnivDist`` instances. ``UnivDist`` are used to represent the univariate marginals of a (possibly, multivariate) probabilistic input model. -| Name | Keyword | Notation | Support | Number of parameters | -|:-------------------------------------------------------------------------------------:|:----------------:|:-------------------------------------------------:|:--------------------------------:|:--------------------:| -| {ref}`Beta ` | `"beta"` | $\mathrm{Beta}(\alpha, \beta, a, b)$ | $[a, b], \; a, b \in \mathbb{R}$ | 4 | -| {ref}`Gumbel (max.) ` | `"gumbel"` | $\mathrm{Gumbel}(\mu, \beta)$ | $(-\infty, \infty)$ | 2 | -| {ref}`Logit-Normal ` | `"logitnormal"` | $\mathcal{N}_{\mathrm{logit}}(\mu, \sigma)$ | $(0, 1)$ | 2 | -| {ref}`Log-Normal ` | `"lognormal"` | $\mathcal{N}_{\mathrm{log}} (\mu, \sigma)$ | $(0, \infty)$ | 2 | -| {ref}`Normal (Gaussian) ` | `"normal"` | $\mathcal{N}(\mu, \sigma)$ | $(-\infty, \infty)$ | 2 | -| {ref}`Triangular ` | `"triangular"` | $\mathcal{T}_r(a, b, c)$ | $[a, b], \; a, b \in \mathbb{R}$ | 3 | -| {ref}`Truncated Gumbel (max.) ` | `"trunc-gumbel"` | $\mathrm{Gumbel}_{\mathrm{Tr}}(\mu, \beta, a, b)$ | $[a, b], \; a, b \in \mathbb{R}$ | 4 | -| {ref}`Truncated Normal (Gaussian) ` | `"trunc-normal"` | $\mathcal{N}_{\mathrm{Tr}}(\mu, \sigma, a, b)$ | $[a, b], \; a, b \in \mathbb{R}$ | 4 | -| {ref}`Uniform ` | `"uniform"` | $\mathcal{U}(a, b)$ | $[a, b], \; a, b \in \mathbb{R}$ | 2 | +| Name | Keyword value for `distribution` | Notation | Support | Number of parameters | +|:-------------------------------------------------------------------------------------:|:--------------------------------:|:-------------------------------------------------:|:--------------------------------:|:--------------------:| +| {ref}`Beta ` | `"beta"` | $\mathrm{Beta}(\alpha, \beta, a, b)$ | $[a, b], \; a, b \in \mathbb{R}$ | 4 | +| {ref}`Exponential ` | `"exponential"` | $\mathcal{E}(\lambda)$ | $[0, \infty)$ | 1 | +| {ref}`Gumbel (max.) ` | `"gumbel"` | $\mathrm{Gumbel}(\mu, \beta)$ | $(-\infty, \infty)$ | 2 | +| {ref}`Logit-Normal ` | `"logitnormal"` | $\mathcal{N}_{\mathrm{logit}}(\mu, \sigma)$ | $(0, 1)$ | 2 | +| {ref}`Log-Normal ` | `"lognormal"` | $\mathcal{N}_{\mathrm{log}} (\mu, \sigma)$ | $(0, \infty)$ | 2 | +| {ref}`Normal (Gaussian) ` | `"normal"` | $\mathcal{N}(\mu, \sigma)$ | $(-\infty, \infty)$ | 2 | +| {ref}`Triangular ` | `"triangular"` | $\mathcal{T}_r(a, b, c)$ | $[a, b], \; a, b \in \mathbb{R}$ | 3 | +| {ref}`Truncated Gumbel (max.) ` | `"trunc-gumbel"` | $\mathrm{Gumbel}_{\mathrm{Tr}}(\mu, \beta, a, b)$ | $[a, b], \; a, b \in \mathbb{R}$ | 4 | +| {ref}`Truncated Normal (Gaussian) ` | `"trunc-normal"` | $\mathcal{N}_{\mathrm{Tr}}(\mu, \sigma, a, b)$ | $[a, b], \; a, b \in \mathbb{R}$ | 4 | +| {ref}`Uniform ` | `"uniform"` | $\mathcal{U}(a, b)$ | $[a, b], \; a, b \in \mathbb{R}$ | 2 | diff --git a/docs/prob-input/univariate-distributions/exponential.md b/docs/prob-input/univariate-distributions/exponential.md new file mode 100644 index 0000000..b4dd306 --- /dev/null +++ b/docs/prob-input/univariate-distributions/exponential.md @@ -0,0 +1,116 @@ +--- +jupytext: + formats: ipynb,md:myst + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.14.1 +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +```{code-cell} ipython3 +:tags: [remove-cell] + +import uqtestfuns as uqtf +import matplotlib.pyplot as plt +import numpy as np +``` + +(prob-input:univariate-distributions:exponential)= +# Exponential Distribution + +The exponential distribution is a single-parameter continuous probability +distribution. + +The table below summarizes some important aspects of the distribution. + +| | | +|---------------------:|-------------------------------------------------------------| +| **Notation** | $X \sim \mathcal{E}(\lambda)$ | +| **Parameters** | $\lambda \in \mathbb{R}_{>0}$ (rate parameter) | +| **{term}`Support`** | $\mathcal{D}_X = [0.0, \infty)$ | +| **{term}`PDF`** | $f_X (x; \lambda) = \lambda e^{-\lambda x}$ | +| **{term}`CDF`** | $F_X (x; \lambda) = 1 - e^{-\lambda x}$ | +| **{term}`ICDF`** | $F^{-1}_X (x; \lambda) = - \frac{1}{\lambda} \ln{(1 - x)}$ | + +The plots of probability density functions (PDFs), +sample histogram (of $5'000$ points), +cumulative distribution functions (CDFs), +and inverse cumulative distribution functions (ICDFs) for different parameter +values are shown below. + +```{code-cell} ipython3 +:tags: [remove-input] + +import numpy as np +import matplotlib.pyplot as plt +import uqtestfuns as uqtf + +parameters = [[2.0], [1.5], [1.0], [0.5]] +colors = ["#a6cee3", "#1f78b4", "#b2df8a", "#33a02c"] + +univ_dists = [] +for parameter in parameters: + univ_dists.append(uqtf.UnivDist(distribution="exponential", parameters=parameter)) + +fig, axs = plt.subplots(2, 2, figsize=(10,10)) + +# --- PDF +xx = np.linspace(0, 5, 1000) +for i, univ_dist in enumerate(univ_dists): + axs[0, 0].plot( + xx, + univ_dist.pdf(xx), + color=colors[i], + label=f"$\\lambda = {univ_dist.parameters[0]}$", + linewidth=2, + ) +axs[0, 0].legend(); +axs[0, 0].grid(); +axs[0, 0].set_title("PDF"); + +# --- Sample histogram +sample_size = 5000 +np.random.seed(42) +for col, univ_dist in zip(reversed(colors), reversed(univ_dists)): + axs[0, 1].hist( + univ_dist.get_sample(sample_size), + color=col, + bins="auto", + alpha=0.75 + ) +axs[0, 1].grid(); +axs[0, 1].set_xlim([0, 5]); +axs[0, 1].set_title("Sample histogram"); + +# --- CDF +xx = np.linspace(0, 5, 1000) +for i, univ_dist in enumerate(univ_dists): + axs[1, 0].plot( + xx, + univ_dist.cdf(xx), + color=colors[i], + linewidth=2, + ) +axs[1, 0].grid(); +axs[1, 0].set_title("CDF"); + +# --- Inverse CDF +xx = np.linspace(0, 1, 1000) +for i, univ_dist in enumerate(univ_dists): + axs[1, 1].plot( + xx, + univ_dist.icdf(xx), + color=colors[i], + linewidth=2 + ) +axs[1, 1].grid(); +axs[1, 1].set_ylim([0, 6]); +axs[1, 1].set_title("Inverse CDF"); + +plt.gcf().set_dpi(150) +``` diff --git a/src/uqtestfuns/core/prob_input/univariate_distributions/exponential.py b/src/uqtestfuns/core/prob_input/univariate_distributions/exponential.py new file mode 100644 index 0000000..0e329eb --- /dev/null +++ b/src/uqtestfuns/core/prob_input/univariate_distributions/exponential.py @@ -0,0 +1,224 @@ +""" +Module with routines involving the exponential probability distribution. + +The exponential distribution in UQTestFuns is parametrized by a single +parameter: the rate parametere. + +The underlying implementation is based on the implementation from scipy.stats. +In the SciPy convention, the rate parameter corresponds to the reciprocal +of the ``scale`` parameter. +""" +import numpy as np +from scipy.stats import expon + +from .utils import verify_param_nums, postprocess_icdf +from ....global_settings import ARRAY_FLOAT + +DISTRIBUTION_NAME = "exponential" + +NUM_PARAMS = 1 + + +def verify_parameters(parameters: ARRAY_FLOAT) -> None: + """Verify the parameters of an exponential distribution. + + Parameters + ---------- + parameters : ARRAY_FLOAT + The parameter of an exponential distribution + (i.e., the rate parameter). + + Returns + ------ + None + The function exits without any return value when nothing is wrong. + + Raises + ------ + ValueError + If any of the parameter values are invalid + or the shapes are inconsistent. + """ + # Verify overall shape + verify_param_nums(parameters.size, NUM_PARAMS, DISTRIBUTION_NAME) + + if parameters[0] <= 0.0: + raise ValueError( + f"The corresponding rate parameter {parameters[0]}" + f"must be larger than 0.0!" + ) + + +def lower(parameters: ARRAY_FLOAT) -> float: + """Get the lower bound of an exponential distribution. + + Parameters + ---------- + parameters : ARRAY_FLOAT + The parameter of an exponential distribution. + + Returns + ------- + float + The lower bound of the exponential distribution. + + Notes + ----- + - The parameters are not used in determining the lower bound of + the distribution; it must, however, appear for interface consistency. + The lower bound of a lognormal distribution is finite; it is 0.0. + """ + lower_bound = 0.0 + + return lower_bound + + +def upper(parameters: ARRAY_FLOAT) -> float: + """Get the upper bound of an exponential distribution. + + Parameters + ---------- + parameters : ARRAY_FLOAT + The parameters of an exponential distribution + + Returns + ------- + float + The upper bound of the exponential distribution. + + Notes + ----- + - Strictly speaking, an exponential distribution is unbounded on the right. + However, for numerical reason an upper bound is set. + - The upper bound of the exponential distribution is chosen such that + tbe probability mass between the lower and upper bound is at least + 1 - 1e-15. + """ + # 36.7368005696771 is the quantile values with probability of 1-1e-16 + # for the exponential distribution with rate parameter value 1.0 + upper_bound = float(36.7368005696771 / parameters[0]) + + return upper_bound + + +def pdf( + xx: ARRAY_FLOAT, + parameters: ARRAY_FLOAT, + lower_bound: float, + upper_bound: float, +) -> ARRAY_FLOAT: + """Get the PDF values of an exponential distribution. + + Parameters + ---------- + xx : ARRAY_FLOAT + Sample values (realizations) of an exponential distribution. + parameters : ARRAY_FLOAT + Parameters of the exponential distribution. + lower_bound : float + Lower bound of the exponential distribution. + upper_bound : float + Upper bound of the exponential distribution. + + Returns + ------- + np.ndarray + PDF values of the exponential distribution on the sample values. + + Notes + ----- + - The PDF for sample with values outside the bounds are set to 0.0. + """ + yy = np.zeros(xx.shape) + idx = np.logical_and(xx >= lower_bound, xx <= upper_bound) + rate = parameters[0] + scale = 1 / rate + yy[idx] = expon.pdf(xx[idx], scale=scale) + + return yy + + +def cdf( + xx: ARRAY_FLOAT, + parameters: ARRAY_FLOAT, + lower_bound: float, + upper_bound: float, +) -> ARRAY_FLOAT: + """Get the CDF values of an exponential distribution. + + Parameters + ---------- + xx : ARRAY_FLOAT + Sample values (realizations) of an exponential distribution. + parameters : ARRAY_FLOAT + Parameters of the exponential distribution. + lower_bound : float + Lower bound of the exponential distribution. + upper_bound : float + Upper bound of the exponential distribution. + + Returns + ------- + ARRAY_FLOAT + CDF values of the exponential distribution on the sample values. + + Notes + ----- + - The CDF for sample with values smaller (resp. larger) + than the lower bound (resp. upper bound) are set to 0.0 (resp. 1.0). + """ + yy = np.empty(xx.shape) + idx_lower = xx < lower_bound + idx_upper = xx > upper_bound + idx_rest = np.logical_and( + np.logical_not(idx_lower), np.logical_not(idx_upper) + ) + + yy[idx_lower] = 0.0 + yy[idx_upper] = 1.0 + rate = parameters[0] + scale = 1 / rate + yy[idx_rest] = expon.cdf(xx[idx_rest], scale=scale) + + return yy + + +def icdf( + xx: ARRAY_FLOAT, + parameters: ARRAY_FLOAT, + lower_bound: float, + upper_bound: float, +) -> ARRAY_FLOAT: + """Get the inverse CDF values of an exponential distribution. + + Parameters + ---------- + xx : ARRAY_FLOAT + Sample values (realizations) in the [0, 1] domain. + parameters : ARRAY_FLOAT + Parameters of an exponential distribution. + lower_bound : float + Lower bound of the exponential distribution. + upper_bound : float + Upper bound of the exponential distribution. + + Returns + ------- + ARRAY_FLOAT + Transformed values in the domain of the normal distribution. + + Notes + ----- + - ICDF for sample values outside [0.0, 1.0] is set to NaN according to + the underlying SciPy implementation. + """ + + # Compute the ICDF + rate = parameters[0] + scale = 1 / rate + yy = expon.ppf(xx, scale=scale) + + # Check if values are within the set bounds + yy = postprocess_icdf(yy, lower_bound, upper_bound) + + return yy diff --git a/src/uqtestfuns/core/prob_input/univariate_distributions/normal.py b/src/uqtestfuns/core/prob_input/univariate_distributions/normal.py index 6ed7c06..7dfcd71 100644 --- a/src/uqtestfuns/core/prob_input/univariate_distributions/normal.py +++ b/src/uqtestfuns/core/prob_input/univariate_distributions/normal.py @@ -68,7 +68,7 @@ def lower(parameters: ARRAY_FLOAT) -> float: However, for numerical reason a lower bound is set. - The lower bound of the normal distribution is chosen such that the probability mass between the lower and upper bound is at least - 1 - 1e.15. + 1 - 1e-15. """ # -8.222082216130435 is the quantile values with probability of 1e-16 # for the standard Normal distribution (mu = 0.0, sigma = 1.0) diff --git a/src/uqtestfuns/core/prob_input/utils.py b/src/uqtestfuns/core/prob_input/utils.py index 524ab53..9360d3c 100644 --- a/src/uqtestfuns/core/prob_input/utils.py +++ b/src/uqtestfuns/core/prob_input/utils.py @@ -7,6 +7,7 @@ from .univariate_distributions import ( beta, + exponential, normal, gumbel, lognormal, @@ -20,6 +21,7 @@ SUPPORTED_MARGINALS = { beta.DISTRIBUTION_NAME: beta, + exponential.DISTRIBUTION_NAME: exponential, lognormal.DISTRIBUTION_NAME: lognormal, normal.DISTRIBUTION_NAME: normal, gumbel.DISTRIBUTION_NAME: gumbel, diff --git a/tests/conftest.py b/tests/conftest.py index 12dff3c..8933ec7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -55,6 +55,9 @@ def create_random_marginals(length: int) -> List[UnivDist]: distribution = random.choice(MARGINALS) if distribution == "beta": parameters = np.sort(1 + 2 * np.random.rand(4)) + elif distribution == "exponential": + # Single parameter, must be strictly positive + parameters = 1 + np.random.rand(1) elif distribution == "triangular": parameters = np.sort(1 + 2 * np.random.rand(2)) parameters = np.insert( diff --git a/tests/core/prob_input/test_univariate_exponential.py b/tests/core/prob_input/test_univariate_exponential.py new file mode 100644 index 0000000..508f9e9 --- /dev/null +++ b/tests/core/prob_input/test_univariate_exponential.py @@ -0,0 +1,108 @@ +""" +Test module specifically for UnivDist instances with the exponential dist. +""" +import pytest +import numpy as np + +from uqtestfuns.core.prob_input.univariate_distribution import UnivDist +from conftest import create_random_alphanumeric + +DISTRIBUTION = "exponential" + + +def test_wrong_number_of_parameters() -> None: + """Test the failure of specifying invalid parameter values.""" + name = create_random_alphanumeric(5) + # The distribution expects exactly 1 parameter! + num_params = np.random.randint(2, 10) + parameters = np.random.rand(num_params) + + with pytest.raises(ValueError): + UnivDist(name=name, distribution=DISTRIBUTION, parameters=parameters) + + +def test_failed_parameter_verification() -> None: + name = create_random_alphanumeric(10) + # The parameter of an exponential distribution must be stricly positive! + parameters = [-5] + + with pytest.raises(ValueError): + UnivDist(name=name, distribution=DISTRIBUTION, parameters=parameters) + + +def test_get_pdf_values() -> None: + """Test the PDF values from an instance of UnivDist.""" + + name = create_random_alphanumeric(5) + parameters = np.sort(np.random.rand(1)) + + my_univariate_input = UnivDist( + name=name, distribution=DISTRIBUTION, parameters=parameters + ) + + sample_size = 1000 + xx = my_univariate_input.get_sample(sample_size) + + # Assertions (PDF values are max. at the left bound and min. at the right) + assert np.all( + np.logical_and( + my_univariate_input.pdf(xx) + <= my_univariate_input.pdf(my_univariate_input.lower), + my_univariate_input.pdf(xx) + >= my_univariate_input.pdf(my_univariate_input.upper), + ) + ) + + +def test_mean() -> None: + """Test the mean values from an instance of UnivDist.""" + name = create_random_alphanumeric(5) + parameters = np.sort(np.random.rand(1)) + + my_univariate_input = UnivDist( + name=name, distribution=DISTRIBUTION, parameters=parameters + ) + + sample_size = 10000000 + xx = my_univariate_input.get_sample(sample_size) + + mean = np.mean(xx) + mean_ref = 1 / parameters[0] + + assert np.isclose(mean, mean_ref, rtol=1e-02, atol=1e-02) + + +def test_variance() -> None: + """Test the variance values from an instance of UnivDist.""" + name = create_random_alphanumeric(5) + parameters = np.sort(np.random.rand(1)) + + my_univariate_input = UnivDist( + name=name, distribution=DISTRIBUTION, parameters=parameters + ) + + sample_size = 10000000 + xx = my_univariate_input.get_sample(sample_size) + + var = np.var(xx) + var_ref = 1 / (parameters[0] ** 2) + + assert np.isclose(var, var_ref, rtol=1e-02, atol=1e-02) + + +def test_median() -> None: + """Test the median values from an instance of UnivDist.""" + name = create_random_alphanumeric(5) + parameters = np.sort(np.random.rand(1)) + + my_univariate_input = UnivDist( + name=name, distribution=DISTRIBUTION, parameters=parameters + ) + + sample_size = 10000000 + xx = my_univariate_input.get_sample(sample_size) + + median = np.median(xx) + median_ref = np.log(2) / parameters[0] + + assert np.isclose(median, median_ref, rtol=1e-02, atol=1e-02) diff --git a/tests/core/prob_input/test_univariate_input.py b/tests/core/prob_input/test_univariate_input.py index 345b929..3f99ecb 100644 --- a/tests/core/prob_input/test_univariate_input.py +++ b/tests/core/prob_input/test_univariate_input.py @@ -26,6 +26,9 @@ def univariate_input( parameters = np.sort(np.random.rand(2)) elif request.param == "beta": parameters = np.sort(np.random.rand(4)) + elif distribution == "exponential": + # Single parameter, must be strictly positive + parameters = 1 + np.random.rand(1) elif distribution == "triangular": parameters = np.sort(1 + 2 * np.random.rand(2)) # Append the mid point