Skip to content

Commit

Permalink
allow to compute chi2 values (#348)
Browse files Browse the repository at this point in the history
* allow to compute chi2 values

* add docs

* fix w602 errors

* fix docstring coverage

* fix codacy warnings
  • Loading branch information
yannikschaelte authored Feb 27, 2020
1 parent ad819e8 commit 1e39c5a
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 33 deletions.
44 changes: 44 additions & 0 deletions petab/calculate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Functions performing various calculations."""

import numpy as np
import pandas as pd
from functools import reduce
from typing import List, Union
Expand Down Expand Up @@ -185,3 +186,46 @@ def evaluate_noise_formula(
f"Cannot replace all parameters in noise formula {noise_value} "
f"for observable {observable_id}.")
return noise_value


def calculate_chi2(
measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame],
simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame],
observable_dfs: Union[List[pd.DataFrame], pd.DataFrame],
parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame],
normalize: bool = True,
scale: bool = True
) -> float:
"""Calculate the chi2 value.
Arguments:
measurement_dfs:
The problem measurement tables.
simulation_dfs:
Simulation tables corresponding to the measurement tables.
observable_dfs:
The problem observable tables.
parameter_dfs:
The problem parameter tables.
normalize:
Whether to normalize residuals by the noise standard deviation
terms.
scale:
Whether to calculate residuals of scaled values.
Returns:
chi2: The aggregated chi2 value.
"""
residual_dfs = calculate_residuals(
measurement_dfs, simulation_dfs, observable_dfs, parameter_dfs,
normalize, scale)
chi2s = [calculate_chi2_for_table_from_residuals(df)
for df in residual_dfs]
chi2 = sum(chi2s)
return chi2


def calculate_chi2_for_table_from_residuals(
residual_df: pd.DataFrame) -> float:
"""Compute chi2 value for a single residual table."""
return (np.array(residual_df[RESIDUAL])**2).sum()
101 changes: 68 additions & 33 deletions tests/test_calculate.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
"""Tests related to petab.calculate."""

from petab import calculate_residuals
from petab import calculate_residuals, calculate_chi2
from petab.C import *
import pandas as pd
import numpy as np
import pytest


def test_calculate_residuals():
"""Test calculate.calculate_residuals."""
def model_simple():
"Simple model."""
measurement_df = pd.DataFrame(data={
OBSERVABLE_ID: ['obs_a', 'obs_a', 'obs_b', 'obs_b'],
SIMULATION_CONDITION_ID: ['c0', 'c1', 'c0', 'c1'],
Expand All @@ -31,23 +31,15 @@ def test_calculate_residuals():
columns={MEASUREMENT: SIMULATION})
simulation_df[SIMULATION] = [2, 2, 19, 20]

residual_dfs = calculate_residuals(
measurement_df, simulation_df, observable_df, parameter_df)

assert set(residual_dfs[0][RESIDUAL]) == pytest.approx(
{(2-0)/2, (2-1)/2, (19-20)/3, (20-22)/3})
expected_residuals = {(2-0)/2, (2-1)/2, (19-20)/3, (20-22)/3}
expected_residuals_nonorm = {2-0, 2-1, 19-20, 20-22}

# don't apply normalization
residual_dfs = calculate_residuals(
measurement_df, simulation_df, observable_df,
parameter_df, normalize=False)
return (measurement_df, observable_df, parameter_df,
simulation_df, expected_residuals, expected_residuals_nonorm)

assert set(residual_dfs[0][RESIDUAL]) == pytest.approx(
{(2-0), (2-1), (19-20), (20-22)})


def test_calculate_residuals_replicates():
"""Test calculate.calculate_residuals with replicates."""
def model_replicates():
"""Model with replicates."""
measurement_df = pd.DataFrame(data={
OBSERVABLE_ID: ['obs_a', 'obs_a'],
SIMULATION_CONDITION_ID: ['c0', 'c0'],
Expand All @@ -70,14 +62,15 @@ def test_calculate_residuals_replicates():
columns={MEASUREMENT: SIMULATION})
simulation_df[SIMULATION] = [2, 2]

residual_dfs = calculate_residuals(
measurement_df, simulation_df, observable_df, parameter_df)
expected_residuals = {(2-0)/2, (2-1)/2}
expected_residuals_nonorm = {2-0, 2-1}

assert set(residual_dfs[0][RESIDUAL]) == pytest.approx({(2-0)/2, (2-1)/2})
return (measurement_df, observable_df, parameter_df,
simulation_df, expected_residuals, expected_residuals_nonorm)


def test_calculate_residuals_scaling():
"""Test calculate.calculate_residuals with scaling."""
def model_scalings():
"""Model with scalings."""
measurement_df = pd.DataFrame(data={
OBSERVABLE_ID: ['obs_a', 'obs_a'],
SIMULATION_CONDITION_ID: ['c0', 'c0'],
Expand All @@ -101,16 +94,15 @@ def test_calculate_residuals_scaling():
columns={MEASUREMENT: SIMULATION})
simulation_df[SIMULATION] = [2, 3]

residual_dfs = calculate_residuals(
measurement_df, simulation_df, observable_df, parameter_df)
expected_residuals = {(np.log(2)-np.log(0.5))/2, (np.log(3)-np.log(1))/2}
expected_residuals_nonorm = {np.log(2)-np.log(0.5), np.log(3)-np.log(1)}

assert set(residual_dfs[0][RESIDUAL]) == pytest.approx(
{(np.log(2)-np.log(0.5))/2, (np.log(3)-np.log(1))/2})
return (measurement_df, observable_df, parameter_df,
simulation_df, expected_residuals, expected_residuals_nonorm)


def test_calculate_residuals_non_numeric_overrides():
"""Test calculate.calculate_residuals with non-numeric noise formula
overrides."""
def model_non_numeric_overrides():
"""Model with non-numeric overrides."""
measurement_df = pd.DataFrame(data={
OBSERVABLE_ID: ['obs_a', 'obs_a'],
SIMULATION_CONDITION_ID: ['c0', 'c0'],
Expand All @@ -136,8 +128,51 @@ def test_calculate_residuals_non_numeric_overrides():
columns={MEASUREMENT: SIMULATION})
simulation_df[SIMULATION] = [2, 3]

residual_dfs = calculate_residuals(
measurement_df, simulation_df, observable_df, parameter_df)
expected_residuals = {(np.log(2)-np.log(0.5))/(2*7+8+4),
(np.log(3)-np.log(1))/(2*2+3+4)}
expected_residuals_nonorm = {np.log(2)-np.log(0.5), np.log(3)-np.log(1)}

return (measurement_df, observable_df, parameter_df,
simulation_df, expected_residuals, expected_residuals_nonorm)

assert set(residual_dfs[0][RESIDUAL]) == pytest.approx(
{(np.log(2)-np.log(0.5))/(2*7+8+4), (np.log(3)-np.log(1))/(2*2+3+4)})

@pytest.fixture
def models():
"""Test model collection covering different features."""
return [model_simple(), model_replicates(),
model_scalings(), model_non_numeric_overrides()]


def test_calculate_residuals(models): # pylint: disable=W0621
"""Test calculate.calculate_residuals."""
for model in models:
(measurement_df, observable_df, parameter_df, simulation_df,
expected_residuals, _) = model
residual_dfs = calculate_residuals(
measurement_df, simulation_df, observable_df, parameter_df)
assert set(residual_dfs[0][RESIDUAL]) == pytest.approx(
expected_residuals)


def test_calculate_non_normalized_residuals(models): # pylint: disable=W0621
"""Test calculate.calculate_residuals without normalization."""
for model in models:
(measurement_df, observable_df, parameter_df, simulation_df,
_, expected_residuals_nonorm) = model
residual_dfs = calculate_residuals(
measurement_df, simulation_df, observable_df, parameter_df,
normalize=False)
assert set(residual_dfs[0][RESIDUAL]) == pytest.approx(
expected_residuals_nonorm)


def test_calculate_chi2(models): # pylint: disable=W0621
"""Test calculate.calculate_chi2."""
for model in models:
(measurement_df, observable_df, parameter_df, simulation_df,
expected_residuals, _) = model
chi2 = calculate_chi2(
measurement_df, simulation_df, observable_df, parameter_df)

expected = sum(np.array(list(expected_residuals))**2)
assert chi2 == pytest.approx(expected)

0 comments on commit 1e39c5a

Please sign in to comment.