[SDK][Agreement] Add agreement package to sdk. (#850)
* add agreement package to sdk.
* add agreement as optional extra.
* take the micro average for the percentage calculation instead of averaging over row percentages (a small illustration follows below).
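For context on the last point, here is a small illustration of micro vs. macro averaging; the matrix and numbers are made up for this sketch and are not part of the commit:

import numpy as np

# made-up incidence matrix: votes per label for two items
data = np.asarray([
    [2, 0],  # 2 raters, both agree: 2 of 2 ordered pairs agree
    [2, 2],  # 4 raters, split evenly: 4 of 12 ordered pairs agree
])

n_raters = data.sum(1)
item_agreements = np.sum(data * data, 1) - n_raters      # [2, 4] agreeing pairs
max_item_agreements = n_raters * (n_raters - 1)          # [2, 12] possible pairs

macro = np.mean(item_agreements / max_item_agreements)   # (1.0 + 1/3) / 2 ≈ 0.667
micro = item_agreements.sum() / max_item_agreements.sum()  # 6 / 14 ≈ 0.429

The micro average weights every rater pair equally instead of giving small items the same influence as large ones, which is what percent_agreement below implements.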
1 parent 5f03c4a · commit 9283ae8 · 12 changed files with 596 additions and 0 deletions.
2 changes: 2 additions & 0 deletions
packages/sdk/python/human-protocol-sdk/human_protocol_sdk/agreement/__init__.py
@@ -0,0 +1,2 @@
from .measures import percent_agreement, cohens_kappa, fleiss_kappa
from .bootstrap import bootstrap_ci
110 changes: 110 additions & 0 deletions
packages/sdk/python/human-protocol-sdk/human_protocol_sdk/agreement/bootstrap.py
@@ -0,0 +1,110 @@
import numpy as np
from typing import Sequence, Callable, Optional
from warnings import warn

from human_protocol_sdk.agreement.utils import NormalDistribution


def bootstrap_ci(
    data: Sequence,
    statistic_fn: Callable,
    n_iterations: int = 1000,
    n_sample: Optional[int] = None,
    ci=0.95,
    algorithm="bca",
) -> tuple:
    """Returns the confidence interval for the bootstrap estimate of the given
    statistic.

    Args:
        data: Data to estimate the statistic.
        statistic_fn: Function to calculate the statistic. `f(data)` must
            return the statistic.
        n_iterations: Number of bootstrap samples to use for the estimate.
        n_sample: If provided, determines the size of each bootstrap sample
            drawn from the data. If omitted, is equal to the length of the
            data.
        ci: Size of the confidence interval.
        algorithm: Which algorithm to use for the confidence interval
            estimation. "bca" uses the "Bias Corrected Bootstrap with
            Acceleration", "percentile" simply takes the appropriate
            percentiles from the bootstrap distribution.
    """
    data = np.asarray(data)

    if n_iterations < 1:
        raise ValueError(
            f"n_iterations must be a positive integer, but was {n_iterations}"
        )

    n_data = len(data)
    if n_data < 30:
        warn(
            "Dataset size is low, bootstrap estimate might be inaccurate. For accurate results, make sure to provide at least 30 data points."
        )

    if n_sample is None:
        n_sample = n_data
    elif n_sample < 1:
        raise ValueError(f"n_sample must be a positive integer, but was {n_sample}")

    if not (0.0 <= ci <= 1.0):
        raise ValueError(f"ci must be a float within [0.0, 1.0], but was {ci}")

    # bootstrap estimates: resample with replacement, recompute the statistic
    theta_b = np.empty(n_iterations, dtype=float)
    for i in range(n_iterations):
        # randint's upper bound is exclusive, so n_data covers all indices
        idx = np.random.randint(n_data, size=(n_sample,))
        sample = data[idx]
        theta_b[i] = statistic_fn(sample)

    match algorithm:
        case "percentile":
            alpha = 1.0 - ci
            alpha /= 2.0
            q = np.asarray([alpha, 1.0 - alpha])
        case "bca":
            # acceleration: estimate a from jackknife bootstrap
            theta_hat = statistic_fn(data)
            jn_idxs = ~np.eye(n_data, dtype=bool)
            theta_jn = np.empty(n_data, dtype=float)
            for i in range(n_data):
                theta_jn[i] = (n_data - 1) * (
                    theta_hat - statistic_fn(data[jn_idxs[i]])
                )

            a = (np.sum(theta_jn**3) / np.sum(theta_jn**2, axis=-1) ** 1.5) / 6

            alpha = 1.0 - ci
            alpha /= 2
            q = np.asarray([alpha, 1.0 - alpha])

            # bias correction
            N = NormalDistribution()
            ppf = np.vectorize(N.ppf)
            cdf = np.vectorize(N.cdf)

            # bias term: discrepancy between bootstrap values and estimated value
            z_0 = ppf(np.mean(theta_b < theta_hat))
            z_u = ppf(q)
            z_diff = z_0 + z_u

            q = cdf(z_0 + (z_diff / (1 - a * z_diff)))
        case _:
            raise ValueError(f"Algorithm '{algorithm}' is not available!")

    # sanity checks
    if np.any(np.isnan(q)):
        warn(
            f"q contains NaN values. Input data is probably invalid. Interval will be (nan, nan). data: {data}"
        )
        ci_low = ci_high = np.nan
    else:
        if np.any((q < 0.0) | (q > 1.0)):
            warn(
                f"q ({q}) out of bounds. Input data is probably invalid. q will be clipped into interval [0.0, 1.0]. data: {data}"
            )
            q = np.clip(q, 0.0, 1.0)
        ci_low, ci_high = np.percentile(theta_b, q * 100)

    return (ci_low, ci_high), theta_b
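A minimal usage sketch for bootstrap_ci, combining it with percent_agreement from the same package; the incidence matrix is randomly generated here purely for illustration:

import numpy as np
from human_protocol_sdk.agreement import bootstrap_ci, percent_agreement

# hypothetical data: 50 items, 3 raters each, votes per label (incidence matrix)
rng = np.random.default_rng(42)
data = rng.multinomial(3, [0.6, 0.4], size=50)

# resamples rows of the matrix and recomputes the statistic on each sample
(low, high), estimates = bootstrap_ci(
    data,
    statistic_fn=lambda sample: percent_agreement(sample, "im"),
    n_iterations=1000,
    ci=0.95,
    algorithm="bca",
)
print(f"95% CI for percent agreement: ({low:.3f}, {high:.3f})")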
89 changes: 89 additions & 0 deletions
packages/sdk/python/human-protocol-sdk/human_protocol_sdk/agreement/measures.py
@@ -0,0 +1,89 @@
import numpy as np

from .validations import (
    validate_incidence_matrix,
    validate_confusion_matrix,
)


def percent_agreement(
    data: np.ndarray, data_format="im", invalid_return=np.nan
) -> float:
    """
    Returns the overall agreement percentage observed across the data.

    Args:
        data: Annotation data.
        data_format: The format of data. Options are 'im' for an incidence
            matrix and 'cm' for a confusion matrix. Defaults to 'im'.
        invalid_return: Value to return if the result is np.nan. Defaults to np.nan.
    """
    data = np.asarray(data)

    match data_format:
        case "cm":
            validate_confusion_matrix(data)
            percent = np.diag(data).sum() / data.sum()
        case _:
            # implicitly assumes incidence matrix
            validate_incidence_matrix(data)

            n_raters = np.sum(data, 1)
            item_agreements = np.sum(data * data, 1) - n_raters
            max_item_agreements = n_raters * (n_raters - 1)
            # micro average: pool agreeing pairs across all items instead of
            # averaging per-item percentages
            percent = item_agreements.sum() / max_item_agreements.sum()

    if np.isnan(percent):
        percent = invalid_return

    return percent


def cohens_kappa(data: np.ndarray, invalid_return=np.nan) -> float:
    """
    Returns Cohen's Kappa for the provided annotations.

    Args:
        data: Annotation data, provided as K x K confusion matrix, with K =
            number of labels.
        invalid_return: Value to return if the result is np.nan. Defaults to np.nan.
    """
    data = np.asarray(data)

    agreement_observed = percent_agreement(data, "cm")
    agreement_expected = np.matmul(data.sum(0), data.sum(1)) / data.sum() ** 2

    kappa = (agreement_observed - agreement_expected) / (1 - agreement_expected)

    if np.isnan(kappa):
        kappa = invalid_return

    return kappa


def fleiss_kappa(data: np.ndarray, invalid_return=np.nan) -> float:
    """
    Returns Fleiss' Kappa for the provided annotations.

    Args:
        data: Annotation data, provided as I x K incidence matrix, with
            I = number of items and K = number of labels.
        invalid_return: Value to return if the result is np.nan. Defaults to np.nan.
    """
    data = np.asarray(data)

    agreement_observed = percent_agreement(data, "im")

    class_probabilities = data.sum(0) / data.sum()
    agreement_expected = np.power(class_probabilities, 2).sum()

    # in case all votes have been for the same class, return the percentage
    if agreement_expected == agreement_observed == 1.0:
        return 1.0

    kappa = (agreement_observed - agreement_expected) / (1 - agreement_expected)

    if np.isnan(kappa):
        kappa = invalid_return

    return kappa
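A quick sketch of these measures on a hand-made incidence matrix (the data and the expected values in the comments are illustrative, worked out by hand):

import numpy as np
from human_protocol_sdk.agreement import fleiss_kappa, percent_agreement

# 4 items, 2 labels, 3 raters per item: entry [i, k] counts votes for label k on item i
im = np.asarray([
    [3, 0],
    [3, 0],
    [0, 3],
    [1, 2],
])

print(percent_agreement(im, "im"))  # 20 agreeing pairs / 24 possible ≈ 0.833
print(fleiss_kappa(im))             # (0.833 - 0.514) / (1 - 0.514) ≈ 0.657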
111 changes: 111 additions & 0 deletions
packages/sdk/python/human-protocol-sdk/human_protocol_sdk/agreement/utils.py
@@ -0,0 +1,111 @@
import numpy as np
from typing import Sequence, Optional

from pyerf import erf, erfinv

from .validations import (
    validate_nd,
    validate_equal_shape,
    validate_same_dtype,
)


def confusion_matrix_from_sequence(
    a: Sequence, b: Sequence, labels: Optional[Sequence] = None
):
    """Generate an N x N confusion matrix from the given sequences of values
    a and b, where N is the number of unique labels.

    Args:
        a: A sequence of labels.
        b: Another sequence of labels.
        labels: The labels contained in the records. Must contain all labels in
            the given records and may contain labels that are not found in the
            records.
    """
    a = np.asarray(a)
    b = np.asarray(b)

    validate_same_dtype(a, b)
    validate_nd(a, 1)
    validate_nd(b, 1)
    validate_equal_shape(a, b)

    # filter NaN values
    M = np.vstack((a, b)).T  # N x 2 matrix of (a, b) pairs
    if M.dtype.kind in "UO":  # string types represent missing values as "nan"
        mask = M != "nan"
    else:
        mask = ~np.isnan(M)
    a, b = M[np.all(mask, axis=1)].T

    # create a sorted array of unique labels
    if labels is None:
        labels = np.concatenate([a, b])
    labels = np.unique(labels)

    # convert labels to indices
    label_to_id = {label: i for i, label in enumerate(labels)}
    map_fn = np.vectorize(lambda x: label_to_id[x])
    a = map_fn(a)
    b = map_fn(b)

    # get indices and counts to populate the confusion matrix
    confusion_matrix = np.zeros((labels.size, labels.size), dtype=int)
    ijs, counts = np.unique(np.vstack([a, b]), axis=1, return_counts=True)
    confusion_matrix[ijs[0], ijs[1]] = counts

    return confusion_matrix


class NormalDistribution:
    """Continuous Normal Distribution.

    See: https://en.wikipedia.org/wiki/Normal_distribution
    """

    def __init__(self, location: float = 0.0, scale: float = 1.0):
        """Creates a NormalDistribution from the given parameters.

        Args:
            location: Location of the distribution.
            scale: Scale of the distribution. Must be positive.
        """
        if scale <= 0.0:
            raise ValueError(f"scale parameter needs to be positive, but was {scale}")

        self.location = location
        self.scale = scale

    def cdf(self, x: float) -> float:
        """Cumulative Distribution Function of the Normal Distribution. Returns
        the probability that a random sample will be less than the given
        point.

        Args:
            x: Point within the distribution's domain.
        """
        return (1 + erf((x - self.location) / (self.scale * 2**0.5))) / 2

    def pdf(self, x: float) -> float:
        """Probability Density Function of the Normal Distribution. Returns the
        probability density at the given point.

        Args:
            x: Point within the distribution's domain.
        """
        # note the parentheses: the whole deviation is scaled before squaring
        return np.exp(-0.5 * ((x - self.location) / self.scale) ** 2) / (
            self.scale * (2 * np.pi) ** 0.5
        )

    def ppf(self, p: float) -> float:
        """Percent Point Function of the Normal Distribution. Returns the
        point at which the cdf equals the given probability. Also called the
        quantile function; inverse of the cdf.

        Args:
            p: Percentile of the distribution to be covered by the ppf.
        """
        if not (0.0 <= p <= 1.0):
            raise ValueError(f"p must be a float within [0.0, 1.0], but was {p}")

        return self.location + self.scale * 2**0.5 * erfinv(2 * p - 1.0)
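A sketch of how confusion_matrix_from_sequence feeds into cohens_kappa; the two rating sequences are made up, and the values in the comments were checked by hand:

from human_protocol_sdk.agreement import cohens_kappa
from human_protocol_sdk.agreement.utils import confusion_matrix_from_sequence

# two raters labeling the same five items (hypothetical data)
rater_a = ["cat", "dog", "cat", "dog", "cat"]
rater_b = ["cat", "dog", "dog", "dog", "cat"]

cm = confusion_matrix_from_sequence(rater_a, rater_b)
print(cm)                # [[2 1]
                         #  [0 2]]
print(cohens_kappa(cm))  # (0.8 - 0.48) / (1 - 0.48) ≈ 0.615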
67 changes: 67 additions & 0 deletions
packages/sdk/python/human-protocol-sdk/human_protocol_sdk/agreement/validations.py
@@ -0,0 +1,67 @@
import numpy as np


def validate_nd(M: np.ndarray, n=2):
    """Validates that M has n dimensions."""
    if M.ndim != n:
        raise ValueError(f"Input must be a {n}-dimensional array-like.")


def validate_dtype_is_subtype_of(M: np.ndarray, supertype: np.dtype):
    """Validates that the data type of M is a subtype of supertype."""
    if not issubclass(M.dtype.type, supertype):
        raise ValueError(
            f"Input must have a data type that is a subtype of {supertype}"
        )


def validate_is_numeric(M: np.ndarray):
    """Validates that the data type of M is a number type."""
    if (
        M.dtype.kind not in np.typecodes["AllFloat"]
        and M.dtype.kind not in np.typecodes["AllInteger"]
    ):
        raise ValueError("Input data type must be a numeric type.")


def validate_all_positive(M: np.ndarray):
    """
    Validates that all entries in M are non-negative (positive or zero).
    Raises a ValueError if not.
    """
    if np.any(M < 0):
        raise ValueError("Inputs must all be non-negative.")


def validate_sufficient_annotations(M: np.ndarray, n=1):
    """Validates that M contains more than n annotations."""
    if M.sum() <= n:
        raise ValueError(f"Input must have more than {n} annotation(s).")


def validate_incidence_matrix(M: np.ndarray):
    """Validates that M is an incidence matrix."""
    validate_nd(M, n=2)
    validate_is_numeric(M)
    validate_all_positive(M)
    validate_sufficient_annotations(M, n=1)


def validate_confusion_matrix(M):
    """Validates that M is a confusion matrix."""
    validate_incidence_matrix(M)

    if M.shape[0] != M.shape[1]:
        raise ValueError("Input must be a square matrix.")


def validate_equal_shape(a: np.ndarray, b: np.ndarray):
    """Validates that a and b have the same shape."""
    if a.shape != b.shape:
        raise ValueError("All inputs must have the same shape.")


def validate_same_dtype(a: np.ndarray, b: np.ndarray):
    """Validates that a and b share the same kind of data type."""
    if a.dtype.kind != b.dtype.kind:
        raise ValueError("All inputs must have the same kind of dtype.")
Successfully deployed to the following URLs:
- job-launcher-server – ./packages/apps/job-launcher/server
- job-launcher-server-humanprotocol.vercel.app
- job-launcher-server-git-develop-humanprotocol.vercel.app
- job-launcher-server-nine.vercel.app