fix: standardize variable names to X and Y for consistency #206

Merged
2 commits merged on Sep 1, 2024
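In practical terms, the PR renames every public data argument to `X` (and `Y` for the second data set of cross-set models). A minimal, hypothetical sketch of the resulting calling convention; the toy DataArray and the `dim="time"` argument are illustrative, not taken from this diff:

```python
import numpy as np
import xarray as xr
import xeofs as xe

# Hypothetical toy field (time, lat, lon); any real-valued DataArray works.
X = xr.DataArray(
    np.random.rand(100, 20, 30),
    dims=("time", "lat", "lon"),
    coords={"time": np.arange(100), "lat": np.arange(20), "lon": np.arange(30)},
    name="sst",
)

# Single-set models now take X as the first argument everywhere.
model = xe.models.EOF(n_modes=5)
model.fit(X, dim="time")
scores = model.scores()          # scores of the fitted data
projected = model.transform(X)   # project (new) data onto the components
```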
2 changes: 1 addition & 1 deletion xeofs/models/_base_model.py
@@ -186,6 +186,6 @@ def load(
model = cls.deserialize(dt)
return model

def _validate_loaded_data(self, data: DataArray):
def _validate_loaded_data(self, X: DataArray):
"""Optionally check the loaded data for placeholders."""
pass
4 changes: 2 additions & 2 deletions xeofs/models/_base_model_cross_set.py
@@ -343,11 +343,11 @@ def transform(
data = self._transform_algorithm(X, Y, normalized=normalized)
data_list = []
if X is not None:
X = self.whitener1.inverse_transform_scores_unseen(data["data1"])
X = self.whitener1.inverse_transform_scores_unseen(data["X"])
X = self.preprocessor1.inverse_transform_scores_unseen(X)
data_list.append(X)
if Y is not None:
Y = self.whitener2.inverse_transform_scores_unseen(data["data2"])
Y = self.whitener2.inverse_transform_scores_unseen(data["Y"])
Y = self.preprocessor2.inverse_transform_scores_unseen(Y)
data_list.append(Y)

4 changes: 2 additions & 2 deletions xeofs/models/cpcca.py
@@ -237,7 +237,7 @@ def _transform_algorithm(
scores1 = xr.dot(X, comps1)
if normalized:
scores1 = scores1 / norm1
results["data1"] = scores1
results["X"] = scores1

if Y is not None:
# Project data onto singular vectors
@@ -246,7 +246,7 @@
scores2 = xr.dot(Y, comps2)
if normalized:
scores2 = scores2 / norm2
results["data2"] = scores2
results["Y"] = scores2

return results

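These cpcca.py hunks pair with the `_base_model_cross_set.py` change above: `_transform_algorithm` now keys its result dictionary by the public argument names `"X"`/`"Y"` instead of `"data1"`/`"data2"`, and the shared `transform` reads those keys back. A condensed sketch of that hand-off, with simplified signatures (the real methods also run whitening and preprocessing inverse transforms):

```python
import xarray as xr

def _transform_algorithm(X, Y, comps1, comps2, norm1, norm2, normalized=True):
    # Results are keyed by the public argument names "X"/"Y"
    # rather than the old "data1"/"data2".
    results = {}
    if X is not None:
        scores1 = xr.dot(X, comps1)
        if normalized:
            scores1 = scores1 / norm1
        results["X"] = scores1
    if Y is not None:
        scores2 = xr.dot(Y, comps2)
        if normalized:
            scores2 = scores2 / norm2
        results["Y"] = scores2
    return results

def transform(X=None, Y=None, **kwargs):
    data = _transform_algorithm(X, Y, **kwargs)
    data_list = []
    if X is not None:
        data_list.append(data["X"])   # was data["data1"]
    if Y is not None:
        data_list.append(data["Y"])   # was data["data2"]
    return data_list
```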
40 changes: 20 additions & 20 deletions xeofs/models/eof.py
@@ -47,7 +47,7 @@ class EOF(_BaseModelSingleSet):
Examples
--------
>>> model = xe.models.EOF(n_modes=5)
>>> model.fit(data)
>>> model.fit(X)
>>> scores = model.scores()

"""
@@ -83,32 +83,32 @@ def __init__(
)
self.attrs.update({"model": "EOF analysis"})

def _fit_algorithm(self, data: DataArray) -> Self:
def _fit_algorithm(self, X: DataArray) -> Self:
sample_name = self.sample_name
feature_name = self.feature_name

# Augment the data
data = self._augment_data(data)
X = self._augment_data(X)

# Compute the total variance
total_variance = compute_total_variance(data, dim=sample_name)
total_variance = compute_total_variance(X, dim=sample_name)

# Decompose the data
decomposer = Decomposer(**self._decomposer_kwargs)
decomposer.fit(data, dims=(sample_name, feature_name))
decomposer.fit(X, dims=(sample_name, feature_name))

singular_values = decomposer.s_
components = decomposer.V_
scores = decomposer.U_ * decomposer.s_
scores.name = "scores"

# Compute the explained variance per mode
n_samples = data.coords[self.sample_name].size
n_samples = X.coords[self.sample_name].size
exp_var = singular_values**2 / (n_samples - 1)
exp_var.name = "explained_variance"

# Store the results
self.data.add(data, "input_data", allow_compute=False)
self.data.add(X, "input_data", allow_compute=False)
self.data.add(components, "components")
self.data.add(scores, "scores")
self.data.add(singular_values, "norms")
@@ -118,16 +118,16 @@ def _fit_algorithm(self, data: DataArray) -> Self:
self.data.set_attrs(self.attrs)
return self

def _augment_data(self, data: DataArray) -> DataArray:
return data
def _augment_data(self, X: DataArray) -> DataArray:
return X

def _transform_algorithm(self, data: DataObject) -> DataArray:
def _transform_algorithm(self, X: DataObject) -> DataArray:
feature_name = self.preprocessor.feature_name

components = self.data["components"]

# Project the data
projections = xr.dot(data, components, dims=feature_name)
projections = xr.dot(X, components, dims=feature_name)
projections.name = "scores"

return projections
@@ -333,13 +333,13 @@ def __init__(
)
self.attrs.update({"model": "Complex EOF analysis"})

def _fit_algorithm(self, data: DataArray) -> Self:
if not np.iscomplexobj(data):
def _fit_algorithm(self, X: DataArray) -> Self:
if not np.iscomplexobj(X):
warnings.warn(
"Expected complex-valued data but found real-valued data. For Hilbert EOF analysis, use `HilbertEOF` model."
)

return super()._fit_algorithm(data)
return super()._fit_algorithm(X)

def components_amplitude(self) -> DataObject:
"""Return the amplitude of the (EOF) components.
@@ -496,7 +496,7 @@ class HilbertEOF(ComplexEOF):
Examples
--------
>>> model = HilbertEOF(n_modes=5, standardize=True)
>>> model.fit(data)
>>> model.fit(X)

"""

@@ -534,22 +534,22 @@ def __init__(
self.attrs.update({"model": "Hilbert EOF analysis"})
self._params.update({"padding": padding, "decay_factor": decay_factor})

def _augment_data(self, data: DataArray) -> DataArray:
def _augment_data(self, X: DataArray) -> DataArray:
# Apply hilbert transform:
padding = self._params["padding"]
decay_factor = self._params["decay_factor"]
return hilbert_transform(
data,
X,
dims=(self.sample_name, self.feature_name),
padding=padding,
decay_factor=decay_factor,
)

def _fit_algorithm(self, data: DataArray) -> Self:
EOF._fit_algorithm(self, data)
def _fit_algorithm(self, X: DataArray) -> Self:
EOF._fit_algorithm(self, X)
return self

def _transform_algorithm(self, data: DataArray) -> DataArray:
def _transform_algorithm(self, X: DataArray) -> DataArray:
raise NotImplementedError("Hilbert EOF does not support transform method.")

def _inverse_transform_algorithm(self, scores: DataArray) -> DataArray:
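The eof.py hunks also show the division of labour between `ComplexEOF` (expects complex-valued input and only warns otherwise) and `HilbertEOF` (augments real-valued input with a Hilbert transform before the EOF step, and raises on `transform`). A hedged usage sketch under that reading; the toy arrays are hypothetical and both classes are assumed to be exported from `xe.models`:

```python
import numpy as np
import xarray as xr
import xeofs as xe

rng = np.random.default_rng(0)
dims = ("time", "feature")

# Hypothetical inputs: Z complex-valued, X real-valued.
Z = xr.DataArray(rng.standard_normal((50, 10)) + 1j * rng.standard_normal((50, 10)), dims=dims)
X = xr.DataArray(rng.standard_normal((50, 10)), dims=dims)

# ComplexEOF expects complex input; feeding real data only triggers a warning.
cplx = xe.models.ComplexEOF(n_modes=3)
cplx.fit(Z, dim="time")

# HilbertEOF builds the analytic signal from real data itself, then runs the EOF step.
hilb = xe.models.HilbertEOF(n_modes=3)
hilb.fit(X, dim="time")
amplitude = hilb.components_amplitude()   # inherited from ComplexEOF
# hilb.transform(X) would raise NotImplementedError per the hunk above.
```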
6 changes: 3 additions & 3 deletions xeofs/models/eof_rotator.py
@@ -48,7 +48,7 @@ class EOFRotator(EOF):
Examples
--------
>>> model = xe.models.EOF(n_modes=10)
>>> model.fit(data)
>>> model.fit(X, "time")
>>> rotator = xe.models.EOFRotator(n_modes=10)
>>> rotator.fit(model)
>>> rotator.components()
@@ -222,7 +222,7 @@ def _sort_by_variance(self):
)
self.sorted = True

def _transform_algorithm(self, data: DataArray) -> DataArray:
def _transform_algorithm(self, X: DataArray) -> DataArray:
n_modes = self._params["n_modes"]

svals = self.model.singular_values().sel(mode=slice(1, self._params["n_modes"]))
@@ -231,7 +231,7 @@ def _transform_algorithm(self, data: DataArray) -> DataArray:
components = self.model.data["components"].sel(mode=slice(1, n_modes))

# Compute non-rotated scores by projecting the data onto non-rotated components
projections = xr.dot(data, components) / svals
projections = xr.dot(X, components) / svals
projections.name = "scores"

# Rotate the scores
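Following the updated docstring, a hedged sketch of the two-step rotation workflow (toy input; `rotator.transform(X)` is assumed to exist on the rotator, consistent with the `_transform_algorithm` hunk above):

```python
import numpy as np
import xarray as xr
import xeofs as xe

# Hypothetical input.
X = xr.DataArray(np.random.rand(120, 40), dims=("time", "feature"))

# Step 1: fit a plain EOF model on X.
model = xe.models.EOF(n_modes=10)
model.fit(X, dim="time")

# Step 2: rotate the fitted model and inspect the rotated components.
rotator = xe.models.EOFRotator(n_modes=10)
rotator.fit(model)
components = rotator.components()

# Projection of (new) data: dot with the unrotated components, scale by the
# singular values, then apply the rotation, as in the hunk above.
rotated_scores = rotator.transform(X)
```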
12 changes: 6 additions & 6 deletions xeofs/models/opa.py
@@ -55,7 +55,7 @@ class OPA(_BaseModelSingleSet):
--------
>>> from xeofs.models import OPA
>>> model = OPA(n_modes=10, tau_max=50, n_pca_modes=100)
>>> model.fit(data, dim=("time"))
>>> model.fit(X, dim=("time"))

Retrieve the optimally persistent patterns (OPP) and their time series:

@@ -127,8 +127,8 @@ def _compute_matrix_inverse(X, dims):
dask="allowed",
)

def _fit_algorithm(self, data: DataArray) -> Self:
assert_not_complex(data)
def _fit_algorithm(self, X: DataArray) -> Self:
assert_not_complex(X)

sample_name = self.sample_name
feature_name = self.feature_name
@@ -146,8 +146,8 @@ def _fit_algorithm(self, data: DataArray) -> Self:
check_nans=False,
solver_kwargs=self._params["solver_kwargs"],
)
pca.fit(data, dim=sample_name)
n_samples = data.coords[sample_name].size
pca.fit(X, dim=sample_name)
n_samples = X.coords[sample_name].size
comps = pca.data["components"] * np.sqrt(n_samples - 1)
# -> comps (feature x mode)
scores = pca.data["scores"] / np.sqrt(n_samples - 1)
@@ -270,7 +270,7 @@ def _fit_algorithm(self, data: DataArray) -> Self:
self._C0 = C0 # store C0 for testing purposes of orthogonality
return self

def _transform_algorithm(self, data: DataArray) -> DataArray:
def _transform_algorithm(self, X: DataArray) -> DataArray:
raise NotImplementedError("OPA does not (yet) support transform()")

def _inverse_transform_algorithm(self, scores) -> DataObject:
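A hedged usage sketch for OPA based on the updated docstring (hypothetical input; `components()` and `scores()` are assumed accessors, and the hunk above keeps `transform()` unimplemented):

```python
import numpy as np
import xarray as xr
from xeofs.models import OPA

# Hypothetical input.
X = xr.DataArray(np.random.rand(300, 150), dims=("time", "feature"))

model = OPA(n_modes=10, tau_max=50, n_pca_modes=100)
model.fit(X, dim="time")

# Optimally persistent patterns (OPP) and their time series from the fit;
# model.transform(X) would raise NotImplementedError per the hunk above.
patterns = model.components()
time_series = model.scores()
```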
24 changes: 12 additions & 12 deletions xeofs/models/sparse_pca.py
@@ -143,24 +143,24 @@ def __init__(
}
)

def _fit_algorithm(self, data: DataArray) -> Self:
def _fit_algorithm(self, X: DataArray) -> Self:
sample_name = self.sample_name
feature_name = self.feature_name

# Check if the data is real
# NOTE: Complex data is not supported, it's likely possible but current numpy implementation
# of sparse_pca needs to be adpated, mainly changing matrix transpose to conjugate transpose.
# http://arxiv.org/abs/1804.00341
assert_not_complex(data)
assert_not_complex(X)

# Compute the total variance
total_variance = compute_total_variance(data, dim=sample_name)
total_variance = compute_total_variance(X, dim=sample_name)

# Compute matrix rank
rank = get_matrix_rank(data)
rank = get_matrix_rank(X)

# Decide whether to use exact or randomized algorithm
is_small_data = max(data.shape) < 500
is_small_data = max(X.shape) < 500
solver = self._params["solver"]

match solver:
@@ -209,7 +209,7 @@ def _fit_algorithm(self, data: DataArray) -> Self:
# exp_var : eigenvalues
components, components_normal, exp_var = xr.apply_ufunc(
decomposing_algorithm,
data,
X,
input_core_dims=[[sample_name, feature_name]],
output_core_dims=[[feature_name, "mode"], [feature_name, "mode"], ["mode"]],
dask="allowed",
@@ -223,21 +223,21 @@ def _fit_algorithm(self, data: DataArray) -> Self:
components.name = "sparse_weight_vectors"
components = components.assign_coords(
{
feature_name: data.coords[feature_name],
feature_name: X.coords[feature_name],
"mode": np.arange(1, self.n_modes + 1),
},
)

components_normal.name = "orthonormal_weight_vectors"
components_normal = components_normal.assign_coords(
{
feature_name: data.coords[feature_name],
feature_name: X.coords[feature_name],
"mode": np.arange(1, self.n_modes + 1),
},
)

# Transform the data
scores = xr.dot(data, components, dims=feature_name)
scores = xr.dot(X, components, dims=feature_name)
scores.name = "scores"

norms = xr.apply_ufunc(
@@ -253,7 +253,7 @@ def _fit_algorithm(self, data: DataArray) -> Self:
norms.name = "component_norms"

# Store the results
self.data.add(data, "input_data", allow_compute=False)
self.data.add(X, "input_data", allow_compute=False)
self.data.add(components, "components")
self.data.add(components_normal, "components_normal")
self.data.add(scores, "scores")
Expand All @@ -264,13 +264,13 @@ def _fit_algorithm(self, data: DataArray) -> Self:
self.data.set_attrs(self.attrs)
return self

def _transform_algorithm(self, data: DataObject) -> DataArray:
def _transform_algorithm(self, X: DataObject) -> DataArray:
feature_name = self.preprocessor.feature_name

components = self.data["components"]

# Project the data
projections = xr.dot(data, components, dims=feature_name)
projections = xr.dot(X, components, dims=feature_name)
projections.name = "scores"

return projections
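Finally, a hedged sketch of the sparse PCA path after the rename. The class name `SparsePCA` is an assumption inferred from the file name, and the toy data is illustrative:

```python
import numpy as np
import xarray as xr
import xeofs as xe

# Hypothetical input.
X = xr.DataArray(np.random.rand(200, 80), dims=("time", "feature"))

# SparsePCA (class name assumed from sparse_pca.py) fits sparse weight vectors on X.
model = xe.models.SparsePCA(n_modes=4)
model.fit(X, dim="time")

scores = model.scores()
# transform(X) projects new data with xr.dot(X, components), per the hunk above.
projected = model.transform(X)
```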