From 02e13b229f9ef2e3818f0284bd829ef1148dd722 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98yvind=20Lind-Johansen?= Date: Thu, 19 May 2022 10:46:16 +0200 Subject: [PATCH] Moved get_matching_vector_names function to new utils module --- webviz_subsurface/_providers/__init__.py | 1 + .../ensemble_summary_provider.py | 19 ------------- .../ensemble_summary_provider/utils.py | 27 +++++++++++++++++++ .../_parameter_response_correlation.py | 9 ++++--- 4 files changed, 33 insertions(+), 23 deletions(-) create mode 100644 webviz_subsurface/_providers/ensemble_summary_provider/utils.py diff --git a/webviz_subsurface/_providers/__init__.py b/webviz_subsurface/_providers/__init__.py index a5ae863593..5a1ba9b2de 100644 --- a/webviz_subsurface/_providers/__init__.py +++ b/webviz_subsurface/_providers/__init__.py @@ -13,6 +13,7 @@ from .ensemble_summary_provider.ensemble_summary_provider_factory import ( EnsembleSummaryProviderFactory, ) +from .ensemble_summary_provider.utils import get_matching_vector_names from .ensemble_surface_provider import ( EnsembleSurfaceProvider, EnsembleSurfaceProviderFactory, diff --git a/webviz_subsurface/_providers/ensemble_summary_provider/ensemble_summary_provider.py b/webviz_subsurface/_providers/ensemble_summary_provider/ensemble_summary_provider.py index 88abdcac06..4022e62ec1 100644 --- a/webviz_subsurface/_providers/ensemble_summary_provider/ensemble_summary_provider.py +++ b/webviz_subsurface/_providers/ensemble_summary_provider/ensemble_summary_provider.py @@ -1,7 +1,5 @@ import abc import datetime -import fnmatch -import re from dataclasses import dataclass from enum import Enum from typing import List, Optional, Sequence @@ -118,20 +116,3 @@ def get_vectors_for_date_df( The returned DataFrame will always contain a 'REAL' column in addition to columns for all the requested vectors. 
""" - - def get_matching_vector_names(self, column_keys: List[str]) -> List[str]: - """Returns a list of vectors that match the input columns_keys that - can have unix shell wildcards. - - This function is almost the same as filter_vectorlist_on_column_keys in - parameter_analysis/models/ensemble_timeseries_datamodel.py and should be - generalized somewhere. - """ - try: - regex = re.compile( - "|".join([fnmatch.translate(col) for col in column_keys]), - flags=re.IGNORECASE, - ) - return [vec for vec in self.vector_names() if regex.fullmatch(vec)] - except re.error: - return [] diff --git a/webviz_subsurface/_providers/ensemble_summary_provider/utils.py b/webviz_subsurface/_providers/ensemble_summary_provider/utils.py new file mode 100644 index 0000000000..084b4ea8bf --- /dev/null +++ b/webviz_subsurface/_providers/ensemble_summary_provider/utils.py @@ -0,0 +1,27 @@ +import fnmatch +import re +from typing import List + +from .ensemble_summary_provider import EnsembleSummaryProvider + + +def get_matching_vector_names( + provider: EnsembleSummaryProvider, column_keys: List[str] +) -> List[str]: + """Returns a list of vectors that match the input columns_keys that + can have unix shell wildcards. 
+ + Example of use: + column_keys = ["FOPT", "WGOR*"] + matching_vector_names = get_matching_vector_names(provider, column_keys) + df = provider.get_vectors_df(matching_vector_names, None) + + """ + try: + regex = re.compile( + "|".join([fnmatch.translate(col) for col in column_keys]), + flags=re.IGNORECASE, + ) + return [vec for vec in provider.vector_names() if regex.fullmatch(vec)] + except re.error: + return [] diff --git a/webviz_subsurface/plugins/_parameter_response_correlation.py b/webviz_subsurface/plugins/_parameter_response_correlation.py index 0232515589..006e30dfd8 100644 --- a/webviz_subsurface/plugins/_parameter_response_correlation.py +++ b/webviz_subsurface/plugins/_parameter_response_correlation.py @@ -20,6 +20,7 @@ EnsembleTableProviderFactory, EnsembleTableProviderSet, Frequency, + get_matching_vector_names, ) @@ -675,7 +676,7 @@ def read_csv(csv_file) -> pd.DataFrame: def create_df_from_table_provider(provider: EnsembleTableProviderSet) -> pd.DataFrame: - """This function is the same as in parameter analysis and could be generalized.""" + """Aggregates parameters from all ensembles into a common dataframe.""" dfs = [] for ens in provider.ensemble_names(): df = provider.ensemble_provider(ens).get_column_data( @@ -689,11 +690,11 @@ def create_df_from_table_provider(provider: EnsembleTableProviderSet) -> pd.Data def create_df_from_summary_provider( provider_set: Dict[str, EnsembleSummaryProvider], column_keys: List[str] ) -> pd.DataFrame: - """Descr""" + """Aggregates summary data from all ensembles into a common dataframe.""" dfs = [] for ens_name, provider in provider_set.items(): - all_sumvecs = provider.get_matching_vector_names(column_keys) - df = provider.get_vectors_df(all_sumvecs, None) + matching_sumvecs = get_matching_vector_names(provider, column_keys) + df = provider.get_vectors_df(matching_sumvecs, None) df["ENSEMBLE"] = ens_name dfs.append(df)