From 02e13b229f9ef2e3818f0284bd829ef1148dd722 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=98yvind=20Lind-Johansen?= <olind@equinor.com>
Date: Thu, 19 May 2022 10:46:16 +0200
Subject: [PATCH] Moved get_matching_vector_names function to new utils module

---
 webviz_subsurface/_providers/__init__.py      |  1 +
 .../ensemble_summary_provider.py              | 19 -------------
 .../ensemble_summary_provider/utils.py        | 27 +++++++++++++++++++
 .../_parameter_response_correlation.py        |  9 ++++---
 4 files changed, 33 insertions(+), 23 deletions(-)
 create mode 100644 webviz_subsurface/_providers/ensemble_summary_provider/utils.py

diff --git a/webviz_subsurface/_providers/__init__.py b/webviz_subsurface/_providers/__init__.py
index a5ae863593..5a1ba9b2de 100644
--- a/webviz_subsurface/_providers/__init__.py
+++ b/webviz_subsurface/_providers/__init__.py
@@ -13,6 +13,7 @@
 from .ensemble_summary_provider.ensemble_summary_provider_factory import (
     EnsembleSummaryProviderFactory,
 )
+from .ensemble_summary_provider.utils import get_matching_vector_names
 from .ensemble_surface_provider import (
     EnsembleSurfaceProvider,
     EnsembleSurfaceProviderFactory,
diff --git a/webviz_subsurface/_providers/ensemble_summary_provider/ensemble_summary_provider.py b/webviz_subsurface/_providers/ensemble_summary_provider/ensemble_summary_provider.py
index 88abdcac06..4022e62ec1 100644
--- a/webviz_subsurface/_providers/ensemble_summary_provider/ensemble_summary_provider.py
+++ b/webviz_subsurface/_providers/ensemble_summary_provider/ensemble_summary_provider.py
@@ -1,7 +1,5 @@
 import abc
 import datetime
-import fnmatch
-import re
 from dataclasses import dataclass
 from enum import Enum
 from typing import List, Optional, Sequence
@@ -118,20 +116,3 @@ def get_vectors_for_date_df(
         The returned DataFrame will always contain a 'REAL' column in addition to
         columns for all the requested vectors.
         """
-
-    def get_matching_vector_names(self, column_keys: List[str]) -> List[str]:
-        """Returns a list of vectors that match the input columns_keys that
-        can have unix shell wildcards.
-
-        This function is almost the same as filter_vectorlist_on_column_keys in
-        parameter_analysis/models/ensemble_timeseries_datamodel.py and should be
-        generalized somewhere.
-        """
-        try:
-            regex = re.compile(
-                "|".join([fnmatch.translate(col) for col in column_keys]),
-                flags=re.IGNORECASE,
-            )
-            return [vec for vec in self.vector_names() if regex.fullmatch(vec)]
-        except re.error:
-            return []
diff --git a/webviz_subsurface/_providers/ensemble_summary_provider/utils.py b/webviz_subsurface/_providers/ensemble_summary_provider/utils.py
new file mode 100644
index 0000000000..084b4ea8bf
--- /dev/null
+++ b/webviz_subsurface/_providers/ensemble_summary_provider/utils.py
@@ -0,0 +1,27 @@
+import fnmatch
+import re
+from typing import List
+
+from .ensemble_summary_provider import EnsembleSummaryProvider
+
+
+def get_matching_vector_names(
+    provider: EnsembleSummaryProvider, column_keys: List[str]
+) -> List[str]:
+    """Returns a list of vectors that match the input column_keys, which
+    can contain unix shell wildcards.
+
+    Example of use:
+    column_keys = ["FOPT", "WGOR*"]
+    matching_vector_names = get_matching_vector_names(provider, column_keys)
+    df = provider.get_vectors_df(matching_vector_names, None)
+
+    """
+    try:
+        regex = re.compile(
+            "|".join([fnmatch.translate(col) for col in column_keys]),
+            flags=re.IGNORECASE,
+        )
+        return [vec for vec in provider.vector_names() if regex.fullmatch(vec)]
+    except re.error:
+        return []
diff --git a/webviz_subsurface/plugins/_parameter_response_correlation.py b/webviz_subsurface/plugins/_parameter_response_correlation.py
index 0232515589..006e30dfd8 100644
--- a/webviz_subsurface/plugins/_parameter_response_correlation.py
+++ b/webviz_subsurface/plugins/_parameter_response_correlation.py
@@ -20,6 +20,7 @@
     EnsembleTableProviderFactory,
     EnsembleTableProviderSet,
     Frequency,
+    get_matching_vector_names,
 )
 
 
@@ -675,7 +676,7 @@ def read_csv(csv_file) -> pd.DataFrame:
 
 
 def create_df_from_table_provider(provider: EnsembleTableProviderSet) -> pd.DataFrame:
-    """This function is the same as in parameter analysis and could be generalized."""
+    """Aggregates parameters from all ensembles into a common dataframe."""
     dfs = []
     for ens in provider.ensemble_names():
         df = provider.ensemble_provider(ens).get_column_data(
@@ -689,11 +690,11 @@ def create_df_from_table_provider(provider: EnsembleTableProviderSet) -> pd.Data
 def create_df_from_summary_provider(
     provider_set: Dict[str, EnsembleSummaryProvider], column_keys: List[str]
 ) -> pd.DataFrame:
-    """Descr"""
+    """Aggregates summary data from all ensembles into a common dataframe."""
     dfs = []
     for ens_name, provider in provider_set.items():
-        all_sumvecs = provider.get_matching_vector_names(column_keys)
-        df = provider.get_vectors_df(all_sumvecs, None)
+        matching_sumvecs = get_matching_vector_names(provider, column_keys)
+        df = provider.get_vectors_df(matching_sumvecs, None)
         df["ENSEMBLE"] = ens_name
         dfs.append(df)