Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New functionality in ParameterAnalysis #1207

Merged
merged 16 commits into from
Apr 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- [#1195](https://github.com/equinor/webviz-subsurface/pull/1195) - `RftPlotter` faultlines argument can now use fault polygons csv file with *X, Y, ID* header (fmu-dataio default)
- [#1196](https://github.com/equinor/webviz-subsurface/pull/1196) - `SwatinitQC` faultlines argument can now use fault polygons csv file with *X, Y, ID* header (fmu-dataio default)
- [#1201](https://github.com/equinor/webviz-subsurface/pull/1196) - `ParameterAnalysis` plugin converted to WLF (Webviz Layout Framework). Removed auto-detection of sensitivity ensembles.
- [#1201](https://github.com/equinor/webviz-subsurface/pull/1201) - `ParameterAnalysis` plugin converted to WLF (Webviz Layout Framework). Removed auto-detection of sensitivity ensembles.

### Added
- [#1199](https://github.com/equinor/webviz-subsurface/pull/1199) - Added more statistical options to the WellOverview tab in `WellAnalysis`, and the possibility to see injection rates.
- [#1207](https://github.com/equinor/webviz-subsurface/pull/1207) - New functionality in `ParameterAnalysis`: observations, resampling frequency and sensitivity filter.

## [0.2.17] - 2023-01-18

Expand Down
19 changes: 16 additions & 3 deletions tests/integration_tests/test_parameter_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,25 @@


def test_dataframe(testdata_folder) -> None:
    """Check which parameters ParameterFilter classifies as discrete.

    The filter is built twice from the same parameter table: once without the
    sensitivity filter, and once with it, in which case ``SENSNAME`` is
    expected to show up as an additional discrete parameter.
    """
    # pylint: disable=protected-access
    dframe = pd.read_csv(
        testdata_folder / "reek_test_data" / "aggregated_data" / "parameters.csv"
    )

    expected_discrete_parameters = [
        "FWL",
        "MULTFLT_F1",
        "INTERPOLATE_WO",
        "COHIBA_MODEL_MODE",
        "RMS_SEED",
    ]

    component = ParameterFilter("test", dframe, include_sens_filter=False)
    assert set(component._discrete_parameters) == set(expected_discrete_parameters)

    component = ParameterFilter("test", dframe, include_sens_filter=True)
    assert set(component._discrete_parameters) == set(
        expected_discrete_parameters + ["SENSNAME"]
    )

    assert component.is_sensitivity_run is True
12 changes: 11 additions & 1 deletion webviz_subsurface/_components/parameter_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def __init__(
dframe: pd.DataFrame,
reset_on_ensemble_update: bool = False,
display_header: bool = True,
include_sens_filter: bool = False,
) -> None:
"""
* **`uuid`:** Unique id (use the plugin id).
Expand All @@ -43,6 +44,7 @@ def __init__(
drop_constants=True,
keep_numeric_only=False,
drop_parameters_with_nan=True,
include_sens_filter=include_sens_filter,
)
self._dframe = self._pmodel.dataframe
self._range_parameters = self._get_range_parameters()
Expand Down Expand Up @@ -361,7 +363,15 @@ def update_filtercomponents_and_apply(
ens_df = self._dframe[self._dframe["ENSEMBLE"].isin(ensembles)]

children = []
for col in self._pmodel.get_parameters_for_ensembles(ensembles):
ens_parameters = self._pmodel.get_parameters_for_ensembles(ensembles)

# if SENSNAME is among the parameters we want to place it first
if "SENSNAME" in ens_parameters:
ens_parameters = ["SENSNAME"] + [
p for p in ens_parameters if p != "SENSNAME"
]

for col in ens_parameters:
if col in self._range_parameters:
children.append(
make_range_slider(
Expand Down
37 changes: 36 additions & 1 deletion webviz_subsurface/_figures/timeseries_figure.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime
from enum import Enum
from typing import List, Optional
from typing import Dict, List, Optional

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -30,6 +30,7 @@ def __init__(
ensemble: str,
color_col: Optional[str],
line_shape_fallback: str,
observations: Optional[Dict] = None,
historical_vector_df: Optional[pd.DataFrame] = None,
dateline: Optional[datetime.datetime] = None,
):
Expand All @@ -40,10 +41,14 @@ def __init__(
self.visualization = visualization
self.historical_vector_df = historical_vector_df
self.date = dateline
self.observations = observations if observations is not None else {}
self.line_shape = self.get_line_shape(line_shape_fallback)

self.create_traces()

if self.observations:
self.create_vector_observation_traces()

@property
def figure(self) -> dict:
title = self.vector
Expand Down Expand Up @@ -214,6 +219,36 @@ def create_vectors_statistics_df(self) -> pd.DataFrame:
.reset_index()
)

def create_vector_observation_traces(self) -> None:
    """Append one observation trace per entry in the observations dict.

    Entries are read from ``self.observations["observations"]`` (nothing is
    added when that key is missing). Each trace is drawn in black, is hidden
    from the legend, carries the observation error as a y error bar, and gets
    the observation's ``comment`` appended to its hover text when present.
    """
    coordinates = "(%{x}, %{y})<br>"

    for obs in self.observations.get("observations", []):
        comment = obs.get("comment")
        trace = {
            "name": "Observation",
            "legendgroup": "Observation",
            "x": [obs.get("date"), []],
            "y": [obs.get("value"), []],
            "marker": {"color": "black"},
            "hovertemplate": coordinates + comment if comment else coordinates,
            "showlegend": False,
            "error_y": {
                "type": "data",
                "array": [obs.get("error"), []],
                "visible": True,
            },
        }
        self.traces.append(trace)

@staticmethod
def set_real_color(norm_value: float, mean_param_value: float) -> str:
"""
Expand Down
35 changes: 22 additions & 13 deletions webviz_subsurface/_models/parameter_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,37 @@
class ParametersModel:
"""Class to process ensemble parameter data"""

POSSIBLE_SELECTORS = [
"ENSEMBLE",
"REAL",
"SENSNAME",
"SENSCASE",
"SENSTYPE",
"SENSNAME_CASE",
]

def __init__(
    self,
    dataframe: pd.DataFrame,
    drop_constants: bool = True,
    keep_numeric_only: bool = True,
    drop_parameters_with_nan: bool = False,
    include_sens_filter: bool = False,
) -> None:
    """Set up the parameter model from an ensemble parameter table.

    * **`dataframe`:** Parameter table; `None` is replaced by an empty frame.
    * **`drop_constants`**, **`keep_numeric_only`**,
      **`drop_parameters_with_nan`:** Forwarded unchanged to `_prepare_data`.
    * **`include_sens_filter`:** When True and a `SENSNAME` column exists,
      `SENSNAME` is treated as a regular parameter instead of a selector,
      and its missing values are replaced by the string `"None"`.
    """
    self._dataframe = dataframe if dataframe is not None else pd.DataFrame()

    self._possible_selectors = [
        "ENSEMBLE",
        "REAL",
        "SENSNAME",
        "SENSCASE",
        "SENSTYPE",
        "SENSNAME_CASE",
    ]

    if include_sens_filter and "SENSNAME" in self._dataframe.columns:
        # SENSNAME becomes a filterable parameter, so it is no longer a selector
        self._possible_selectors.remove("SENSNAME")
        # fillna returns a new Series; the result must be stored to take
        # effect. assign() builds a new frame, leaving the caller's untouched.
        self._dataframe = self._dataframe.assign(
            SENSNAME=self._dataframe["SENSNAME"].fillna("None")
        )

    self._validate_dframe()
    self._sensrun = self._check_if_sensitivity_run()
    self._prepare_data(drop_constants, keep_numeric_only, drop_parameters_with_nan)
    self._parameters = [
        x for x in self._dataframe if x not in self._possible_selectors
    ]
    self._parameters_per_ensemble = self._split_parameters_by_ensemble()

Expand All @@ -44,7 +53,7 @@ def parameters_per_ensemble(self) -> dict:

@property
def selectors(self) -> list:
return [col for col in self.POSSIBLE_SELECTORS if col in self.dataframe]
return [col for col in self._possible_selectors if col in self.dataframe]

@property
def sensitivities(self) -> list:
Expand All @@ -64,7 +73,7 @@ def mc_ensembles(self) -> list:

@property
def sens_df(self) -> pd.DataFrame:
return self.dataframe[self.POSSIBLE_SELECTORS]
return self.dataframe[self._possible_selectors]

@property
def dataframe(self) -> pd.DataFrame:
Expand Down Expand Up @@ -96,7 +105,7 @@ def _prepare_data(
param
for param in self._dataframe
if self._dataframe[param].dropna().nunique() == 1
and param not in self.POSSIBLE_SELECTORS
and param not in self._possible_selectors
]
self._dataframe = self._dataframe.drop(columns=constant_params)

Expand Down
49 changes: 40 additions & 9 deletions webviz_subsurface/plugins/_parameter_analysis/_plugin.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pathlib import Path
from typing import Dict, List, Optional

import pandas as pd
Expand All @@ -11,6 +12,8 @@
Frequency,
)

from ..._utils.simulation_timeseries import check_and_format_observations
from ..._utils.webvizstore_functions import get_path
from ._utils import ParametersModel, ProviderTimeSeriesDataModel
from ._views._parameter_distributions_view import ParameterDistributionView
from ._views._parameter_response_view import ParameterResponseView
Expand All @@ -32,6 +35,9 @@ class ParameterAnalysis(WebvizPluginABC):
Default is True.
* **`column_keys`:** List of vectors to extract. If not given, all vectors \
from the simulations will be extracted. Wild card asterisk `*` can be used.
* **`obsfile`:** `.yaml` file with observations to be displayed in the time series plot.
* **`perform_presampling`:** Summary data will be presampled when loading the plugin, \
and the resampling dropdown will be disabled.
---

?> `Arrow` format for simulation time series data can be generated using the `ECL2CSV` forward \
Expand All @@ -56,19 +62,27 @@ class Ids(StrEnum):
PARAM_DIST_VIEW = "param-dist-view"
PARAM_RESP_VIEW = "param-resp-view"

# pylint: disable=too-many-arguments
def __init__(
self,
webviz_settings: WebvizSettings,
ensembles: List[str] = None,
time_index: str = "monthly",
time_index: str = Frequency.MONTHLY.value,
column_keys: Optional[list] = None,
drop_constants: bool = True,
rel_file_pattern: str = "share/results/unsmry/*.arrow",
obsfile: Path = None,
perform_presampling: bool = False,
):
super().__init__()

self._ensembles = ensembles
self._theme = webviz_settings.theme
self._obsfile = obsfile

self._observations = {}
if self._obsfile:
self._observations = check_and_format_observations(get_path(self._obsfile))

if ensembles is None:
raise ValueError('Incorrect argument, must provide "ensembles"')
Expand All @@ -85,14 +99,26 @@ def __init__(
resampling_frequency = Frequency(time_index)
provider_factory = EnsembleSummaryProviderFactory.instance()

provider_set = {
ens: provider_factory.create_from_arrow_unsmry_presampled(
str(ens_path), rel_file_pattern, resampling_frequency
)
for ens, ens_path in ensemble_paths.items()
}
if perform_presampling:
self._input_provider_set = {
ens: provider_factory.create_from_arrow_unsmry_presampled(
str(ens_path), rel_file_pattern, resampling_frequency
)
for ens, ens_path in ensemble_paths.items()
}
else:
self._input_provider_set = {
ens: provider_factory.create_from_arrow_unsmry_lazy(
str(ens_path), rel_file_pattern
)
for ens, ens_path in ensemble_paths.items()
}

self._vmodel = ProviderTimeSeriesDataModel(
provider_set=provider_set, column_keys=column_keys
provider_set=self._input_provider_set, column_keys=column_keys
)
self._vmodel.set_dates(
self._vmodel.get_dates(resampling_frequency=resampling_frequency)
)

parameter_df = create_df_from_table_provider(
Expand All @@ -112,7 +138,12 @@ def __init__(

self.add_view(
ParameterResponseView(
parametermodel=self._pmodel, vectormodel=self._vmodel, theme=self._theme
parametermodel=self._pmodel,
vectormodel=self._vmodel,
observations=self._observations,
selected_resampling_frequency=resampling_frequency,
disable_resampling_dropdown=perform_presampling,
theme=self._theme,
),
self.Ids.PARAM_RESP_VIEW,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,14 @@ def ensembles(self) -> List[str]:
@staticmethod
def _aggregate_ensemble_data(dframe: pd.DataFrame) -> pd.DataFrame:
"""Compute parameter statistics for the different ensembles"""
drop_columns = [
col
for col in ["REAL", "SENSNAME", "SENSTYPE", "SENSNAME_CASE", "SENSCASE"]
if col in dframe.columns
]

return (
dframe.drop(columns=["REAL"])
dframe.drop(columns=drop_columns)
.groupby(["ENSEMBLE"])
.agg(
[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pandas as pd

from webviz_subsurface._abbreviations.reservoir_simulation import historical_vector
from webviz_subsurface._providers import EnsembleSummaryProvider
from webviz_subsurface._providers import EnsembleSummaryProvider, Frequency
from webviz_subsurface._utils.simulation_timeseries import (
set_simulation_line_shape_fallback,
)
Expand Down Expand Up @@ -34,8 +34,6 @@ def __init__(
if not self._vector_names:
raise ValueError("No vectors match the selected 'column_keys' criteria")

self._dates = self.all_dates()

# add vectors to vector selector
self.vector_selector_data: list = []
for vector in self.get_non_historical_vector_names():
Expand All @@ -56,15 +54,23 @@ def get_non_historical_vector_names(self) -> list:
if historical_vector(vector, None, False) not in self._vector_names
]

def all_dates(self) -> List[datetime.datetime]:
def get_dates(self, resampling_frequency: Frequency) -> List[datetime.datetime]:
    """Return the sorted union of dates among all providers.

    Every provider in ``self._provider_set`` is queried for its dates at the
    given ``resampling_frequency``; dates shared by several providers appear
    once in the result.
    """
    # TODO: Adjust when providers are updated!
    dates_union: Set[datetime.datetime] = set()
    for provider in self._provider_set.values():
        dates_union.update(
            provider.dates(resampling_frequency=resampling_frequency)
        )
    return sorted(dates_union)

def set_dates(self, dates: List[datetime.datetime]) -> None:
    """Store ``dates`` as the date list that ``get_closest_date`` searches."""
    # pylint: disable=attribute-defined-outside-init
    self._dates = dates

def get_closest_date(self, date: datetime.datetime) -> datetime.datetime:
    """Return the entry of the stored dates list that lies closest to ``date``.

    When two stored dates are equally close, the one appearing first in the
    list is returned.
    """

    def distance(candidate: datetime.datetime) -> datetime.timedelta:
        return abs(candidate - date)

    return min(self._dates, key=distance)

@staticmethod
def _create_union_of_vector_names_from_providers(
providers: List[EnsembleSummaryProvider],
Expand Down Expand Up @@ -122,10 +128,15 @@ def get_vector_df(
ensemble: str,
realizations: List[int],
vectors: List[str],
resampling_frequency: Optional[Frequency],
) -> pd.DataFrame:
provider = self._provider_set[ensemble]
ens_vectors = [vec for vec in vectors if vec in provider.vector_names()]
return provider.get_vectors_df(ens_vectors, None, realizations)
return provider.get_vectors_df(
vector_names=ens_vectors,
resampling_frequency=resampling_frequency,
realizations=realizations,
)

def get_last_date(self, ensemble: str) -> datetime.datetime:
return max(self._provider_set[ensemble].dates(None))
Loading