From 19942e22915389437dbbd222ca44647a50c1d8e3 Mon Sep 17 00:00:00 2001 From: Andreas Eknes Lie <114403625+andreas-el@users.noreply.github.com> Date: Tue, 21 May 2024 11:00:25 +0200 Subject: [PATCH] Provide pandas 2 compatibility (#1286) * Unpin pandas * Suppress pylint errors * Make pandas v2 compatible * Avoid correlating on non-number * Pin pandas <3 to postpone pandas release --- setup.py | 2 +- .../_datainput/from_timeseries_cumulatives.py | 2 +- webviz_subsurface/_datainput/history_match.py | 3 ++- webviz_subsurface/_datainput/pvt_data.py | 5 ++++- .../ensemble_summary_provider/_dataframe_utils.py | 2 +- webviz_subsurface/plugins/_history_match.py | 2 +- .../plugins/_parameter_response_correlation.py | 7 +++++-- webviz_subsurface/plugins/_running_time_analysis_fmu.py | 2 ++ 8 files changed, 17 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index a5142a2a6..180aea7bb 100644 --- a/setup.py +++ b/setup.py @@ -98,7 +98,7 @@ "geojson>=2.5.0", "jsonschema>=3.2.0", "opm>=2023.10; sys_platform=='linux'", - "pandas>=1.1.5,<2.0", + "pandas>=1.1.5,<3", "pillow>=6.1", "pyarrow>=5.0.0", "pyjwt>=2.6.0", diff --git a/webviz_subsurface/_datainput/from_timeseries_cumulatives.py b/webviz_subsurface/_datainput/from_timeseries_cumulatives.py index d29130349..4ca0c0484 100644 --- a/webviz_subsurface/_datainput/from_timeseries_cumulatives.py +++ b/webviz_subsurface/_datainput/from_timeseries_cumulatives.py @@ -103,7 +103,7 @@ def calc_from_cumulatives( def _verify_time_index( df: pd.DataFrame, time_index: str, time_index_input: str ) -> None: - freqs = {"D": "daily", "MS": "monthly", "AS-JAN": "yearly"} + freqs = {"D": "daily", "MS": "monthly", "AS-JAN": "yearly", "YS-JAN": "yearly"} valid_time_indices = { "daily": ["daily", "monthly", "yearly"], "monthly": ["monthly", "yearly"], diff --git a/webviz_subsurface/_datainput/history_match.py b/webviz_subsurface/_datainput/history_match.py index 4729fbcad..7c7755404 100644 --- 
a/webviz_subsurface/_datainput/history_match.py +++ b/webviz_subsurface/_datainput/history_match.py @@ -56,9 +56,10 @@ def extract_mismatch(ens_paths: dict, observation_file: Path) -> pd.DataFrame: # 5) Merge in the COUNT column. # 6) Rename columns such that the columns from fmu.ensemble corresponds # to those used in the webviz history match visualization. + return ( df_mismatch.groupby(["OBSKEY", "SIGN", "REAL", "ENSEMBLE"]) - .sum()[["NORMALISED_MISMATCH"]] + .sum(numeric_only=True)[["NORMALISED_MISMATCH"]] .pivot_table( index=["OBSKEY", "REAL", "ENSEMBLE"], columns="SIGN", diff --git a/webviz_subsurface/_datainput/pvt_data.py b/webviz_subsurface/_datainput/pvt_data.py index f078f5a83..4845acf3d 100644 --- a/webviz_subsurface/_datainput/pvt_data.py +++ b/webviz_subsurface/_datainput/pvt_data.py @@ -135,7 +135,10 @@ def filter_pvt_data_frame( if data_frame_stored: continue stored_data_frames.append(ens_merged_dataframe) - cleaned_data_frame = cleaned_data_frame.append(ens_merged_dataframe) + + cleaned_data_frame = pd.concat( + [cleaned_data_frame, ens_merged_dataframe], ignore_index=True + ) return cleaned_data_frame diff --git a/webviz_subsurface/_providers/ensemble_summary_provider/_dataframe_utils.py b/webviz_subsurface/_providers/ensemble_summary_provider/_dataframe_utils.py index 7f9b48369..60bc18382 100644 --- a/webviz_subsurface/_providers/ensemble_summary_provider/_dataframe_utils.py +++ b/webviz_subsurface/_providers/ensemble_summary_provider/_dataframe_utils.py @@ -18,7 +18,7 @@ def make_date_column_datetime_object(df: pd.DataFrame) -> pd.DataFrame: # Infer datatype (Pandas cannot answer it) based on the first element: if isinstance(sampled_date_value, pd.Timestamp): - df["DATE"] = pd.Series(pd.to_pydatetime(df["DATE"]), dtype="object") + df["DATE"] = pd.Series(pd.Series.to_pydatetime(df["DATE"]), dtype="object") elif isinstance(sampled_date_value, str): # Do not use pd.Series.apply() here, Pandas would try to convert it to diff --git 
a/webviz_subsurface/plugins/_history_match.py b/webviz_subsurface/plugins/_history_match.py index 8f7a48e80..43b5b2c34 100644 --- a/webviz_subsurface/plugins/_history_match.py +++ b/webviz_subsurface/plugins/_history_match.py @@ -79,7 +79,7 @@ def _prepare_data(self, data: pd.DataFrame) -> dict: iterations = [] for ensemble in self.ensembles: df = data[data.ensemble_name == ensemble] - iterations.append(df.groupby("obs_group_name").mean()) + iterations.append(df.groupby("obs_group_name").mean(numeric_only=True)) sorted_iterations = HistoryMatch._sort_iterations(iterations) diff --git a/webviz_subsurface/plugins/_parameter_response_correlation.py b/webviz_subsurface/plugins/_parameter_response_correlation.py index b760f2337..74d269d57 100644 --- a/webviz_subsurface/plugins/_parameter_response_correlation.py +++ b/webviz_subsurface/plugins/_parameter_response_correlation.py @@ -1,6 +1,7 @@ from pathlib import Path from typing import Any, Callable, Dict, List, Tuple +import numpy as np import pandas as pd import webviz_core_components as wcc from dash import Input, Output, dcc, html @@ -633,9 +634,11 @@ def add_webvizstore(self) -> List[Tuple[Callable, List[Dict]]]: def correlate(inputdf, response, method="pearson") -> pd.DataFrame: """Returns the correlation matrix for a dataframe""" if method == "pearson": - corrdf = inputdf.corr(method=method) + numeric_df = inputdf.select_dtypes(include=[np.number]) + corrdf = numeric_df.corr(method=method) elif method == "spearman": - corrdf = inputdf.rank().corr(method="pearson") + numeric_df = inputdf.select_dtypes(include=[np.number]) + corrdf = numeric_df.rank().corr(method="pearson") else: raise ValueError( f"Correlation method {method} is invalid. 
" diff --git a/webviz_subsurface/plugins/_running_time_analysis_fmu.py b/webviz_subsurface/plugins/_running_time_analysis_fmu.py index 365b4695c..644a225b5 100644 --- a/webviz_subsurface/plugins/_running_time_analysis_fmu.py +++ b/webviz_subsurface/plugins/_running_time_analysis_fmu.py @@ -627,6 +627,7 @@ def ensemble_post_processing() -> list: ["ENSEMBLE", "REAL", "RUNTIME", "REAL_SCALED_RUNTIME", "name", "status"] ].rename(columns={"name": "JOB", "status": "STATUS"}) # Status DataFrame to be used with parallel coordinates + # pylint: disable=unsubscriptable-object if all(real_df["STATUS"] == "Success"): real_status.append( { @@ -649,6 +650,7 @@ def ensemble_post_processing() -> list: ) # Need unique job ids names to separate jobs in same realization with same name in json file + # pylint: disable=unsupported-assignment-operation real_df["JOB_ID"] = range(0, len(real_df["JOB"])) # Update max runtime for jobs in ensemble