From 19942e22915389437dbbd222ca44647a50c1d8e3 Mon Sep 17 00:00:00 2001 From: Andreas Eknes Lie <114403625+andreas-el@users.noreply.github.com> Date: Tue, 21 May 2024 11:00:25 +0200 Subject: [PATCH] Provide pandas 2 compatibility (#1286) * Unpin pandas * Suppress pylint errors * Make pandas v2 compatible * Avoid correlating on non-number * Pin pandas <3 to postpone pandas release --- setup.py | 2 +- .../_datainput/from_timeseries_cumulatives.py | 2 +- webviz_subsurface/_datainput/history_match.py | 3 ++- webviz_subsurface/_datainput/pvt_data.py | 5 ++++- .../ensemble_summary_provider/_dataframe_utils.py | 2 +- webviz_subsurface/plugins/_history_match.py | 2 +- .../plugins/_parameter_response_correlation.py | 7 +++++-- webviz_subsurface/plugins/_running_time_analysis_fmu.py | 2 ++ 8 files changed, 17 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index a5142a2a6..180aea7bb 100644 --- a/setup.py +++ b/setup.py @@ -98,7 +98,7 @@ "geojson>=2.5.0", "jsonschema>=3.2.0", "opm>=2023.10; sys_platform=='linux'", - "pandas>=1.1.5,<2.0", + "pandas>=1.1.5,<3", "pillow>=6.1", "pyarrow>=5.0.0", "pyjwt>=2.6.0", diff --git a/webviz_subsurface/_datainput/from_timeseries_cumulatives.py b/webviz_subsurface/_datainput/from_timeseries_cumulatives.py index d29130349..4ca0c0484 100644 --- a/webviz_subsurface/_datainput/from_timeseries_cumulatives.py +++ b/webviz_subsurface/_datainput/from_timeseries_cumulatives.py @@ -103,7 +103,7 @@ def calc_from_cumulatives( def _verify_time_index( df: pd.DataFrame, time_index: str, time_index_input: str ) -> None: - freqs = {"D": "daily", "MS": "monthly", "AS-JAN": "yearly"} + freqs = {"D": "daily", "MS": "monthly", "AS-JAN": "yearly", "YS-JAN": "yearly"} valid_time_indices = { "daily": ["daily", "monthly", "yearly"], "monthly": ["monthly", "yearly"], diff --git a/webviz_subsurface/_datainput/history_match.py b/webviz_subsurface/_datainput/history_match.py index 4729fbcad..7c7755404 100644 --- 
a/webviz_subsurface/_datainput/history_match.py +++ b/webviz_subsurface/_datainput/history_match.py @@ -56,9 +56,10 @@ def extract_mismatch(ens_paths: dict, observation_file: Path) -> pd.DataFrame: # 5) Merge in the COUNT column. # 6) Rename columns such that the columns from fmu.ensemble corresponds # to those used in the webviz history match visualization. + return ( df_mismatch.groupby(["OBSKEY", "SIGN", "REAL", "ENSEMBLE"]) - .sum()[["NORMALISED_MISMATCH"]] + .sum(numeric_only=True)[["NORMALISED_MISMATCH"]] .pivot_table( index=["OBSKEY", "REAL", "ENSEMBLE"], columns="SIGN", diff --git a/webviz_subsurface/_datainput/pvt_data.py b/webviz_subsurface/_datainput/pvt_data.py index f078f5a83..4845acf3d 100644 --- a/webviz_subsurface/_datainput/pvt_data.py +++ b/webviz_subsurface/_datainput/pvt_data.py @@ -135,7 +135,10 @@ def filter_pvt_data_frame( if data_frame_stored: continue stored_data_frames.append(ens_merged_dataframe) - cleaned_data_frame = cleaned_data_frame.append(ens_merged_dataframe) + + cleaned_data_frame = pd.concat( + [cleaned_data_frame, ens_merged_dataframe], ignore_index=True + ) return cleaned_data_frame diff --git a/webviz_subsurface/_providers/ensemble_summary_provider/_dataframe_utils.py b/webviz_subsurface/_providers/ensemble_summary_provider/_dataframe_utils.py index 7f9b48369..60bc18382 100644 --- a/webviz_subsurface/_providers/ensemble_summary_provider/_dataframe_utils.py +++ b/webviz_subsurface/_providers/ensemble_summary_provider/_dataframe_utils.py @@ -18,7 +18,7 @@ def make_date_column_datetime_object(df: pd.DataFrame) -> pd.DataFrame: # Infer datatype (Pandas cannot answer it) based on the first element: if isinstance(sampled_date_value, pd.Timestamp): - df["DATE"] = pd.Series(pd.to_pydatetime(df["DATE"]), dtype="object") + df["DATE"] = pd.Series(pd.Series.to_pydatetime(df["DATE"]), dtype="object") elif isinstance(sampled_date_value, str): # Do not use pd.Series.apply() here, Pandas would try to convert it to diff --git 
a/webviz_subsurface/plugins/_history_match.py b/webviz_subsurface/plugins/_history_match.py index 8f7a48e80..43b5b2c34 100644 --- a/webviz_subsurface/plugins/_history_match.py +++ b/webviz_subsurface/plugins/_history_match.py @@ -79,7 +79,7 @@ def _prepare_data(self, data: pd.DataFrame) -> dict: iterations = [] for ensemble in self.ensembles: df = data[data.ensemble_name == ensemble] - iterations.append(df.groupby("obs_group_name").mean()) + iterations.append(df.groupby("obs_group_name").mean(numeric_only=True)) sorted_iterations = HistoryMatch._sort_iterations(iterations) diff --git a/webviz_subsurface/plugins/_parameter_response_correlation.py b/webviz_subsurface/plugins/_parameter_response_correlation.py index b760f2337..74d269d57 100644 --- a/webviz_subsurface/plugins/_parameter_response_correlation.py +++ b/webviz_subsurface/plugins/_parameter_response_correlation.py @@ -1,6 +1,7 @@ from pathlib import Path from typing import Any, Callable, Dict, List, Tuple +import numpy as np import pandas as pd import webviz_core_components as wcc from dash import Input, Output, dcc, html @@ -633,9 +634,11 @@ def add_webvizstore(self) -> List[Tuple[Callable, List[Dict]]]: def correlate(inputdf, response, method="pearson") -> pd.DataFrame: """Returns the correlation matrix for a dataframe""" if method == "pearson": - corrdf = inputdf.corr(method=method) + numeric_df = inputdf.select_dtypes(include=[np.number]) + corrdf = numeric_df.corr(method=method) elif method == "spearman": - corrdf = inputdf.rank().corr(method="pearson") + numeric_df = inputdf.select_dtypes(include=[np.number]) + corrdf = numeric_df.rank().corr(method="pearson") else: raise ValueError( f"Correlation method {method} is invalid. 
" diff --git a/webviz_subsurface/plugins/_running_time_analysis_fmu.py b/webviz_subsurface/plugins/_running_time_analysis_fmu.py index 365b4695c..644a225b5 100644 --- a/webviz_subsurface/plugins/_running_time_analysis_fmu.py +++ b/webviz_subsurface/plugins/_running_time_analysis_fmu.py @@ -627,6 +627,7 @@ def ensemble_post_processing() -> list: ["ENSEMBLE", "REAL", "RUNTIME", "REAL_SCALED_RUNTIME", "name", "status"] ].rename(columns={"name": "JOB", "status": "STATUS"}) # Status DataFrame to be used with parallel coordinates + # pylint: disable=unsubscriptable-object if all(real_df["STATUS"] == "Success"): real_status.append( { @@ -649,6 +650,7 @@ def ensemble_post_processing() -> list: ) # Need unique job ids names to separate jobs in same realization with same name in json file + # pylint: disable=unsupported-assignment-operation real_df["JOB_ID"] = range(0, len(real_df["JOB"])) # Update max runtime for jobs in ensemble