Skip to content

Commit

Permalink
Provide pandas 2 compatibility (#1286)
Browse files Browse the repository at this point in the history
* Unpin pandas

* Suppress pylint errors

* Make pandas v2 compatible

* Avoid correlating on non-number

* Pin pandas <3 to postpone pandas release
  • Loading branch information
andreas-el authored May 21, 2024
1 parent c3109ec commit 19942e2
Show file tree
Hide file tree
Showing 8 changed files with 17 additions and 8 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@
"geojson>=2.5.0",
"jsonschema>=3.2.0",
"opm>=2023.10; sys_platform=='linux'",
"pandas>=1.1.5,<2.0",
"pandas>=1.1.5,<3",
"pillow>=6.1",
"pyarrow>=5.0.0",
"pyjwt>=2.6.0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def calc_from_cumulatives(
def _verify_time_index(
df: pd.DataFrame, time_index: str, time_index_input: str
) -> None:
freqs = {"D": "daily", "MS": "monthly", "AS-JAN": "yearly"}
freqs = {"D": "daily", "MS": "monthly", "AS-JAN": "yearly", "YS-JAN": "yearly"}
valid_time_indices = {
"daily": ["daily", "monthly", "yearly"],
"monthly": ["monthly", "yearly"],
Expand Down
3 changes: 2 additions & 1 deletion webviz_subsurface/_datainput/history_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,10 @@ def extract_mismatch(ens_paths: dict, observation_file: Path) -> pd.DataFrame:
# 5) Merge in the COUNT column.
# 6) Rename columns such that the columns from fmu.ensemble corresponds
# to those used in the webviz history match visualization.

return (
df_mismatch.groupby(["OBSKEY", "SIGN", "REAL", "ENSEMBLE"])
.sum()[["NORMALISED_MISMATCH"]]
.sum(numeric_only=True)[["NORMALISED_MISMATCH"]]
.pivot_table(
index=["OBSKEY", "REAL", "ENSEMBLE"],
columns="SIGN",
Expand Down
5 changes: 4 additions & 1 deletion webviz_subsurface/_datainput/pvt_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,10 @@ def filter_pvt_data_frame(
if data_frame_stored:
continue
stored_data_frames.append(ens_merged_dataframe)
cleaned_data_frame = cleaned_data_frame.append(ens_merged_dataframe)

cleaned_data_frame = pd.concat(
[cleaned_data_frame, ens_merged_dataframe], ignore_index=True
)

return cleaned_data_frame

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def make_date_column_datetime_object(df: pd.DataFrame) -> pd.DataFrame:

# Infer datatype (Pandas cannot answer it) based on the first element:
if isinstance(sampled_date_value, pd.Timestamp):
df["DATE"] = pd.Series(pd.to_pydatetime(df["DATE"]), dtype="object")
df["DATE"] = pd.Series(pd.Series.to_pydatetime(df["DATE"]), dtype="object")

elif isinstance(sampled_date_value, str):
# Do not use pd.Series.apply() here, Pandas would try to convert it to
Expand Down
2 changes: 1 addition & 1 deletion webviz_subsurface/plugins/_history_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def _prepare_data(self, data: pd.DataFrame) -> dict:
iterations = []
for ensemble in self.ensembles:
df = data[data.ensemble_name == ensemble]
iterations.append(df.groupby("obs_group_name").mean())
iterations.append(df.groupby("obs_group_name").mean(numeric_only=True))

sorted_iterations = HistoryMatch._sort_iterations(iterations)

Expand Down
7 changes: 5 additions & 2 deletions webviz_subsurface/plugins/_parameter_response_correlation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pathlib import Path
from typing import Any, Callable, Dict, List, Tuple

import numpy as np
import pandas as pd
import webviz_core_components as wcc
from dash import Input, Output, dcc, html
Expand Down Expand Up @@ -633,9 +634,11 @@ def add_webvizstore(self) -> List[Tuple[Callable, List[Dict]]]:
def correlate(inputdf, response, method="pearson") -> pd.DataFrame:
"""Returns the correlation matrix for a dataframe"""
if method == "pearson":
corrdf = inputdf.corr(method=method)
numeric_df = inputdf.select_dtypes(include=[np.number])
corrdf = numeric_df.corr(method=method)
elif method == "spearman":
corrdf = inputdf.rank().corr(method="pearson")
numeric_df = inputdf.select_dtypes(include=[np.number])
corrdf = numeric_df.rank().corr(method="pearson")
else:
raise ValueError(
f"Correlation method {method} is invalid. "
Expand Down
2 changes: 2 additions & 0 deletions webviz_subsurface/plugins/_running_time_analysis_fmu.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,7 @@ def ensemble_post_processing() -> list:
["ENSEMBLE", "REAL", "RUNTIME", "REAL_SCALED_RUNTIME", "name", "status"]
].rename(columns={"name": "JOB", "status": "STATUS"})
# Status DataFrame to be used with parallel coordinates
# pylint: disable=unsubscriptable-object
if all(real_df["STATUS"] == "Success"):
real_status.append(
{
Expand All @@ -649,6 +650,7 @@ def ensemble_post_processing() -> list:
)

# Need unique job ids names to separate jobs in same realization with same name in json file
# pylint: disable=unsupported-assignment-operation
real_df["JOB_ID"] = range(0, len(real_df["JOB"]))

# Update max runtime for jobs in ensemble
Expand Down

0 comments on commit 19942e2

Please sign in to comment.