From 36013228bc2a99d96cc924dc104982819a9f51da Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Fri, 4 Sep 2020 21:55:39 -0400 Subject: [PATCH] fix PX timezone treatment --- CHANGELOG.md | 8 ++- .../python/plotly/plotly/express/_core.py | 63 +++++++++---------- .../tests/test_core/test_px/test_px_hover.py | 7 +++ .../tests/test_core/test_px/test_px_input.py | 8 +++ .../tests/test_core/test_px/test_trendline.py | 2 + packages/python/plotly/tox.ini | 30 ++++----- 6 files changed, 68 insertions(+), 50 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c0bbd992cf..7d0858e1e00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,9 +16,15 @@ This project adheres to [Semantic Versioning](http://semver.org/). `binary_backend`, `binary_format` and `binary_compression_level` control how to generate the b64 string ([#2691](https://github.com/plotly/plotly.py/pull/2691) - `px.imshow` has a new `constrast_rescaling` argument in order to choose how - to set data values corresponding to the bounds of the color range + to set data values corresponding to the bounds of the color range ([#2691](https://github.com/plotly/plotly.py/pull/2691) +### Fixed + +- Plotly Express no longer converts datetime columns of input dataframes to UTC ([#2749](https://github.com/plotly/plotly.py/pull/2749)) +- Plotly Express has more complete support for datetimes as additional `hover_data` ([#2749](https://github.com/plotly/plotly.py/pull/2749)) + + ## [4.9.0] - 2020-07-16 ### Added diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index f283cb3cdef..7ad2fb4eb01 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -222,7 +222,6 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): trace_patch = trace_spec.trace_patch.copy() or {} fit_results = None hover_header = "" - custom_data_len = 0 for attr_name in trace_spec.attrs: attr_value = args[attr_name] attr_label = get_decorated_label(args, attr_value, attr_name) @@ -243,7 +242,7 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): ) ] trace_patch["dimensions"] = [ - dict(label=get_label(args, name), values=column.values) + dict(label=get_label(args, name), values=column) for (name, column) in dims ] if trace_spec.constructor == go.Splom: @@ -287,10 +286,8 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): y = sorted_trace_data[args["y"]].values x = sorted_trace_data[args["x"]].values - x_is_date = False if x.dtype.type == np.datetime64: x = x.astype(int) / 10 ** 9 # convert to unix epoch seconds - x_is_date = True elif x.dtype.type == np.object_: try: x = x.astype(np.float64) @@ -308,11 +305,15 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): "Could not convert value of 'y' into a numeric type." ) + # preserve original values of "x" in case they're dates + trace_patch["x"] = sorted_trace_data[args["x"]][ + np.logical_not(np.logical_or(np.isnan(y), np.isnan(x))) + ] + if attr_value == "lowess": # missing ='drop' is the default value for lowess but not for OLS (None) # we force it here in case statsmodels change their defaults trendline = sm.nonparametric.lowess(y, x, missing="drop") - trace_patch["x"] = trendline[:, 0] trace_patch["y"] = trendline[:, 1] hover_header = "LOWESS trendline

" elif attr_value == "ols": @@ -320,9 +321,6 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): y, sm.add_constant(x), missing="drop" ).fit() trace_patch["y"] = fit_results.predict() - trace_patch["x"] = x[ - np.logical_not(np.logical_or(np.isnan(y), np.isnan(x))) - ] hover_header = "OLS trendline
" if len(fit_results.params) == 2: hover_header += "%s = %g * %s + %g
" % ( @@ -339,8 +337,6 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): hover_header += ( "R2=%f

" % fit_results.rsquared ) - if x_is_date: - trace_patch["x"] = pd.to_datetime(trace_patch["x"] * 10 ** 9) mapping_labels[get_label(args, args["x"])] = "%{x}" mapping_labels[get_label(args, args["y"])] = "%{y} (trend)" elif attr_name.startswith("error"): @@ -350,8 +346,9 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): trace_patch[error_xy] = {} trace_patch[error_xy][arr] = trace_data[attr_value] elif attr_name == "custom_data": - trace_patch["customdata"] = trace_data[attr_value].values - custom_data_len = len(attr_value) # number of custom data columns + # here we store a data frame in customdata, and it's serialized + # as a list of row lists, which is what we want + trace_patch["customdata"] = trace_data[attr_value] elif attr_name == "hover_name": if trace_spec.constructor not in [ go.Histogram, @@ -368,29 +365,23 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): go.Histogram2dContour, ]: hover_is_dict = isinstance(attr_value, dict) + customdata_cols = args.get("custom_data") or [] for col in attr_value: if hover_is_dict and not attr_value[col]: continue try: position = args["custom_data"].index(col) except (ValueError, AttributeError, KeyError): - position = custom_data_len - custom_data_len += 1 - if "customdata" in trace_patch: - trace_patch["customdata"] = np.hstack( - ( - trace_patch["customdata"], - trace_data[col].values[:, None], - ) - ) - else: - trace_patch["customdata"] = trace_data[col].values[ - :, None - ] + position = len(customdata_cols) + customdata_cols.append(col) attr_label_col = get_decorated_label(args, col, None) mapping_labels[attr_label_col] = "%%{customdata[%d]}" % ( position ) + + # here we store a data frame in customdata, and it's serialized + # as a list of row lists, which is what we want + trace_patch["customdata"] = trace_data[customdata_cols] elif attr_name == "color": if trace_spec.constructor in [go.Choropleth, go.Choroplethmapbox]: trace_patch["z"] = trace_data[attr_value] @@ -1029,6 +1020,16 @@ def _escape_col_name(df_input, col_name, extra): return col_name +def to_unindexed_series(x): + """ + assuming x is list-like or even an existing pd.Series, return a new pd.Series with + no index, without extracting the data from an existing Series via numpy, which + seems to mangle datetime columns. Stripping the index from existing pd.Series is + required to get things to match up right in the new DataFrame we're building + """ + return pd.Series(x).reset_index(drop=True) + + def process_args_into_dataframe(args, wide_mode, var_name, value_name): """ After this function runs, the `all_attrables` keys of `args` all contain only @@ -1140,10 +1141,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): length, ) ) - if hasattr(real_argument, "values"): - df_output[col_name] = real_argument.values - else: - df_output[col_name] = np.array(real_argument) + df_output[col_name] = to_unindexed_series(real_argument) elif not df_provided: raise ValueError( "String or int arguments are only possible when a " @@ -1178,7 +1176,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): ) else: col_name = str(argument) - df_output[col_name] = df_input[argument].values + df_output[col_name] = to_unindexed_series(df_input[argument]) # ----------------- argument is likely a column / array / list.... ------- else: if df_provided and hasattr(argument, "name"): @@ -1207,10 +1205,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): "length of previously-processed arguments %s is %d" % (field, len(argument), str(list(df_output.columns)), length) ) - if hasattr(argument, "values"): - df_output[str(col_name)] = argument.values - else: - df_output[str(col_name)] = np.array(argument) + df_output[str(col_name)] = to_unindexed_series(argument) # Finally, update argument with column name now that column exists assert col_name is not None, ( diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py index 6e1b57cba34..66d7bd01048 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_hover.py @@ -163,3 +163,10 @@ def test_sunburst_hoverdict_color(): hover_data={"pop": ":,"}, ) assert "color" in fig.data[0].hovertemplate + + +def test_date_in_hover(): + df = pd.DataFrame({"date": ["2015-04-04 19:31:30+1:00"], "value": [3]}) + df["date"] = pd.to_datetime(df["date"]) + fig = px.scatter(df, x="value", y="value", hover_data=["date"]) + assert str(fig.data[0].customdata[0][0]) == str(df["date"][0]) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py index e440fefbe89..eeb28283649 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py @@ -233,6 +233,14 @@ def test_build_df_with_index(): assert_frame_equal(tips.reset_index()[out["data_frame"].columns], out["data_frame"]) +def test_timezones(): + df = pd.DataFrame({"date": ["2015-04-04 19:31:30+1:00"], "value": [3]}) + df["date"] = pd.to_datetime(df["date"]) + args = dict(data_frame=df, x="date", y="value") + out = build_dataframe(args, go.Scatter) + assert str(out["data_frame"]["date"][0]) == str(df["date"][0]) + + def test_non_matching_index(): df = pd.DataFrame(dict(y=[1, 2, 3]), index=["a", "b", "c"]) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py index e908d7dee12..41064bd19df 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py @@ -102,9 +102,11 @@ def test_trendline_on_timeseries(mode): ) df["date"] = pd.to_datetime(df["date"]) + df["date"] = df["date"].dt.tz_localize("CET") # force a timezone fig = px.scatter(df, x="date", y="GOOG", trendline=mode) assert len(fig.data) == 2 assert len(fig.data[0].x) == len(fig.data[1].x) assert type(fig.data[0].x[0]) == datetime assert type(fig.data[1].x[0]) == datetime assert np.all(fig.data[0].x == fig.data[1].x) + assert str(fig.data[0].x[0]) == str(fig.data[1].x[0]) diff --git a/packages/python/plotly/tox.ini b/packages/python/plotly/tox.ini index ead29ecca82..72169192a08 100644 --- a/packages/python/plotly/tox.ini +++ b/packages/python/plotly/tox.ini @@ -24,13 +24,13 @@ ; PASSING ADDITONAL ARGUMENTS TO TEST COMMANDS ; The {posargs} is tox-specific and passes in any command line args after `--`. ; For example, given the testing command in *this* file: -; pytest {posargs} -x plotly/tests/test_core +; pytest {posargs} plotly/tests/test_core ; ; The following command: ; tox -- -k 'not nodev' ; ; Tells tox to call: -; pytest -k 'not nodev' -x plotly/tests/test_core +; pytest -k 'not nodev' plotly/tests/test_core ; [tox] @@ -81,25 +81,25 @@ deps= basepython={env:PLOTLY_TOX_PYTHON_27:} commands= python --version - pytest {posargs} -x plotly/tests/test_core + pytest {posargs} plotly/tests/test_core [testenv:py35-core] basepython={env:PLOTLY_TOX_PYTHON_35:} commands= python --version - pytest {posargs} -x plotly/tests/test_core + pytest {posargs} plotly/tests/test_core [testenv:py36-core] basepython={env:PLOTLY_TOX_PYTHON_36:} commands= python --version - pytest {posargs} -x plotly/tests/test_core + pytest {posargs} plotly/tests/test_core [testenv:py37-core] basepython={env:PLOTLY_TOX_PYTHON_37:} commands= python --version - pytest {posargs} -x plotly/tests/test_core + pytest {posargs} plotly/tests/test_core pytest {posargs} -x test_init/test_dependencies_not_imported.py pytest {posargs} -x test_init/test_lazy_imports.py @@ -111,15 +111,15 @@ commands= ;; Do some coverage reporting. No need to do this for all environments. ; mkdir -p {envbindir}/../../coverage-reports/{envname} ; coverage erase -; coverage run --include="*/plotly/*" --omit="*/tests*" {envbindir}/nosetests {posargs} -x plotly/tests +; coverage run --include="*/plotly/*" --omit="*/tests*" {envbindir}/nosetests {posargs} plotly/tests ; coverage html -d "{envbindir}/../../coverage-reports/{envname}" --title={envname} [testenv:py27-optional] basepython={env:PLOTLY_TOX_PYTHON_27:} commands= python --version - pytest {posargs} -x plotly/tests/test_core - pytest {posargs} -x plotly/tests/test_optional + pytest {posargs} plotly/tests/test_core + pytest {posargs} plotly/tests/test_optional pytest _plotly_utils/tests/ pytest plotly/tests/test_io @@ -127,8 +127,8 @@ commands= basepython={env:PLOTLY_TOX_PYTHON_35:} commands= python --version - pytest {posargs} -x plotly/tests/test_core - pytest {posargs} -x plotly/tests/test_optional + pytest {posargs} plotly/tests/test_core + pytest {posargs} plotly/tests/test_optional pytest _plotly_utils/tests/ pytest plotly/tests/test_io @@ -136,8 +136,8 @@ commands= basepython={env:PLOTLY_TOX_PYTHON_36:} commands= python --version - pytest {posargs} -x plotly/tests/test_core - pytest {posargs} -x plotly/tests/test_optional + pytest {posargs} plotly/tests/test_core + pytest {posargs} plotly/tests/test_optional pytest _plotly_utils/tests/ pytest plotly/tests/test_io @@ -145,7 +145,7 @@ commands= basepython={env:PLOTLY_TOX_PYTHON_37:} commands= python --version - pytest {posargs} -x plotly/tests/test_core - pytest {posargs} -x plotly/tests/test_optional + pytest {posargs} plotly/tests/test_core + pytest {posargs} plotly/tests/test_optional pytest _plotly_utils/tests/ pytest plotly/tests/test_io