From 6887fb089e399afc3f8824b5818829f67fa011f5 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Wed, 12 Apr 2023 21:36:35 +0200
Subject: [PATCH 001/176] FEAT-#5936: support pandas 2.0.0

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 docs/supported_apis/dataframe_supported.rst   |  1 -
 environment-dev.yml                           |  2 +-
 .../storage_formats/base/query_compiler.py    | 14 ------
 .../storage_formats/pandas/query_compiler.py  |  1 -
 modin/pandas/__init__.py                      |  2 +-
 modin/pandas/base.py                          | 20 --------
 modin/pandas/groupby.py                       | 11 -----
 modin/pandas/test/dataframe/test_default.py   | 26 ----------
 modin/pandas/test/test_groupby.py             | 47 -------------------
 modin/pandas/test/test_series.py              | 11 -----
 requirements/env_hdk.yml                      |  2 +-
 requirements/env_unidist.yml                  |  2 +-
 requirements/requirements-no-engine.yml       |  2 +-
 setup.py                                      |  2 +-
 14 files changed, 6 insertions(+), 137 deletions(-)

diff --git a/docs/supported_apis/dataframe_supported.rst b/docs/supported_apis/dataframe_supported.rst
index 37dd7be6bfd..d9b9462ab5b 100644
--- a/docs/supported_apis/dataframe_supported.rst
+++ b/docs/supported_apis/dataframe_supported.rst
@@ -582,7 +582,6 @@ default to pandas.
 .. _`loc`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.loc.html#pandas.DataFrame.loc
 .. _`lookup`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.lookup.html#pandas.DataFrame.lookup
 .. _`lt`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.lt.html#pandas.DataFrame.lt
-.. _`mad`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mad.html#pandas.DataFrame.mad
 .. _`mask`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mask.html#pandas.DataFrame.mask
 .. _`max`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.max.html#pandas.DataFrame.max
 .. _`mean`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mean.html#pandas.DataFrame.mean
diff --git a/environment-dev.yml b/environment-dev.yml
index ef2f5a4fce5..14839085ae1 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -2,7 +2,7 @@ name: modin
 channels:
   - conda-forge
 dependencies:
-  - pandas==1.5.3
+  - pandas==2.0.0
   - numpy>=1.18.5
   - ray-default>=1.13.0
   - pyarrow
diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 9469cd2e612..c00c4060611 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -5139,20 +5139,6 @@ def invert(self):
         """
         return DataFrameDefault.register(pandas.DataFrame.__invert__)(self)
 
-    @doc_utils.doc_reduce_agg(
-        method="mean absolute deviation",
-        params="""
-        axis : {0, 1}
-        skipna : bool
-        level : None, default: None
-            Serves the compatibility purpose. Always has to be None.""",
-        refer_to="mad",
-    )
-    def mad(self, axis, skipna, level=None):
-        return DataFrameDefault.register(pandas.DataFrame.mad)(
-            self, axis=axis, skipna=skipna, level=level
-        )
-
     @doc_utils.doc_reduce_agg(
         method="unbiased kurtosis", refer_to="kurt", extra_params=["skipna", "**kwargs"]
     )
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index 4eb622d280e..a608ff81339 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -933,7 +933,6 @@ def reduce_fn(df, **kwargs):
     sum_min_count = Reduce.register(pandas.DataFrame.sum)
     prod_min_count = Reduce.register(pandas.DataFrame.prod)
     quantile_for_single_value = Reduce.register(pandas.DataFrame.quantile)
-    mad = Reduce.register(pandas.DataFrame.mad)
 
     def to_datetime(self, *args, **kwargs):
         if len(self.columns) == 1:
diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py
index 1ee04138ee9..2cf4b56c8be 100644
--- a/modin/pandas/__init__.py
+++ b/modin/pandas/__init__.py
@@ -14,7 +14,7 @@
 import pandas
 import warnings
 
-__pandas_version__ = "1.5.3"
+__pandas_version__ = "2.0.0"
 
 if pandas.__version__ != __pandas_version__:
     warnings.warn(
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index ba6f2845459..09ac413945b 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1836,26 +1836,6 @@ def loc(self):  # noqa: RT01, D200
 
         return _LocIndexer(self)
 
-    def mad(self, axis=None, skipna=True, level=None):  # noqa: PR01, RT01, D200
-        """
-        Return the mean absolute deviation of the values over the requested axis.
-        """
-        validate_bool_kwarg(skipna, "skipna")
-        axis = self._get_axis_number(axis)
-        if level is not None:
-            if (
-                not self._query_compiler.has_multiindex(axis=axis)
-                and level > 0
-                or level < -1
-                and level != self.index.name
-            ):
-                raise ValueError("level > 0 or level < -1 only valid with MultiIndex")
-            return self.groupby(level=level, axis=axis, sort=False).mad()
-
-        return self._reduce_dimension(
-            self._query_compiler.mad(axis=axis, skipna=skipna, level=level)
-        )
-
     def mask(
         self,
         cond,
diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 037a1e27e65..734a555f237 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -728,17 +728,6 @@ def do_relabel(obj_to_relabel):
     def last(self, **kwargs):
         return self._default_to_pandas(lambda df: df.last(**kwargs))
 
-    def mad(self, **kwargs):
-        warnings.warn(
-            (
-                "The 'mad' method is deprecated and will be removed in a future version. "
-                + "To compute the same result, you may do `(df - df.mean()).abs().mean()`."
-            ),
-            FutureWarning,
-            stacklevel=2,
-        )
-        return self._default_to_pandas(lambda df: df.mad(**kwargs))
-
     def rank(self, **kwargs):
         result = self._wrap_aggregation(
             type(self._query_compiler).groupby_rank,
diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py
index c3a2fb1a2c6..dd4cc49046b 100644
--- a/modin/pandas/test/dataframe/test_default.py
+++ b/modin/pandas/test/dataframe/test_default.py
@@ -450,32 +450,6 @@ def test_last():
     df_equals(modin_df.last("20D"), pandas_df.last("20D"))
 
 
-@pytest.mark.parametrize("data", test_data_values)
-@pytest.mark.parametrize("axis", [None, 0, 1])
-@pytest.mark.parametrize("skipna", [None, True, False])
-def test_mad(data, axis, skipna):
-    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)
-    df_equals(
-        modin_df.mad(axis=axis, skipna=skipna, level=None),
-        pandas_df.mad(axis=axis, skipna=skipna, level=None),
-    )
-
-
-@pytest.mark.parametrize("level", [-1, 0, 1])
-def test_mad_level(level):
-    data = test_data_values[0]
-    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)
-
-    index = generate_multiindex(len(data.keys()))
-    modin_df.columns = index
-    pandas_df.columns = index
-    eval_general(
-        modin_df,
-        pandas_df,
-        lambda df: df.mad(axis=1, level=level),
-    )
-
-
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 @pytest.mark.parametrize(
     "id_vars", [lambda df: df.columns[0], lambda df: df.columns[:4], None]
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index c41c0e713ba..64a0cc3da25 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -194,12 +194,6 @@ def test_mixed_dtypes_groupby(as_index):
             eval_aggregate(modin_groupby, pandas_groupby, func)
 
         eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
-        eval_general(
-            modin_groupby,
-            pandas_groupby,
-            lambda df: df.mad(),
-            modin_df_almost_equals_pandas,
-        )
         eval_max(modin_groupby, pandas_groupby)
         eval_len(modin_groupby, pandas_groupby)
         eval_sum(modin_groupby, pandas_groupby)
@@ -411,12 +405,6 @@ def maybe_get_columns(df, by):
         )
 
     eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
-    eval_general(
-        modin_groupby,
-        pandas_groupby,
-        lambda df: df.mad(),
-        modin_df_almost_equals_pandas,
-    )
     eval_general(modin_groupby, pandas_groupby, lambda df: df.rank())
     eval_max(modin_groupby, pandas_groupby)
     eval_len(modin_groupby, pandas_groupby)
@@ -586,12 +574,6 @@ def test_single_group_row_groupby():
         eval_aggregate(modin_groupby, pandas_groupby, func)
 
     eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
-    eval_general(
-        modin_groupby,
-        pandas_groupby,
-        lambda df: df.mad(),
-        modin_df_almost_equals_pandas,
-    )
     eval_rank(modin_groupby, pandas_groupby)
     eval_max(modin_groupby, pandas_groupby)
     eval_var(modin_groupby, pandas_groupby)
@@ -705,12 +687,6 @@ def test_large_row_groupby(is_by_category):
         eval_aggregate(modin_groupby, pandas_groupby, func)
 
     eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
-    eval_general(
-        modin_groupby,
-        pandas_groupby,
-        lambda df: df.mad(),
-        modin_df_almost_equals_pandas,
-    )
     eval_rank(modin_groupby, pandas_groupby)
     eval_max(modin_groupby, pandas_groupby)
     eval_var(modin_groupby, pandas_groupby)
@@ -812,12 +788,6 @@ def test_simple_col_groupby():
     eval_prod(modin_groupby, pandas_groupby)
     eval_std(modin_groupby, pandas_groupby)
     eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
-    eval_general(
-        modin_groupby,
-        pandas_groupby,
-        lambda df: df.mad(),
-        modin_df_almost_equals_pandas,
-    )
     eval_max(modin_groupby, pandas_groupby)
     eval_var(modin_groupby, pandas_groupby)
     eval_len(modin_groupby, pandas_groupby)
@@ -955,12 +925,6 @@ def test_series_groupby(by, as_index_series_or_dataframe):
             eval_aggregate(modin_groupby, pandas_groupby, func)
 
         eval_general(modin_groupby, pandas_groupby, lambda df: df.last())
-        eval_general(
-            modin_groupby,
-            pandas_groupby,
-            lambda df: df.mad(),
-            modin_df_almost_equals_pandas,
-        )
         eval_rank(modin_groupby, pandas_groupby)
         eval_max(modin_groupby, pandas_groupby)
         eval_len(modin_groupby, pandas_groupby)
@@ -2200,17 +2164,6 @@ def test_mean_with_datetime(by_func):
     eval_general(modin_df, pandas_df, lambda df: df.groupby(by=by_func(df)).mean())
 
 
-def test_groupby_mad_warn():
-    modin_df, pandas_df = create_test_dfs(test_groupby_data)
-    md_grp = modin_df.groupby(by=modin_df.columns[0])
-    pd_grp = pandas_df.groupby(by=pandas_df.columns[0])
-
-    msg = "The 'mad' method is deprecated and will be removed in a future version."
-    for grp_obj in (md_grp, pd_grp):
-        with pytest.warns(FutureWarning, match=msg):
-            grp_obj.mad()
-
-
 def test_groupby_backfill_warn():
     modin_df = pd.DataFrame(test_groupby_data)
     md_grp = modin_df.groupby(by=modin_df.columns[0])
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 6181607245f..669275b4756 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -2325,17 +2325,6 @@ def test_lt(data):
     inter_df_math_helper(modin_series, pandas_series, "lt")
 
 
-@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
-@pytest.mark.parametrize("axis", [None, 0])
-@pytest.mark.parametrize("skipna", [None, True, False])
-@pytest.mark.parametrize("level", [0, -1, None])
-def test_mad(level, data, axis, skipna):
-    eval_general(
-        *create_test_series(data),
-        lambda df: df.mad(axis=axis, skipna=skipna, level=level),
-    )
-
-
 @pytest.mark.parametrize("na_values", ["ignore", None], ids=["na_ignore", "na_none"])
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 def test_map(data, na_values):
diff --git a/requirements/env_hdk.yml b/requirements/env_hdk.yml
index f908e6e5a2d..d76770fb93c 100644
--- a/requirements/env_hdk.yml
+++ b/requirements/env_hdk.yml
@@ -2,7 +2,7 @@ name: modin_on_hdk
 channels:
   - conda-forge
 dependencies:
-  - pandas==1.5.3
+  - pandas==2.0.0
   - pyarrow
   - numpy>=1.18.5
   - fsspec
diff --git a/requirements/env_unidist.yml b/requirements/env_unidist.yml
index fc72960952d..960c06ee600 100644
--- a/requirements/env_unidist.yml
+++ b/requirements/env_unidist.yml
@@ -3,7 +3,7 @@ channels:
   - conda-forge
 dependencies:
   - unidist-mpi>=0.2.1
-  - pandas==1.5.3
+  - pandas==2.0.0
   - numpy>=1.18.5
   - pyarrow
   - fsspec
diff --git a/requirements/requirements-no-engine.yml b/requirements/requirements-no-engine.yml
index 204545abc3a..795af8dc565 100644
--- a/requirements/requirements-no-engine.yml
+++ b/requirements/requirements-no-engine.yml
@@ -1,7 +1,7 @@
 channels:
   - conda-forge
 dependencies:
-  - pandas==1.5.3
+  - pandas==2.0.0
   - numpy>=1.18.5
   - pyarrow>=4.0.1
   - fsspec
diff --git a/setup.py b/setup.py
index c652a7b1335..12642d72b89 100644
--- a/setup.py
+++ b/setup.py
@@ -47,7 +47,7 @@ def make_distribution(self):
     long_description=long_description,
     long_description_content_type="text/markdown",
     install_requires=[
-        "pandas==1.5.3",
+        "pandas==2.0.0",
         "packaging",
         "numpy>=1.18.5",
         "fsspec",

From c0c9cf25c7754daad3617e36735b78bb7f351b86 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Wed, 12 Apr 2023 21:47:14 +0200
Subject: [PATCH 002/176] remove Int64Index, UInt64Index, Float64Index

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 docs/supported_apis/utilities_supported.rst         | 3 ---
 modin/core/storage_formats/pandas/query_compiler.py | 4 ++--
 modin/pandas/__init__.py                            | 6 ------
 3 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/docs/supported_apis/utilities_supported.rst b/docs/supported_apis/utilities_supported.rst
index 9b3bdb78cf8..f12e896e8fa 100644
--- a/docs/supported_apis/utilities_supported.rst
+++ b/docs/supported_apis/utilities_supported.rst
@@ -90,9 +90,6 @@ contributing a distributed version of any of these objects, feel free to open a
 * IntervalDtype
 * PeriodDtype
 * RangeIndex
-* Int64Index
-* UInt64Index
-* Float64Index
 * TimedeltaIndex
 * IntervalIndex
 * IndexSlice
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index a608ff81339..bf5fbfef986 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -2178,9 +2178,9 @@ def quantile_builder(df, **kwargs):
         # correctness and cleanliness of the code.
         if axis == 1:
             q_index = new_columns
-            new_columns = pandas.Float64Index(q)
+            new_columns = pandas.Index(q)
         else:
-            q_index = pandas.Float64Index(q)
+            q_index = pandas.Index(q)
         new_modin_frame = query_compiler._modin_frame.apply_full_axis(
             axis,
             lambda df: quantile_builder(df, **kwargs),
diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py
index 2cf4b56c8be..91c9429ac09 100644
--- a/modin/pandas/__init__.py
+++ b/modin/pandas/__init__.py
@@ -65,9 +65,6 @@
         IntervalDtype,
         PeriodDtype,
         RangeIndex,
-        Int64Index,
-        UInt64Index,
-        Float64Index,
         TimedeltaIndex,
         IntervalIndex,
         IndexSlice,
@@ -327,9 +324,6 @@ def init_remote_ray(partition):
     "StringDtype",
     "NA",
     "RangeIndex",
-    "Int64Index",
-    "UInt64Index",
-    "Float64Index",
     "TimedeltaIndex",
     "IntervalIndex",
     "IndexSlice",

From 93b7f66c8578c2fc53b9ee036a6b3301a007478e Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Wed, 12 Apr 2023 21:56:29 +0200
Subject: [PATCH 003/176] remove pandas.datetime

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 docs/supported_apis/utilities_supported.rst | 2 --
 modin/__init__.py                           | 5 -----
 modin/pandas/__init__.py                    | 2 --
 modin/pandas/test/test_io.py                | 2 +-
 4 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/docs/supported_apis/utilities_supported.rst b/docs/supported_apis/utilities_supported.rst
index f12e896e8fa..dcc1f11adf7 100644
--- a/docs/supported_apis/utilities_supported.rst
+++ b/docs/supported_apis/utilities_supported.rst
@@ -54,8 +54,6 @@ default to pandas.
 +---------------------------+---------------------------------+----------------------------------------------------+
 | ``pd.options``            | Y                               |                                                    |
 +---------------------------+---------------------------------+----------------------------------------------------+
-| ``pd.datetime``           | D                               |                                                    |
-+---------------------------+---------------------------------+----------------------------------------------------+
 
 Other objects & structures
 --------------------------
diff --git a/modin/__init__.py b/modin/__init__.py
index 94836772278..e40b324afab 100644
--- a/modin/__init__.py
+++ b/modin/__init__.py
@@ -35,11 +35,6 @@ def custom_formatwarning(
 # Filter numpy version warnings because they are not relevant
 warnings.filterwarnings("ignore", message="numpy.dtype size changed")
 warnings.filterwarnings("ignore", message="Large object of size")
-warnings.filterwarnings(
-    "ignore",
-    message="The pandas.datetime class is deprecated and will be removed from pandas in a future version. "
-    + "Import from datetime module instead.",
-)
 
 
 def set_execution(
diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py
index 91c9429ac09..0d9928035ff 100644
--- a/modin/pandas/__init__.py
+++ b/modin/pandas/__init__.py
@@ -76,7 +76,6 @@
         infer_freq,
         interval_range,
         ExcelWriter,
-        datetime,
         NamedAgg,
         NA,
         api,
@@ -352,7 +351,6 @@ def init_remote_ray(partition):
     "to_numeric",
     "unique",
     "value_counts",
-    "datetime",
     "NamedAgg",
     "api",
     "read_xml",
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 7f39d773521..6ded0f0e3c6 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -534,7 +534,7 @@ def test_read_csv_nans_handling(
     @pytest.mark.parametrize("infer_datetime_format", [True, False])
     @pytest.mark.parametrize("keep_date_col", [True, False])
     @pytest.mark.parametrize(
-        "date_parser", [None, lambda x: pandas.datetime.strptime(x, "%Y-%m-%d")]
+        "date_parser", [None, lambda x: pandas.to_datetime(x, format="%Y-%m-%d")]
     )
     @pytest.mark.parametrize("dayfirst", [True, False])
     @pytest.mark.parametrize("cache_dates", [True, False])

From eca0422c653b4ca3d7f3fa0e3b53497edc096208 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Wed, 12 Apr 2023 22:06:28 +0200
Subject: [PATCH 004/176] remove convert_to_index_sliceable

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 09ac413945b..c8bf649f7d4 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -31,7 +31,6 @@
 import pandas.core.window.rolling
 import pandas.core.resample
 import pandas.core.generic
-from pandas.core.indexing import convert_to_index_sliceable
 from pandas.util._validators import (
     validate_percentile,
     validate_bool_kwarg,
@@ -3665,9 +3664,7 @@ def __getitem__(self, key):
         if isinstance(key, slice) or (
             isinstance(key, str) and (not self._is_dataframe or key not in self.columns)
         ):
-            indexer = convert_to_index_sliceable(
-                pandas.DataFrame(index=self.index), key
-            )
+            indexer = self.index._convert_slice_indexer(key, kind="getitem")
         if indexer is not None:
             return self._getitem_slice(indexer)
         else:
@@ -3772,7 +3769,7 @@ def _setitem_slice(self, key: slice, value):
         value : object
             Value to assing to the rows.
         """
-        indexer = convert_to_index_sliceable(pandas.DataFrame(index=self.index), key)
+        indexer = self.index._convert_slice_indexer(key, kind="getitem")
         self.iloc[indexer] = value
 
     def _getitem_slice(self, key: slice):

From 929edcc22bef994b83a7140b09955370f4b3c01d Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 00:18:59 +0200
Subject: [PATCH 005/176] remove 'ensure_clean_dir'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/experimental/pandas/test/test_io_exp.py | 17 ++--
 modin/pandas/test/test_io.py                  | 90 ++++++++-----------
 2 files changed, 42 insertions(+), 65 deletions(-)

diff --git a/modin/experimental/pandas/test/test_io_exp.py b/modin/experimental/pandas/test/test_io_exp.py
index 325ffc13799..477ca6f31cb 100644
--- a/modin/experimental/pandas/test/test_io_exp.py
+++ b/modin/experimental/pandas/test/test_io_exp.py
@@ -12,13 +12,12 @@
 # governing permissions and limitations under the License.
 
 from contextlib import nullcontext
-import os
 import glob
 import json
 
 import numpy as np
 import pandas
-from pandas._testing import ensure_clean, ensure_clean_dir
+from pandas._testing import ensure_clean
 import pytest
 
 import modin.experimental.pandas as pd
@@ -38,10 +37,9 @@
     reason=f"{Engine.get()} does not have experimental API",
 )
 def test_from_sql_distributed(make_sql_connection):
-    with ensure_clean_dir() as dirname:
-        filename = "test_from_sql_distributed.db"
+    with ensure_clean("test_from_sql_distributed.db") as filename:
         table = "test_from_sql_distributed"
-        conn = make_sql_connection(os.path.join(dirname, filename), table)
+        conn = make_sql_connection(filename, table)
         query = "select * from {0}".format(table)
 
         pandas_df = pandas.read_sql(query, conn)
@@ -71,10 +69,9 @@ def test_from_sql_distributed(make_sql_connection):
     reason=f"{Engine.get()} does not have experimental API",
 )
 def test_from_sql_defaults(make_sql_connection):
-    with ensure_clean_dir() as dirname:
-        filename = "test_from_sql_distributed.db"
+    with ensure_clean("test_from_sql_distributed.db") as filename:
         table = "test_from_sql_distributed"
-        conn = make_sql_connection(os.path.join(dirname, filename), table)
+        conn = make_sql_connection(filename, table)
         query = "select * from {0}".format(table)
 
         pandas_df = pandas.read_sql(query, conn)
@@ -308,7 +305,7 @@ def _custom_parser(io_input, **kwargs):
         )
         if AsyncReadMode.get():
             # If read operations are asynchronous, then the dataframes
-            # check should be inside `ensure_clean_dir` context
+            # check should be inside `ensure_clean` context
             # because the file may be deleted before actual reading starts
             df_equals(df1, df2)
     if not AsyncReadMode.get():
@@ -365,7 +362,7 @@ def columns_callback(io_input, **kwargs):
         )
         if AsyncReadMode.get():
             # If read operations are asynchronous, then the dataframes
-            # check should be inside `ensure_clean_dir` context
+            # check should be inside `ensure_clean` context
             # because the file may be deleted before actual reading starts
             df_equals(df1, df2)
     if not AsyncReadMode.get():
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 6ded0f0e3c6..6c0b1c85f43 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -19,7 +19,7 @@
 from pandas.errors import ParserWarning
 import pandas._libs.lib as lib
 from pandas.core.dtypes.common import is_list_like
-from pandas._testing import ensure_clean, ensure_clean_dir
+from pandas._testing import ensure_clean
 from pathlib import Path
 from collections import OrderedDict
 from modin.config.envvars import MinPartitionSize
@@ -125,6 +125,18 @@ def _nullcontext():
     yield
 
 
+@contextlib.contextmanager
+def ensure_clean_two_files(suffix):
+    if isinstance(suffix, tuple):
+        assert len(suffix) == 2
+        suffix1, suffix2 = suffix
+    else:
+        suffix1, suffix2 = suffix, suffix
+    with ensure_clean(suffix1) as unique_filename1:
+        with ensure_clean(suffix2) as unique_filename2:
+            yield unique_filename1, unique_filename2
+
+
 def assert_files_eq(path1, path2):
     with open(path1, "rb") as file1, open(path2, "rb") as file2:
         file1_content = file1.read()
@@ -156,13 +168,8 @@ def parquet_eval_to_file(modin_obj, pandas_obj, fn, extension, **fn_kwargs):
     extension : str
         Extension of the test file.
     """
-    with ensure_clean_dir() as dirname:
-        unique_filename_modin = get_unique_filename(
-            extension=extension, data_dir=dirname
-        )
-        unique_filename_pandas = get_unique_filename(
-            extension=extension, data_dir=dirname
-        )
+    with ensure_clean_two_files(extension) as filenames:
+        unique_filename_modin, unique_filename_pandas = filenames
 
         engine = fn_kwargs.get("engine", "auto")
 
@@ -183,13 +190,8 @@ def eval_to_file(modin_obj, pandas_obj, fn, extension, **fn_kwargs):
         fn: name of the method, that should be tested.
         extension: Extension of the test file.
     """
-    with ensure_clean_dir() as dirname:
-        unique_filename_modin = get_unique_filename(
-            extension=extension, data_dir=dirname
-        )
-        unique_filename_pandas = get_unique_filename(
-            extension=extension, data_dir=dirname
-        )
+    with ensure_clean_two_files(extension) as filenames:
+        unique_filename_modin, unique_filename_pandas = filenames
 
         # parameter `max_retries=0` is set for `to_csv` function on Ray engine,
         # in order to increase the stability of tests, we repeat the call of
@@ -1211,7 +1213,7 @@ def test_read_csv_file_handle(
                 modin_df = pd.read_csv(buffer)
             if AsyncReadMode.get():
                 # If read operations are asynchronous, then the dataframes
-                # check should be inside `ensure_clean_dir` context
+                # check should be inside `ensure_clean` context
                 # because the file may be deleted before actual reading starts
                 df_equals(modin_df, pandas_df)
         if not AsyncReadMode.get():
@@ -1303,7 +1305,7 @@ def test_read_csv_issue_5150(self, set_async_read_mode):
             actual_pandas_df = modin_df._to_pandas()
             if AsyncReadMode.get():
                 # If read operations are asynchronous, then the dataframes
-                # check should be inside `ensure_clean_dir` context
+                # check should be inside `ensure_clean` context
                 # because the file may be deleted before actual reading starts
                 df_equals(expected_pandas_df, actual_pandas_df)
         if not AsyncReadMode.get():
@@ -1341,7 +1343,7 @@ def wrapped_read_table(file, method):
 
             if AsyncReadMode.get():
                 # If read operations are asynchronous, then the dataframes
-                # check should be inside `ensure_clean_dir` context
+                # check should be inside `ensure_clean` context
                 # because the file may be deleted before actual reading starts
                 df_equals(modin_df, pandas_df)
         if not AsyncReadMode.get():
@@ -1406,8 +1408,7 @@ def test_read_parquet_indexing_by_column(self, engine, make_parquet_file):
         nrows = (
             MinPartitionSize.get() + 1
         )  # Use the minimal guaranteed failing value for nrows.
-        with ensure_clean_dir() as dirname:
-            unique_filename = get_unique_filename(extension="parquet", data_dir=dirname)
+        with ensure_clean(".parquet") as unique_filename:
             make_parquet_file(filename=unique_filename, nrows=nrows)
 
             parquet_df = pd.read_parquet(unique_filename, engine=engine)
@@ -1462,8 +1463,7 @@ def test_read_parquet_directory(
     def test_read_parquet_partitioned_directory(
         self, make_parquet_file, columns, engine
     ):
-        with ensure_clean_dir() as dirname:
-            unique_filename = get_unique_filename(extension=None, data_dir=dirname)
+        with ensure_clean() as unique_filename:
             make_parquet_file(filename=unique_filename, partitioned_columns=["col1"])
 
             eval_io(
@@ -1546,8 +1546,7 @@ def test_read_parquet_pandas_index_partitioned(self, engine):
                 "C": ["c"] * 2000,
             }
         )
-        with ensure_clean_dir() as dirname:
-            unique_filename = get_unique_filename(extension="parquet", data_dir=dirname)
+        with ensure_clean(".parquet") as unique_filename:
             pandas_df.set_index("idx").to_parquet(unique_filename, partition_cols=["A"])
             # read the same parquet using modin.pandas
             eval_io(
@@ -1609,9 +1608,8 @@ def test_read_parquet_without_metadata(self, engine):
         from pyarrow import csv
         from pyarrow import parquet
 
-        with ensure_clean_dir() as dirname:
-            parquet_fname = get_unique_filename(extension="parquet", data_dir=dirname)
-            csv_fname = get_unique_filename(extension="parquet", data_dir=dirname)
+        with ensure_clean_two_files(".parquet") as filenames:
+            parquet_fname, csv_fname = filenames
             pandas_df = pandas.DataFrame(
                 {
                     "idx": np.random.randint(0, 100_000, size=2000),
@@ -1990,9 +1988,8 @@ def test_ExcelFile(self, make_excel_file):
     def test_to_excel(self):
         modin_df, pandas_df = create_test_dfs(TEST_DATA)
 
-        with ensure_clean_dir() as dir:
-            unique_filename_modin = get_unique_filename(extension="xlsx", data_dir=dir)
-            unique_filename_pandas = get_unique_filename(extension="xlsx", data_dir=dir)
+        with ensure_clean_two_files(".xlsx") as filenames:
+            unique_filename_modin, unique_filename_pandas = filenames
 
             modin_writer = pandas.ExcelWriter(unique_filename_modin)
             pandas_writer = pandas.ExcelWriter(unique_filename_pandas)
@@ -2040,13 +2037,8 @@ def test_read_hdf(self, make_hdf_file, format):
         reason="The reason of tests fail in `cloud` mode is unknown for now - issue #3264",
     )
     def test_HDFStore(self):
-        with ensure_clean_dir() as dirname:
-            unique_filename_modin = get_unique_filename(
-                extension="hdf", data_dir=dirname
-            )
-            unique_filename_pandas = get_unique_filename(
-                extension="hdf", data_dir=dirname
-            )
+        with ensure_clean_two_files(".hdf") as filenames:
+            unique_filename_modin, unique_filename_pandas = filenames
 
             modin_store = pd.HDFStore(unique_filename_modin)
             pandas_store = pandas.HDFStore(unique_filename_pandas)
@@ -2099,10 +2091,9 @@ class TestSql:
     )
     @pytest.mark.parametrize("read_sql_engine", ["Pandas", "Connectorx"])
     def test_read_sql(self, make_sql_connection, read_sql_engine):
-        with ensure_clean_dir() as dirname:
-            filename = get_unique_filename(".db")
+        with ensure_clean(".db") as filename:
             table = "test_read_sql"
-            conn = make_sql_connection(os.path.join(dirname, filename), table)
+            conn = make_sql_connection(filename, table)
             query = f"select * from {table}"
 
             eval_io(
@@ -2223,17 +2214,17 @@ def test_read_sql_with_chunksize(self, make_sql_connection):
     def test_to_sql(self, make_sql_connection, index):
         table_name = f"test_to_sql_{str(index)}"
         modin_df, pandas_df = create_test_dfs(TEST_DATA)
-
-        with ensure_clean_dir() as dirname:
+        suffixes = (f"{table_name}_modin.db", f"{table_name}_pandas.db")
+        with ensure_clean_two_files(suffixes) as filenames:
             # We do not pass the table name so the fixture won't generate a table
-            conn = make_sql_connection(os.path.join(dirname, f"{table_name}_modin.db"))
+            conn = make_sql_connection(filenames[0])
             modin_df.to_sql(table_name, conn, index=index)
             df_modin_sql = pandas.read_sql(
                 table_name, con=conn, index_col="index" if index else None
             )
 
             # We do not pass the table name so the fixture won't generate a table
-            conn = make_sql_connection(os.path.join(dirname, f"{table_name}_pandas.db"))
+            conn = make_sql_connection(filenames[1])
             pandas_df.to_sql(table_name, conn, index=index)
             df_pandas_sql = pandas.read_sql(
                 table_name, con=conn, index_col="index" if index else None
@@ -2596,17 +2587,6 @@ def test_to_pickle(self):
         eval_to_file(
             modin_obj=modin_df, pandas_obj=pandas_df, fn="to_pickle", extension="pkl"
         )
-        with ensure_clean_dir() as dirname:
-            unique_filename_modin = get_unique_filename(
-                extension="pkl", data_dir=dirname
-            )
-            unique_filename_pandas = get_unique_filename(
-                extension="pkl", data_dir=dirname
-            )
-            pd.to_pickle(modin_df, unique_filename_modin)
-            pandas.to_pickle(pandas_df, unique_filename_pandas)
-
-            assert assert_files_eq(unique_filename_modin, unique_filename_pandas)
 
 
 @pytest.mark.xfail(

From 417bf0c89aa9aa9e82b9767db93bc955f5901083 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 01:13:44 +0200
Subject: [PATCH 006/176] disable 'xarray' until it adds support for pandas
 2.0.0

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 environment-dev.yml                         | 2 +-
 modin/pandas/test/dataframe/test_default.py | 2 +-
 modin/pandas/test/test_series.py            | 1 +
 requirements/env_hdk.yml                    | 2 +-
 requirements/env_unidist.yml                | 2 +-
 requirements/requirements-no-engine.yml     | 2 +-
 6 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 14839085ae1..e76bb1597a0 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -12,7 +12,7 @@ dependencies:
   - dask>=2.22.0
   - distributed>=2.22.0
   - fsspec
-  - xarray
+  # - xarray
   - Jinja2
   - scipy
   - pip
diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py
index dd4cc49046b..12d2a736a71 100644
--- a/modin/pandas/test/dataframe/test_default.py
+++ b/modin/pandas/test/dataframe/test_default.py
@@ -73,7 +73,7 @@
         ("lookup", lambda df: {"row_labels": [0], "col_labels": ["int_col"]}),
         ("mask", lambda df: {"cond": df != 0}),
         ("pct_change", None),
-        ("to_xarray", None),
+        # ("to_xarray", None),
         ("flags", None),
         ("set_flags", lambda df: {"allows_duplicate_labels": False}),
     ],
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 669275b4756..5d701693a48 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -3436,6 +3436,7 @@ def test_to_timestamp():
         series.to_period().to_timestamp()
 
 
+@pytest.mark.skip
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 def test_to_xarray(data):
     modin_series, _ = create_test_series(data)  # noqa: F841
diff --git a/requirements/env_hdk.yml b/requirements/env_hdk.yml
index d76770fb93c..d0ac142c793 100644
--- a/requirements/env_hdk.yml
+++ b/requirements/env_hdk.yml
@@ -22,7 +22,7 @@ dependencies:
   - xgboost>=1.7.1,<2.0.0
   - scikit-learn-intelex
   - matplotlib
-  - xarray
+  # - xarray
   - pytables
   - fastparquet
   # code linters
diff --git a/requirements/env_unidist.yml b/requirements/env_unidist.yml
index 960c06ee600..80270ab0a7e 100644
--- a/requirements/env_unidist.yml
+++ b/requirements/env_unidist.yml
@@ -7,7 +7,7 @@ dependencies:
   - numpy>=1.18.5
   - pyarrow
   - fsspec
-  - xarray
+  # - xarray
   - Jinja2
   - scipy
   - pip
diff --git a/requirements/requirements-no-engine.yml b/requirements/requirements-no-engine.yml
index 795af8dc565..7889356175b 100644
--- a/requirements/requirements-no-engine.yml
+++ b/requirements/requirements-no-engine.yml
@@ -5,7 +5,7 @@ dependencies:
   - numpy>=1.18.5
   - pyarrow>=4.0.1
   - fsspec
-  - xarray
+  # - xarray
   - Jinja2
   - scipy
   - pip

From b9a83b5cf23ce23aa15a2b55fa2e0ed9c33ad1e1 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 01:51:37 +0200
Subject: [PATCH 007/176] remove 'mad' [2]

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 docs/supported_apis/dataframe_supported.rst  | 2 --
 docs/supported_apis/series_supported.rst     | 6 ++----
 modin/pandas/test/dataframe/test_indexing.py | 2 +-
 modin/pandas/test/test_series.py             | 2 +-
 4 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/docs/supported_apis/dataframe_supported.rst b/docs/supported_apis/dataframe_supported.rst
index d9b9462ab5b..7be4b452ef5 100644
--- a/docs/supported_apis/dataframe_supported.rst
+++ b/docs/supported_apis/dataframe_supported.rst
@@ -233,8 +233,6 @@ default to pandas.
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``lt``                     | `lt`_                     | Y                      | See ``add``                                        |
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
-| ``mad``                    | `mad`_                    | Y                      |                                                    |
-+----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``mask``                   | `mask`_                   | D                      |                                                    |
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``max``                    | `max`_                    | Y                      | **Hdk**: ``P``, only default params supported,     |
diff --git a/docs/supported_apis/series_supported.rst b/docs/supported_apis/series_supported.rst
index ca7c6974751..93b061e3760 100644
--- a/docs/supported_apis/series_supported.rst
+++ b/docs/supported_apis/series_supported.rst
@@ -258,8 +258,6 @@ the related section on :doc:`Defaulting to pandas </supported_apis/index>`.
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``lt``                      | Y                               | See ``add``                                        |
 +-----------------------------+---------------------------------+----------------------------------------------------+
-| ``mad``                     | Y                               |                                                    |
-+-----------------------------+---------------------------------+----------------------------------------------------+
 | ``map``                     | Y                               |                                                    |
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``mask``                    | D                               |                                                    |
@@ -401,8 +399,8 @@ the related section on :doc:`Defaulting to pandas </supported_apis/index>`.
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``sort_index``              | Y                               |                                                    |
 +-----------------------------+---------------------------------+----------------------------------------------------+
-| ``sort_values``             | Y                               | Order of indexes that have the same sort key       |
-|                             |                                 | is not guaranteed to be the same across sorts;     |
+| ``sort_values``             | Y                               | Order of indexes that have the same sort key       |
+|                             |                                 | is not guaranteed to be the same across sorts;     |
 |                             |                                 | **Hdk**: ``Y``                                     |
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``sparse``                  | Y                               |                                                    |
diff --git a/modin/pandas/test/dataframe/test_indexing.py b/modin/pandas/test/dataframe/test_indexing.py
index f2c6cbd01b4..64bda85cd51 100644
--- a/modin/pandas/test/dataframe/test_indexing.py
+++ b/modin/pandas/test/dataframe/test_indexing.py
@@ -2408,7 +2408,7 @@ def test_index_order():
     df_modin.index = index
     df_pandas.index = index
 
-    for func in ["all", "any", "mad", "count"]:
+    for func in ["all", "any", "count"]:
         df_equals(
             getattr(df_modin, func)(level=0).index,
             getattr(df_pandas, func)(level=0).index,
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 5d701693a48..b096c1f6c4e 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -2231,7 +2231,7 @@ def test_last():
     df_equals(modin_series.last("20D"), pandas_series.last("20D"))
 
 
-@pytest.mark.parametrize("func", ["all", "any", "mad", "count"])
+@pytest.mark.parametrize("func", ["all", "any", "count"])
 def test_index_order(func):
     # see #1708 and #1869 for details
     s_modin, s_pandas = create_test_series(test_data["float_nan_data"])

From af9b32e3b6cc05ff97ac1f2e995c11f4223779b1 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 15:24:26 +0200
Subject: [PATCH 008/176] add 'dtype_backend' parameter for all functions and
 remove is_monotonic

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 docs/supported_apis/series_supported.rst      |  2 -
 .../storage_formats/base/query_compiler.py    |  9 ++-
 .../implementations/hdk_on_native/io/io.py    |  1 +
 modin/pandas/base.py                          |  3 +
 modin/pandas/general.py                       | 17 ++++-
 modin/pandas/io.py                            | 73 ++++++++-----------
 modin/pandas/series.py                        |  4 +-
 modin/pandas/test/test_series.py              |  6 --
 8 files changed, 57 insertions(+), 58 deletions(-)

diff --git a/docs/supported_apis/series_supported.rst b/docs/supported_apis/series_supported.rst
index 93b061e3760..82e2a5aeafe 100644
--- a/docs/supported_apis/series_supported.rst
+++ b/docs/supported_apis/series_supported.rst
@@ -219,8 +219,6 @@ the related section on :doc:`Defaulting to pandas </supported_apis/index>`.
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``interpolate``             | D                               |                                                    |
 +-----------------------------+---------------------------------+----------------------------------------------------+
-| ``is_monotonic``            | Y                               |                                                    |
-+-----------------------------+---------------------------------+----------------------------------------------------+
 | ``is_monotonic_decreasing`` | Y                               |                                                    |
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``is_monotonic_increasing`` | Y                               |                                                    |
diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index c00c4060611..23ea634a77b 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -39,7 +39,7 @@
 from pandas.core.dtypes.common import is_scalar, is_number
 import pandas.core.resample
 import pandas
-from pandas._typing import IndexLabel, Suffixes
+from pandas._typing import IndexLabel, Suffixes, DtypeBackend
 import numpy as np
 from typing import List, Hashable, Optional
 
@@ -1627,6 +1627,7 @@ def convert_dtypes(
         convert_integer: bool = True,
         convert_boolean: bool = True,
         convert_floating: bool = True,
+        dtype_backend: DtypeBackend = "numpy_nullable",
     ):
         """
         Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.
@@ -1645,6 +1646,11 @@ def convert_dtypes(
             Whether, if possible, conversion can be done to floating extension types.
             If `convert_integer` is also True, preference will be give to integer dtypes
             if the floats can be faithfully casted to integers.
+        dtype_backend : {"numpy_nullable", "pyarrow"}, default "numpy_nullable"
+            Which dtype_backend to use, e.g. whether a DataFrame should use nullable
+            dtypes for all dtypes that have a nullable
+            implementation when "numpy_nullable" is set, pyarrow is used for all
+            dtypes if "pyarrow" is set.
 
         Returns
         -------
@@ -1658,6 +1664,7 @@ def convert_dtypes(
             convert_integer=convert_integer,
             convert_boolean=convert_boolean,
             convert_floating=convert_floating,
+            dtype_backend=dtype_backend,
         )
 
     @property
diff --git a/modin/experimental/core/execution/native/implementations/hdk_on_native/io/io.py b/modin/experimental/core/execution/native/implementations/hdk_on_native/io/io.py
index 80a37094b32..86a3fb5a51f 100644
--- a/modin/experimental/core/execution/native/implementations/hdk_on_native/io/io.py
+++ b/modin/experimental/core/execution/native/implementations/hdk_on_native/io/io.py
@@ -100,6 +100,7 @@ class HdkOnNativeIO(BaseIO, TextFileDispatcher):
         "memory_map",
         "float_precision",
         "storage_options",
+        "dtype_backend",
     ]
 
     @classmethod
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index c8bf649f7d4..7b065bdf882 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -49,6 +49,7 @@
     TimedeltaConvertibleTypes,
     TimestampConvertibleTypes,
     RandomState,
+    DtypeBackend,
 )
 import pickle as pkl
 import re
@@ -1719,6 +1720,7 @@ def convert_dtypes(
         convert_integer: bool = True,
         convert_boolean: bool = True,
         convert_floating: bool = True,
+        dtype_backend: DtypeBackend = "numpy_nullable",
     ):  # noqa: PR01, RT01, D200
         """
         Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.
@@ -1730,6 +1732,7 @@ def convert_dtypes(
                 convert_integer=convert_integer,
                 convert_boolean=convert_boolean,
                 convert_floating=convert_floating,
+                dtype_backend=dtype_backend,
             )
         )
 
diff --git a/modin/pandas/general.py b/modin/pandas/general.py
index 67a87c0a889..845c3c2406b 100644
--- a/modin/pandas/general.py
+++ b/modin/pandas/general.py
@@ -18,6 +18,8 @@
 
 from typing import Hashable, Iterable, Mapping, Union
 from pandas.core.dtypes.common import is_list_like
+from pandas._libs.lib import no_default, NoDefault
+from pandas._typing import DtypeBackend
 
 from modin.error_message import ErrorMessage
 from .base import BasePandasDataset
@@ -262,13 +264,22 @@ def pivot(data, index=None, columns=None, values=None):  # noqa: PR01, RT01, D20
 
 @_inherit_docstrings(pandas.to_numeric, apilink="pandas.to_numeric")
 @enable_logging
-def to_numeric(arg, errors="raise", downcast=None):  # noqa: PR01, RT01, D200
+def to_numeric(
+    arg,
+    errors="raise",
+    downcast=None,
+    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
+):  # noqa: PR01, RT01, D200
     """
     Convert argument to a numeric type.
     """
     if not isinstance(arg, Series):
-        return pandas.to_numeric(arg, errors=errors, downcast=downcast)
-    return arg._to_numeric(errors=errors, downcast=downcast)
+        return pandas.to_numeric(
+            arg, errors=errors, downcast=downcast, dtype_backend=dtype_backend
+        )
+    return arg._to_numeric(
+        errors=errors, downcast=downcast, dtype_backend=dtype_backend
+    )
 
 
 @_inherit_docstrings(pandas.qcut, apilink="pandas.qcut")
diff --git a/modin/pandas/io.py b/modin/pandas/io.py
index d2ee8aa5586..70147e74d10 100644
--- a/modin/pandas/io.py
+++ b/modin/pandas/io.py
@@ -41,6 +41,7 @@
     ConvertersArg,
     ParseDatesArg,
     XMLParsers,
+    DtypeBackend,
 )
 import pathlib
 import pickle
@@ -116,27 +117,11 @@ def read_xml(
     iterparse: dict[str, list[str]] | None = None,
     compression: CompressionOptions = "infer",
     storage_options: StorageOptions = None,
+    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
 ) -> DataFrame:
     ErrorMessage.default_to_pandas("read_xml")
-    return DataFrame(
-        pandas.read_xml(
-            path_or_buffer,
-            xpath=xpath,
-            namespaces=namespaces,
-            elems_only=elems_only,
-            attrs_only=attrs_only,
-            names=names,
-            dtype=dtype,
-            converters=converters,
-            parse_dates=parse_dates,
-            encoding=encoding,
-            parser=parser,
-            stylesheet=stylesheet,
-            iterparse=iterparse,
-            compression=compression,
-            storage_options=storage_options,
-        )
-    )
+    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
+    return DataFrame(pandas.read_xml(**kwargs))
 
 
 @_inherit_docstrings(pandas.read_csv, apilink="pandas.read_csv")
@@ -202,6 +187,7 @@ def read_csv(
     memory_map: bool = False,
     float_precision: Literal["high", "legacy"] | None = None,
     storage_options: StorageOptions = None,
+    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
 ) -> DataFrame | TextFileReader:
     # ISSUE #2408: parse parameter shared with pandas read_csv and read_table and update with provided args
     _pd_read_csv_signature = {
@@ -337,6 +323,7 @@ def read_json(
     compression: CompressionOptions = "infer",
     nrows: int | None = None,
     storage_options: StorageOptions = None,
+    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
 ) -> DataFrame | Series | pandas.io.json._json.JsonReader:
     _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
 
@@ -389,37 +376,25 @@ def read_html(
     keep_default_na: bool = True,
     displayed_only: bool = True,
     extract_links: Literal[None, "header", "footer", "body", "all"] = None,
+    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
 ) -> list[DataFrame]:  # noqa: PR01, RT01, D200
     """
     Read HTML tables into a ``DataFrame`` object.
     """
+    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
+
     from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher
 
-    return DataFrame(
-        query_compiler=FactoryDispatcher.read_html(
-            io,
-            match=match,
-            flavor=flavor,
-            header=header,
-            index_col=index_col,
-            skiprows=skiprows,
-            attrs=attrs,
-            parse_dates=parse_dates,
-            thousands=thousands,
-            encoding=encoding,
-            decimal=decimal,
-            converters=converters,
-            na_values=na_values,
-            keep_default_na=keep_default_na,
-            displayed_only=displayed_only,
-            extract_links=extract_links,
-        )
-    )
+    return DataFrame(query_compiler=FactoryDispatcher.read_html(**kwargs))
 
 
 @_inherit_docstrings(pandas.read_clipboard, apilink="pandas.read_clipboard")
 @enable_logging
-def read_clipboard(sep=r"\s+", **kwargs):  # pragma: no cover  # noqa: PR01, RT01, D200
+def read_clipboard(
+    sep=r"\s+",
+    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
+    **kwargs,
+):  # pragma: no cover  # noqa: PR01, RT01, D200
     """
     Read text from clipboard and pass to read_csv.
     """
@@ -466,6 +441,7 @@ def read_excel(
     convert_float: bool | None = None,
     mangle_dupe_cols: bool = True,
     storage_options: StorageOptions = None,
+    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
 ) -> DataFrame | dict[IntStrT, DataFrame]:
     _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
     # mangle_dupe_cols has no effect starting in pandas 1.5. Exclude it from
@@ -517,6 +493,7 @@ def read_feather(
     columns: Sequence[Hashable] | None = None,
     use_threads: bool = True,
     storage_options: StorageOptions = None,
+    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
 ):
     _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
 
@@ -602,6 +579,7 @@ def read_sql(
     parse_dates=None,
     columns=None,
     chunksize=None,
+    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
 ):  # noqa: PR01, RT01, D200
     """
     Read SQL query or database table into a DataFrame.
@@ -626,6 +604,7 @@ def read_fwf(
     colspecs="infer",
     widths=None,
     infer_nrows=100,
+    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
     **kwds,
 ):  # noqa: PR01, RT01, D200
     """
@@ -660,6 +639,7 @@ def read_sql_table(
     parse_dates=None,
     columns=None,
     chunksize=None,
+    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
 ):  # noqa: PR01, RT01, D200
     """
     Read SQL database table into a DataFrame.
@@ -682,6 +662,7 @@ def read_sql_query(
     parse_dates: list[str] | dict[str, str] | None = None,
     chunksize: int | None = None,
     dtype: DtypeArg | None = None,
+    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
 ) -> DataFrame | Iterator[DataFrame]:
     _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
 
@@ -718,6 +699,7 @@ def read_spss(
     path: Union[str, pathlib.Path],
     usecols: Union[Sequence[str], type(None)] = None,
     convert_categoricals: bool = True,
+    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
 ):  # noqa: PR01, RT01, D200
     """
     Load an SPSS file from the file path, returning a DataFrame.
@@ -725,7 +707,9 @@ def read_spss(
     from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher
 
     return DataFrame(
-        query_compiler=FactoryDispatcher.read_spss(path, usecols, convert_categoricals)
+        query_compiler=FactoryDispatcher.read_spss(
+            path, usecols, convert_categoricals, dtype_backend
+        )
     )
 
 
@@ -755,13 +739,16 @@ def json_normalize(
 @_inherit_docstrings(pandas.read_orc, apilink="pandas.read_orc")
 @enable_logging
 def read_orc(
-    path, columns: Optional[List[str]] = None, **kwargs
+    path,
+    columns: Optional[List[str]] = None,
+    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
+    **kwargs,
 ) -> DataFrame:  # noqa: PR01, RT01, D200
     """
     Load an ORC object from the file path, returning a DataFrame.
     """
     ErrorMessage.default_to_pandas("read_orc")
-    return DataFrame(pandas.read_orc(path, columns, **kwargs))
+    return DataFrame(pandas.read_orc(path, columns, dtype_backend, **kwargs))
 
 
 @_inherit_docstrings(pandas.HDFStore)
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 1121a75fe13..344eea77c11 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -2208,14 +2208,12 @@ def hasnans(self):  # noqa: RT01, D200
         return self.isna().sum() > 0
 
     @property
-    def is_monotonic(self):  # noqa: RT01, D200
+    def is_monotonic_increasing(self):  # noqa: RT01, D200
         """
         Return True if values in the Series are monotonic_increasing.
         """
         return self._reduce_dimension(self._query_compiler.is_monotonic_increasing())
 
-    is_monotonic_increasing = is_monotonic
-
     @property
     def is_monotonic_decreasing(self):  # noqa: RT01, D200
         """
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index b096c1f6c4e..142af8b7c13 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -2091,12 +2091,6 @@ def test_interpolate(data):
         modin_series.interpolate()
 
 
-@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
-def test_is_monotonic(data):
-    modin_series, pandas_series = create_test_series(data)
-    assert modin_series.is_monotonic == pandas_series.is_monotonic
-
-
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 def test_is_monotonic_decreasing(data):
     modin_series, pandas_series = create_test_series(data)

From 5626de0b1b137c2d567038350c2b32990e1d20b2 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 16:03:38 +0200
Subject: [PATCH 009/176] remove 'tshift' and 'iteritems'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 docs/supported_apis/dataframe_supported.rst |  6 ------
 docs/supported_apis/series_supported.rst    |  4 ----
 modin/pandas/base.py                        | 12 +++---------
 modin/pandas/dataframe.py                   |  6 ------
 modin/pandas/groupby.py                     |  4 ----
 modin/pandas/series.py                      |  6 ------
 modin/pandas/test/dataframe/test_default.py |  8 --------
 modin/pandas/test/dataframe/test_iter.py    |  4 ++--
 modin/pandas/test/test_api.py               | 15 ++++++++-------
 modin/pandas/test/test_series.py            | 21 ---------------------
 10 files changed, 13 insertions(+), 73 deletions(-)

diff --git a/docs/supported_apis/dataframe_supported.rst b/docs/supported_apis/dataframe_supported.rst
index 7be4b452ef5..e25f04ff159 100644
--- a/docs/supported_apis/dataframe_supported.rst
+++ b/docs/supported_apis/dataframe_supported.rst
@@ -204,8 +204,6 @@ default to pandas.
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``items``                  | `items`_                  | Y                      |                                                    |
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
-| ``iteritems``              | `iteritems`_              | P                      | Modin does not parallelize iteration in Python     |
-+----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``iterrows``               | `iterrows`_               | P                      | Modin does not parallelize iteration in Python     |
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``itertuples``             | `itertuples`_             | P                      | Modin does not parallelize iteration in Python     |
@@ -457,8 +455,6 @@ default to pandas.
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``truncate``               | `truncate`_               | Y                      |                                                    |
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
-| ``tshift``                 | `tshift`_                 | Y                      |                                                    |
-+----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``tz_convert``             | `tz_convert`_             | Y                      |                                                    |
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``tz_localize``            | `tz_localize`_            | Y                      |                                                    |
@@ -566,7 +562,6 @@ default to pandas.
 .. _`isna`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.isna.html#pandas.DataFrame.isna
 .. _`isnull`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.isnull.html#pandas.DataFrame.isnull
 .. _`items`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.items.html#pandas.DataFrame.items
-.. _`iteritems`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iteritems.html#pandas.DataFrame.iteritems
 .. _`iterrows`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iterrows.html#pandas.DataFrame.iterrows
 .. _`itertuples`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.itertuples.html#pandas.DataFrame.itertuples
 .. _`ix`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ix.html#pandas.DataFrame.ix
@@ -678,7 +673,6 @@ default to pandas.
 .. _`transpose`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.transpose.html#pandas.DataFrame.transpose
 .. _`truediv`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.truediv.html#pandas.DataFrame.truediv
 .. _`truncate`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.truncate.html#pandas.DataFrame.truncate
-.. _`tshift`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.tshift.html#pandas.DataFrame.tshift
 .. _`tz_convert`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.tz_convert.html#pandas.DataFrame.tz_convert
 .. _`tz_localize`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.tz_localize.html#pandas.DataFrame.tz_localize
 .. _`unstack`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.unstack.html#pandas.DataFrame.unstack
diff --git a/docs/supported_apis/series_supported.rst b/docs/supported_apis/series_supported.rst
index 82e2a5aeafe..8cb25d6a651 100644
--- a/docs/supported_apis/series_supported.rst
+++ b/docs/supported_apis/series_supported.rst
@@ -237,8 +237,6 @@ the related section on :doc:`Defaulting to pandas </supported_apis/index>`.
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``itemsize``                | D                               |                                                    |
 +-----------------------------+---------------------------------+----------------------------------------------------+
-| ``iteritems``               | Y                               |                                                    |
-+-----------------------------+---------------------------------+----------------------------------------------------+
 | ``keys``                    | Y                               |                                                    |
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``kurt``                    | Y                               |                                                    |
@@ -469,8 +467,6 @@ the related section on :doc:`Defaulting to pandas </supported_apis/index>`.
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``truncate``                | Y                               |                                                    |
 +-----------------------------+---------------------------------+----------------------------------------------------+
-| ``tshift``                  | Y                               |                                                    |
-+-----------------------------+---------------------------------+----------------------------------------------------+
 | ``tz_convert``              | Y                               |                                                    |
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``tz_localize``             | Y                               |                                                    |
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 7b065bdf882..a3acb28132b 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -2844,7 +2844,9 @@ def shift(
                     new_frame.columns = res_columns
                     return new_frame
         else:
-            return self.tshift(periods, freq)
+            axis = self._get_axis_number(axis)
+            new_labels = self.axes[axis].shift(periods, freq=freq)
+            return self.set_axis(new_labels, axis=axis)
 
     def skew(
         self,
@@ -3401,14 +3403,6 @@ def truncate(
         slice_obj = s if axis == 0 else (slice(None), s)
         return self.iloc[slice_obj]
 
-    def tshift(self, periods=1, freq=None, axis=0):  # noqa: PR01, RT01, D200
-        """
-        Shift the time index, using the index's frequency if available.
-        """
-        axis = self._get_axis_number(axis)
-        new_labels = self.axes[axis].shift(periods, freq=freq)
-        return self.set_axis(new_labels, axis=axis)
-
     def transform(self, func, axis=0, *args, **kwargs):  # noqa: PR01, RT01, D200
         """
         Call ``func`` on self producing a `BasePandasDataset` with the same axis shape as self.
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 2e56eb92b92..a5012defbb0 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -1303,12 +1303,6 @@ def items_builder(s):
         for v in partition_iterator:
             yield v
 
-    def iteritems(self):  # noqa: RT01, D200
-        """
-        Iterate over (column name, ``Series``) pairs.
-        """
-        return self.items()
-
     def itertuples(self, index=True, name="Pandas"):  # noqa: PR01, D200
         """
         Iterate over ``DataFrame`` rows as ``namedtuple``-s.
diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 734a555f237..42d53523279 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -294,10 +294,6 @@ def __bytes__(self):
         """
         return self._default_to_pandas(lambda df: df.__bytes__())
 
-    @property
-    def tshift(self):
-        return self._default_to_pandas(lambda df: df.tshift)
-
     _groups_cache = no_default
 
     # TODO: since python 3.9:
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 344eea77c11..cce6b3d96af 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1232,12 +1232,6 @@ def item_builder(s):
         for v in partition_iterator:
             yield v
 
-    def iteritems(self):  # noqa: RT01, D200
-        """
-        Lazily iterate over (index, value) tuples.
-        """
-        return self.items()
-
     def keys(self):  # noqa: RT01, D200
         """
         Return alias for index.
diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py
index 12d2a736a71..00b61ac4f89 100644
--- a/modin/pandas/test/dataframe/test_default.py
+++ b/modin/pandas/test/dataframe/test_default.py
@@ -1046,14 +1046,6 @@ def test_truncate(data):
         df_equals(modin_result, pandas_result)
 
 
-def test_tshift():
-    idx = pd.date_range("1/1/2012", periods=5, freq="M")
-    data = np.random.randint(0, 100, size=(len(idx), 4))
-    modin_df = pd.DataFrame(data, index=idx)
-    pandas_df = pandas.DataFrame(data, index=idx)
-    df_equals(modin_df.tshift(4), pandas_df.tshift(4))
-
-
 def test_tz_convert():
     modin_idx = pd.date_range(
         "1/1/2012", periods=500, freq="2D", tz="America/Los_Angeles"
diff --git a/modin/pandas/test/dataframe/test_iter.py b/modin/pandas/test/dataframe/test_iter.py
index 79e9d5d7fec..7c4e0b70abc 100644
--- a/modin/pandas/test/dataframe/test_iter.py
+++ b/modin/pandas/test/dataframe/test_iter.py
@@ -41,8 +41,8 @@
 matplotlib.use("Agg")
 
 
-@pytest.mark.parametrize("method", ["items", "iteritems", "iterrows"])
-def test_items_iteritems_iterrows(method):
+@pytest.mark.parametrize("method", ["items", "iterrows"])
+def test_items_iterrows(method):
     data = test_data["float_nan_data"]
     modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)
 
diff --git a/modin/pandas/test/test_api.py b/modin/pandas/test/test_api.py
index 9b17d90bf01..3b59c6cce03 100644
--- a/modin/pandas/test/test_api.py
+++ b/modin/pandas/test/test_api.py
@@ -316,13 +316,14 @@ def test_series_api_equality():
     pandas_dir = [obj for obj in dir(pandas.Series) if obj[0] != "_"]
 
     ignore = ["timetuple"]
-    missing_from_modin = set(pandas_dir) - set(modin_dir)
-    assert not len(
-        missing_from_modin - set(ignore)
-    ), "Differences found in API: {}".format(len(missing_from_modin - set(ignore)))
-    assert not len(
-        set(modin_dir) - set(pandas_dir)
-    ), "Differences found in API: {}".format(set(modin_dir) - set(pandas_dir))
+    missing_from_modin = set(pandas_dir) - set(modin_dir) - set(ignore)
+    assert not len(missing_from_modin), "Differences found in API: {}".format(
+        missing_from_modin
+    )
+    extra_in_modin = set(modin_dir) - set(pandas_dir)
+    assert not len(extra_in_modin), "Differences found in API: {}".format(
+        extra_in_modin
+    )
 
     # These have to be checked manually
     allowed_different = ["to_hdf", "hist"]
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 142af8b7c13..ce39750a4c4 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -2157,19 +2157,6 @@ def test_items(data):
         assert pandas_index == modin_index
 
 
-@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
-def test_iteritems(data):
-    modin_series, pandas_series = create_test_series(data)
-
-    modin_items = modin_series.iteritems()
-    pandas_items = pandas_series.iteritems()
-    for modin_item, pandas_item in zip(modin_items, pandas_items):
-        modin_index, modin_scalar = modin_item
-        pandas_index, pandas_scalar = pandas_item
-        df_equals(modin_scalar, pandas_scalar)
-        assert pandas_index == modin_index
-
-
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 def test_keys(data):
     modin_series, pandas_series = create_test_series(data)
@@ -3500,14 +3487,6 @@ def test_truncate(data):
     )
 
 
-def test_tshift():
-    idx = pd.date_range("1/1/2012", periods=5, freq="M")
-    data = np.random.randint(0, 100, size=len(idx))
-    modin_series = pd.Series(data, index=idx)
-    pandas_series = pandas.Series(data, index=idx)
-    df_equals(modin_series.tshift(4), pandas_series.tshift(4))
-
-
 def test_tz_convert():
     modin_idx = pd.date_range(
         "1/1/2012", periods=400, freq="2D", tz="America/Los_Angeles"

From 9ee440c17d64166d1e97aad76b63f17fe6bde2d0 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 16:17:15 +0200
Subject: [PATCH 010/176] remove 'slice_shift'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 docs/supported_apis/dataframe_supported.rst |  3 ---
 docs/supported_apis/series_supported.rst    |  2 --
 modin/pandas/dataframe.py                   | 28 ---------------------
 modin/pandas/series.py                      | 23 -----------------
 modin/pandas/test/dataframe/test_default.py |  6 +----
 modin/pandas/test/test_series.py            |  6 +----
 6 files changed, 2 insertions(+), 66 deletions(-)

diff --git a/docs/supported_apis/dataframe_supported.rst b/docs/supported_apis/dataframe_supported.rst
index e25f04ff159..98230130b5d 100644
--- a/docs/supported_apis/dataframe_supported.rst
+++ b/docs/supported_apis/dataframe_supported.rst
@@ -367,8 +367,6 @@ default to pandas.
 | ``skew``                   | `skew`_                   | P                      | Modin defaults to pandas if given the ``level``    |
 |                            |                           |                        | param                                              |
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
-| ``slice_shift``            | `slice_shift`_            | Y                      |                                                    |
-+----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``sort_index``             | `sort_index`_             | Y                      |                                                    |
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``sort_values``            | `sort_values`_            | Y                      | Shuffles data. Order of indexes that have the      |
@@ -634,7 +632,6 @@ default to pandas.
 .. _`shift`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.shift.html#pandas.DataFrame.shift
 .. _`size`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.size.html#pandas.DataFrame.size
 .. _`skew`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.skew.html#pandas.DataFrame.skew
-.. _`slice_shift`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.slice_shift.html#pandas.DataFrame.slice_shift
 .. _`sort_index`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_index.html#pandas.DataFrame.sort_index
 .. _`sort_values`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_values.html#pandas.DataFrame.sort_values
 .. _`sparse`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sparse.html#pandas-dataframe-sparse
diff --git a/docs/supported_apis/series_supported.rst b/docs/supported_apis/series_supported.rst
index 8cb25d6a651..60928dff83a 100644
--- a/docs/supported_apis/series_supported.rst
+++ b/docs/supported_apis/series_supported.rst
@@ -391,8 +391,6 @@ the related section on :doc:`Defaulting to pandas </supported_apis/index>`.
 | ``skew``                    | P                               | Modin defaults to pandas if given the ``level``    |
 |                             |                                 | param.                                             |
 +-----------------------------+---------------------------------+----------------------------------------------------+
-| ``slice_shift``             | Y                               |                                                    |
-+-----------------------------+---------------------------------+----------------------------------------------------+
 | ``sort_index``              | Y                               |                                                    |
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``sort_values``             | Y                               | Order of indexes that have the same sort key       |
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index a5012defbb0..f3e6d69cfe1 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -1589,34 +1589,6 @@ def nsmallest(self, n, columns, keep="first"):  # noqa: PR01, RT01, D200
             )
         )
 
-    def slice_shift(self, periods=1, axis=0):  # noqa: PR01, RT01, D200
-        """
-        Equivalent to `shift` without copying data.
-        """
-        if periods == 0:
-            return self.copy()
-
-        if axis == "index" or axis == 0:
-            if abs(periods) >= len(self.index):
-                return self.__constructor__(columns=self.columns)
-            else:
-                new_df = self.iloc[:-periods] if periods > 0 else self.iloc[-periods:]
-                new_df.index = (
-                    self.index[periods:] if periods > 0 else self.index[:periods]
-                )
-                return new_df
-        else:
-            if abs(periods) >= len(self.columns):
-                return self.__constructor__(index=self.index)
-            else:
-                new_df = (
-                    self.iloc[:, :-periods] if periods > 0 else self.iloc[:, -periods:]
-                )
-                new_df.columns = (
-                    self.columns[periods:] if periods > 0 else self.columns[:periods]
-                )
-                return new_df
-
     def unstack(self, level=-1, fill_value=None):  # noqa: PR01, RT01, D200
         """
         Pivot a level of the (necessarily hierarchical) index labels.
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index cce6b3d96af..5dadbc6db8a 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1386,29 +1386,6 @@ def nsmallest(self, n=5, keep="first"):  # noqa: PR01, RT01, D200
             query_compiler=self._query_compiler.nsmallest(n=n, keep=keep)
         )
 
-    def slice_shift(self, periods=1, axis=0):  # noqa: PR01, RT01, D200
-        """
-        Equivalent to `shift` without copying data.
-        """
-        if periods == 0:
-            return self.copy()
-
-        if axis == "index" or axis == 0:
-            if abs(periods) >= len(self.index):
-                return self.__constructor__(dtype=self.dtype, name=self.name)
-            else:
-                new_df = self.iloc[:-periods] if periods > 0 else self.iloc[-periods:]
-                new_df.index = (
-                    self.index[periods:] if periods > 0 else self.index[:periods]
-                )
-                return new_df
-        else:
-            raise ValueError(
-                "No axis named {axis} for object type {type}".format(
-                    axis=axis, type=type(self)
-                )
-            )
-
     def shift(
         self, periods=1, freq=None, axis=0, fill_value=None
     ):  # noqa: PR01, RT01, D200
diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py
index 00b61ac4f89..a245f276dae 100644
--- a/modin/pandas/test/dataframe/test_default.py
+++ b/modin/pandas/test/dataframe/test_default.py
@@ -816,7 +816,7 @@ def test_resample_getitem(columns):
 @pytest.mark.parametrize("index", ["default", "ndarray", "has_duplicates"])
 @pytest.mark.parametrize("axis", [0, 1])
 @pytest.mark.parametrize("periods", [0, 1, -1, 10, -10, 1000000000, -1000000000])
-def test_shift_slice_shift(data, index, axis, periods):
+def test_shift(data, index, axis, periods):
     modin_df, pandas_df = create_test_dfs(data)
     if index == "ndarray":
         data_column_length = len(data[next(iter(data))])
@@ -832,10 +832,6 @@ def test_shift_slice_shift(data, index, axis, periods):
         modin_df.shift(periods=periods, axis=axis, fill_value=777),
         pandas_df.shift(periods=periods, axis=axis, fill_value=777),
     )
-    df_equals(
-        modin_df.slice_shift(periods=periods, axis=axis),
-        pandas_df.slice_shift(periods=periods, axis=axis),
-    )
 
 
 @pytest.mark.parametrize("is_multi_idx", [True, False], ids=["idx_multi", "idx_index"])
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index ce39750a4c4..4a1e8ee849a 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -3118,7 +3118,7 @@ def test_skew(data, skipna):
 @pytest.mark.parametrize("index", ["default", "ndarray", "has_duplicates"])
 @pytest.mark.parametrize("periods", [0, 1, -1, 10, -10, 1000000000, -1000000000])
 @pytest.mark.parametrize("name", [None, "foo"])
-def test_shift_slice_shift(data, index, periods, name):
+def test_shift(data, index, periods, name):
     modin_series, pandas_series = create_test_series(data, name=name)
     if index == "ndarray":
         data_column_length = len(data[next(iter(data))])
@@ -3139,10 +3139,6 @@ def test_shift_slice_shift(data, index, periods, name):
         pandas_series.shift(periods=periods, fill_value=777),
     )
     eval_general(modin_series, pandas_series, lambda df: df.shift(axis=1))
-    df_equals(
-        modin_series.slice_shift(periods=periods),
-        pandas_series.slice_shift(periods=periods),
-    )
 
 
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)

From 3cb0007f7b7dad634f208ba9021e525d3c4d877b Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 16:50:03 +0200
Subject: [PATCH 011/176] remove 'append'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 asv_bench/benchmarks/benchmarks.py            | 17 -----
 docs/supported_apis/dataframe_supported.rst   |  4 -
 docs/supported_apis/series_supported.rst      |  3 -
 modin/pandas/dataframe.py                     | 59 --------------
 modin/pandas/series.py                        | 76 -------------------
 modin/pandas/test/dataframe/test_binary.py    |  4 +-
 .../test/dataframe/test_map_metadata.py       | 65 ----------------
 .../storage_formats/pandas/test_internals.py  |  2 +-
 8 files changed, 3 insertions(+), 227 deletions(-)

diff --git a/asv_bench/benchmarks/benchmarks.py b/asv_bench/benchmarks/benchmarks.py
index a0a2e0caab3..7390ecb7ebd 100644
--- a/asv_bench/benchmarks/benchmarks.py
+++ b/asv_bench/benchmarks/benchmarks.py
@@ -276,23 +276,6 @@ def time_concat(self, shapes, how, axis, ignore_index):
         )
 
 
-class TimeAppend:
-    param_names = ["shapes", "sort"]
-    params = [
-        get_benchmark_shapes("TimeAppend"),
-        [False, True],
-    ]
-
-    def setup(self, shapes, sort):
-        self.df1 = generate_dataframe("int", *shapes[0], RAND_LOW, RAND_HIGH)
-        self.df2 = generate_dataframe("int", *shapes[1], RAND_LOW, RAND_HIGH)
-        if sort:
-            self.df1.columns = self.df1.columns[::-1]
-
-    def time_append(self, shapes, sort):
-        execute(self.df1.append(self.df2, sort=sort))
-
-
 class TimeBinaryOp:
     param_names = ["shapes", "binary_op", "axis"]
     params = [
diff --git a/docs/supported_apis/dataframe_supported.rst b/docs/supported_apis/dataframe_supported.rst
index 98230130b5d..967a291a640 100644
--- a/docs/supported_apis/dataframe_supported.rst
+++ b/docs/supported_apis/dataframe_supported.rst
@@ -45,9 +45,6 @@ default to pandas.
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``any``                    | `any`_                    | Y                      |                                                    |
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
-| ``append``                 | `append`_                 | Y                      | **Hdk**: ``Y`` but ``sort`` and                    |
-|                            |                           |                        | ``ignore_index`` parameters ignored                |
-+----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``apply``                  | `apply`_                  | Y                      | See ``agg``                                        |
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``applymap``               | `applymap`_               | Y                      |                                                    |
@@ -482,7 +479,6 @@ default to pandas.
 .. _`align`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.align.html#pandas.DataFrame.align
 .. _`all`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.all.html#pandas.DataFrame.all
 .. _`any`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.any.html#pandas.DataFrame.any
-.. _`append`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.append.html#pandas.DataFrame.append
 .. _`apply`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.apply.html#pandas.DataFrame.apply
 .. _`applymap`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.applymap.html#pandas.DataFrame.applymap
 .. _`asfreq`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.asfreq.html#pandas.DataFrame.asfreq
diff --git a/docs/supported_apis/series_supported.rst b/docs/supported_apis/series_supported.rst
index 60928dff83a..e392dfd3043 100644
--- a/docs/supported_apis/series_supported.rst
+++ b/docs/supported_apis/series_supported.rst
@@ -41,9 +41,6 @@ the related section on :doc:`Defaulting to pandas </supported_apis/index>`.
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``any``                     | Y                               |                                                    |
 +-----------------------------+---------------------------------+----------------------------------------------------+
-| ``append``                  | Y                               | **Hdk**: ``Y`` but ``sort`` and                    |
-|                             |                                 | ``ignore_index`` parameters ignored                |
-+-----------------------------+---------------------------------+----------------------------------------------------+
 | ``apply``                   | Y                               |                                                    |
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``argmax``                  | Y                               |                                                    |
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index f3e6d69cfe1..26e41a1e3df 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -573,65 +573,6 @@ def add(
             broadcast=isinstance(other, Series),
         )
 
-    def append(
-        self, other, ignore_index=False, verify_integrity=False, sort=False
-    ):  # noqa: PR01, RT01, D200
-        """
-        Append rows of `other` to the end of caller, returning a new object.
-        """
-        if sort is False:
-            warnings.warn(
-                "Due to https://github.com/pandas-dev/pandas/issues/35092, "
-                + "Pandas ignores sort=False; Modin correctly does not sort."
-            )
-        if isinstance(other, (Series, dict)):
-            if isinstance(other, dict):
-                other = Series(other)
-            if other.name is None and not ignore_index:
-                raise TypeError(
-                    "Can only append a Series if ignore_index=True"
-                    + " or if the Series has a name"
-                )
-            if other.name is not None:
-                # other must have the same index name as self, otherwise
-                # index name will be reset
-                name = other.name
-                # We must transpose here because a Series becomes a new row, and the
-                # structure of the query compiler is currently columnar
-                other = other._query_compiler.transpose()
-                other.index = pandas.Index([name], name=self.index.name)
-            else:
-                # See note above about transpose
-                other = other._query_compiler.transpose()
-        elif isinstance(other, list):
-            if not all(isinstance(o, BasePandasDataset) for o in other):
-                other = self.__constructor__(pandas.DataFrame(other))._query_compiler
-            else:
-                other = [obj._query_compiler for obj in other]
-        else:
-            other = other._query_compiler
-
-        # If ignore_index is False, by definition the Index will be correct.
-        # We also do this first to ensure that we don't waste compute/memory.
-        if verify_integrity and not ignore_index:
-            appended_index = (
-                self.index.append(other.index)
-                if not isinstance(other, list)
-                else self.index.append([o.index for o in other])
-            )
-            is_valid = next((False for idx in appended_index.duplicated() if idx), True)
-            if not is_valid:
-                raise ValueError(
-                    "Indexes have overlapping values: {}".format(
-                        appended_index[appended_index.duplicated()]
-                    )
-                )
-
-        query_compiler = self._query_compiler.concat(
-            0, other, ignore_index=ignore_index, sort=sort
-        )
-        return self.__constructor__(query_compiler=query_compiler)
-
     def assign(self, **kwargs):  # noqa: PR01, RT01, D200
         """
         Assign new columns to a ``DataFrame``.
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 5dadbc6db8a..29492ae1544 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -538,82 +538,6 @@ def add_suffix(self, suffix):  # noqa: PR01, RT01, D200
             query_compiler=self._query_compiler.add_suffix(suffix, axis=0)
         )
 
-    def append(
-        self, to_append, ignore_index=False, verify_integrity=False
-    ):  # noqa: PR01, RT01, D200
-        """
-        Concatenate two or more Series.
-        """
-        from .dataframe import DataFrame
-
-        bad_type_msg = (
-            'cannot concatenate object of type "{}"; only pd.Series, '
-            + "pd.DataFrame, and pd.Panel (deprecated) objs are valid"
-        )
-        if isinstance(to_append, list):
-            if not all(isinstance(o, BasePandasDataset) for o in to_append):
-                raise TypeError(
-                    bad_type_msg.format(
-                        type(
-                            next(
-                                o
-                                for o in to_append
-                                if not isinstance(o, BasePandasDataset)
-                            )
-                        )
-                    )
-                )
-            elif all(isinstance(o, Series) for o in to_append):
-                self.name = None
-                for i in range(len(to_append)):
-                    to_append[i].name = None
-                    to_append[i] = to_append[i]._query_compiler
-            else:
-                # Matching pandas behavior of naming the Series columns 0
-                self.name = 0
-                for i in range(len(to_append)):
-                    if isinstance(to_append[i], Series):
-                        to_append[i].name = 0
-                        to_append[i] = DataFrame(to_append[i])
-                return DataFrame(self.copy()).append(
-                    to_append,
-                    ignore_index=ignore_index,
-                    verify_integrity=verify_integrity,
-                )
-        elif isinstance(to_append, Series):
-            self.name = None
-            to_append.name = None
-            to_append = [to_append._query_compiler]
-        elif isinstance(to_append, DataFrame):
-            self.name = 0
-            return DataFrame(self.copy()).append(
-                to_append, ignore_index=ignore_index, verify_integrity=verify_integrity
-            )
-        else:
-            raise TypeError(bad_type_msg.format(type(to_append)))
-        # If ignore_index is False, by definition the Index will be correct.
-        # We also do this first to ensure that we don't waste compute/memory.
-        if verify_integrity and not ignore_index:
-            appended_index = (
-                self.index.append(to_append.index)
-                if not isinstance(to_append, list)
-                else self.index.append([o.index for o in to_append])
-            )
-            is_valid = next((False for idx in appended_index.duplicated() if idx), True)
-            if not is_valid:
-                raise ValueError(
-                    "Indexes have overlapping values: {}".format(
-                        appended_index[appended_index.duplicated()]
-                    )
-                )
-        query_compiler = self._query_compiler.concat(
-            0, to_append, ignore_index=ignore_index, sort=None
-        )
-        if len(query_compiler.columns) > 1:
-            return DataFrame(query_compiler=query_compiler)
-        else:
-            return self.__constructor__(query_compiler=query_compiler)
-
     def aggregate(self, func=None, axis=0, *args, **kwargs):  # noqa: PR01, RT01, D200
         """
         Aggregate using one or more operations over the specified axis.
diff --git a/modin/pandas/test/dataframe/test_binary.py b/modin/pandas/test/dataframe/test_binary.py
index 8b5c00c163c..5d14072837d 100644
--- a/modin/pandas/test/dataframe/test_binary.py
+++ b/modin/pandas/test/dataframe/test_binary.py
@@ -255,8 +255,8 @@ def test_mismatched_row_partitions(is_idx_aligned, op_type, is_more_other_partit
     modin_df1, pandas_df1 = create_test_dfs({"a": data, "b": data})
     modin_df, pandas_df = modin_df1.loc[:2], pandas_df1.loc[:2]
 
-    modin_df2 = modin_df.append(modin_df)
-    pandas_df2 = pandas_df.append(pandas_df)
+    modin_df2 = pd.concat((modin_df, modin_df))
+    pandas_df2 = pd.concat((pandas_df, pandas_df))
     if is_more_other_partitions:
         modin_df2, modin_df1 = modin_df1, modin_df2
         pandas_df2, pandas_df1 = pandas_df1, pandas_df2
diff --git a/modin/pandas/test/dataframe/test_map_metadata.py b/modin/pandas/test/dataframe/test_map_metadata.py
index e2abd03c306..ba437a86a91 100644
--- a/modin/pandas/test/dataframe/test_map_metadata.py
+++ b/modin/pandas/test/dataframe/test_map_metadata.py
@@ -383,71 +383,6 @@ def test_isnull(data):
     df_equals(modin_result, pandas_result)
 
 
-@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
-def test_append(data):
-    modin_df = pd.DataFrame(data)
-    pandas_df = pandas.DataFrame(data)
-
-    data_to_append = {"append_a": 2, "append_b": 1000}
-
-    ignore_idx_values = [True, False]
-
-    for ignore in ignore_idx_values:
-        try:
-            pandas_result = pandas_df.append(data_to_append, ignore_index=ignore)
-        except Exception as err:
-            with pytest.raises(type(err)):
-                modin_df.append(data_to_append, ignore_index=ignore)
-        else:
-            modin_result = modin_df.append(data_to_append, ignore_index=ignore)
-            df_equals(modin_result, pandas_result)
-
-    try:
-        pandas_result = pandas_df.append(pandas_df.iloc[-1])
-    except Exception as err:
-        with pytest.raises(type(err)):
-            modin_df.append(modin_df.iloc[-1])
-    else:
-        modin_result = modin_df.append(modin_df.iloc[-1])
-        df_equals(modin_result, pandas_result)
-
-    try:
-        pandas_result = pandas_df.append(list(pandas_df.iloc[-1]))
-    except Exception as err:
-        with pytest.raises(type(err)):
-            modin_df.append(list(modin_df.iloc[-1]))
-    else:
-        modin_result = modin_df.append(list(modin_df.iloc[-1]))
-        df_equals(modin_result, pandas_result)
-
-    verify_integrity_values = [True, False]
-
-    for verify_integrity in verify_integrity_values:
-        try:
-            pandas_result = pandas_df.append(
-                [pandas_df, pandas_df], verify_integrity=verify_integrity
-            )
-        except Exception as err:
-            with pytest.raises(type(err)):
-                modin_df.append([modin_df, modin_df], verify_integrity=verify_integrity)
-        else:
-            modin_result = modin_df.append(
-                [modin_df, modin_df], verify_integrity=verify_integrity
-            )
-            df_equals(modin_result, pandas_result)
-
-        try:
-            pandas_result = pandas_df.append(
-                pandas_df, verify_integrity=verify_integrity
-            )
-        except Exception as err:
-            with pytest.raises(type(err)):
-                modin_df.append(modin_df, verify_integrity=verify_integrity)
-        else:
-            modin_result = modin_df.append(modin_df, verify_integrity=verify_integrity)
-            df_equals(modin_result, pandas_result)
-
-
 def test_astype():
     td = pandas.DataFrame(test_data["int_data"])[["col1", "index", "col3", "col4"]]
     modin_df = pd.DataFrame(td.values, index=td.index, columns=td.columns)
diff --git a/modin/test/storage_formats/pandas/test_internals.py b/modin/test/storage_formats/pandas/test_internals.py
index afc773a1b24..b3494f1b3a9 100644
--- a/modin/test/storage_formats/pandas/test_internals.py
+++ b/modin/test/storage_formats/pandas/test_internals.py
@@ -159,7 +159,7 @@ def test_aligning_partitions():
     modin_df1, _ = create_test_dfs({"a": data, "b": data})
     modin_df = modin_df1.loc[:2]
 
-    modin_df2 = modin_df.append(modin_df)
+    modin_df2 = pd.concat((modin_df, modin_df))
 
     modin_df2["c"] = modin_df1["b"]
     repr(modin_df2)

From 2b0fbfc84635495d650b0a64fbafcc95f7ea8914 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 17:00:41 +0200
Subject: [PATCH 012/176] add new parameter: 'axis' for 'add_suffix',
 'add_prefix'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/dataframe.py | 8 ++++----
 modin/pandas/series.py    | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 26e41a1e3df..6f878f89ad5 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -354,20 +354,20 @@ def shape(self):  # noqa: RT01, D200
         """
         return len(self.index), len(self.columns)
 
-    def add_prefix(self, prefix):  # noqa: PR01, RT01, D200
+    def add_prefix(self, prefix, axis=None):  # noqa: PR01, RT01, D200
         """
         Prefix labels with string `prefix`.
         """
         return self.__constructor__(
-            query_compiler=self._query_compiler.add_prefix(prefix)
+            query_compiler=self._query_compiler.add_prefix(prefix, axis or 1)
         )
 
-    def add_suffix(self, suffix):  # noqa: PR01, RT01, D200
+    def add_suffix(self, suffix, axis=None):  # noqa: PR01, RT01, D200
         """
         Suffix labels with string `suffix`.
         """
         return self.__constructor__(
-            query_compiler=self._query_compiler.add_suffix(suffix)
+            query_compiler=self._query_compiler.add_suffix(suffix, axis or 1)
         )
 
     def applymap(self, func, na_action: Optional[str] = None, **kwargs):
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 29492ae1544..d6c3183640e 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -522,20 +522,20 @@ def radd(
             new_other, level=level, fill_value=fill_value, axis=axis
         )
 
-    def add_prefix(self, prefix):  # noqa: PR01, RT01, D200
+    def add_prefix(self, prefix, axis=None):  # noqa: PR01, RT01, D200
         """
         Prefix labels with string `prefix`.
         """
         return self.__constructor__(
-            query_compiler=self._query_compiler.add_prefix(prefix, axis=0)
+            query_compiler=self._query_compiler.add_prefix(prefix, axis=axis or 0)
         )
 
-    def add_suffix(self, suffix):  # noqa: PR01, RT01, D200
+    def add_suffix(self, suffix, axis=None):  # noqa: PR01, RT01, D200
         """
         Suffix labels with string `suffix`.
         """
         return self.__constructor__(
-            query_compiler=self._query_compiler.add_suffix(suffix, axis=0)
+            query_compiler=self._query_compiler.add_suffix(suffix, axis=axis or 0)
         )
 
     def aggregate(self, func=None, axis=0, *args, **kwargs):  # noqa: PR01, RT01, D200

From 510b9078f9aac461bb56059df283e78b3c9efc3b Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 20:54:59 +0200
Subject: [PATCH 013/176] use copy=None instead of copy=True

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../storage_formats/base/query_compiler.py    |  4 +-
 .../storage_formats/pandas/query_compiler.py  |  2 +-
 modin/pandas/base.py                          | 66 +++++++------------
 modin/pandas/dataframe.py                     |  4 +-
 modin/pandas/series.py                        | 41 +++++-------
 5 files changed, 46 insertions(+), 71 deletions(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 23ea634a77b..162ddac7330 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -1605,7 +1605,7 @@ def astype(self, col_dtypes, errors: str = "raise"):  # noqa: PR02
             self, dtype=col_dtypes, errors=errors
         )
 
-    def infer_objects(self):
+    def infer_objects(self, copy):
         """
         Attempt to infer better dtypes for object columns.
 
@@ -1618,7 +1618,7 @@ def infer_objects(self):
         BaseQueryCompiler
             New query compiler with udpated dtypes.
         """
-        return DataFrameDefault.register(pandas.DataFrame.infer_objects)(self)
+        return DataFrameDefault.register(pandas.DataFrame.infer_objects)(self, copy)
 
     def convert_dtypes(
         self,
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index bf5fbfef986..d6f1714e4da 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -1711,7 +1711,7 @@ def astype(self, col_dtypes, errors: str = "raise"):
         # invalid type keys.
         return self.__constructor__(self._modin_frame.astype(col_dtypes, errors=errors))
 
-    def infer_objects(self):
+    def infer_objects(self, copy):
         return self.__constructor__(self._modin_frame.infer_objects())
 
     # Column/Row partitions reduce operations
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index a3acb28132b..0db7c69387e 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -707,7 +707,7 @@ def align(
         join="outer",
         axis=None,
         level=None,
-        copy=True,
+        copy=None,
         fill_value=None,
         method=None,
         limit=None,
@@ -949,10 +949,12 @@ def asof(self, where, subset=None):  # noqa: PR01, RT01, D200
             result = result.squeeze()
         return result
 
-    def astype(self, dtype, copy=True, errors="raise"):  # noqa: PR01, RT01, D200
+    def astype(self, dtype, copy=None, errors="raise"):  # noqa: PR01, RT01, D200
         """
         Cast a Modin object to a specified dtype `dtype`.
         """
+        if copy is None:
+            copy = True
         # dtype can be a series, a dict, or a scalar. If it's series or scalar,
         # convert it to a dict before passing it to the query compiler.
         if isinstance(dtype, (pd.Series, pandas.Series)):
@@ -1707,11 +1709,13 @@ def idxmin(self, axis=0, skipna=True, numeric_only=False):  # noqa: PR01, RT01,
             )
         )
 
-    def infer_objects(self):  # noqa: RT01, D200
+    def infer_objects(self, copy=None):  # noqa: RT01, D200
         """
         Attempt to infer better dtypes for object columns.
         """
-        return self._query_compiler.infer_objects()
+        if copy is None:
+            copy = True
+        return self._query_compiler.infer_objects(copy)
 
     def convert_dtypes(
         self,
@@ -2249,7 +2253,7 @@ def reindex(
         )
 
     def reindex_like(
-        self, other, method=None, copy=True, limit=None, tolerance=None
+        self, other, method=None, copy=None, limit=None, tolerance=None
     ):  # noqa: PR01, RT01, D200
         """
         Return an object with matching indices as `other` object.
@@ -2712,44 +2716,18 @@ def median(
     def set_axis(
         self,
         labels,
-        axis: Axis = 0,
-        inplace=no_default,
         *,
-        copy=no_default,
+        axis: Axis = 0,
+        copy=None,
     ):  # noqa: PR01, RT01, D200
         """
         Assign desired index to given axis.
         """
-        if inplace is not no_default:
-            warnings.warn(
-                f"{type(self).__name__}.set_axis 'inplace' keyword is deprecated "
-                + "and will be removed in a future version. Use "
-                + "`obj = obj.set_axis(..., copy=False)` instead",
-                FutureWarning,
-                stacklevel=2,
-            )
-        else:
-            inplace = False
-
-        if inplace:
-            if copy is True:
-                raise ValueError("Cannot specify both inplace=True and copy=True")
-            copy = False
-        elif copy is no_default:
+        if copy is None:
             copy = True
-        if is_scalar(labels):
-            warnings.warn(
-                'set_axis now takes "labels" as first argument, and '
-                + '"axis" as named parameter. The old form, with "axis" as '
-                + 'first parameter and "labels" as second, is still supported '
-                + "but will be deprecated in a future version of pandas.",
-                FutureWarning,
-                stacklevel=2,
-            )
-            labels, axis = axis, labels
         obj = self.copy() if copy else self
         setattr(obj, pandas.DataFrame._get_axis_name(axis), labels)
-        return None if inplace is True else obj
+        return obj
 
     def set_flags(
         self, *, copy: bool = False, allows_duplicate_labels: Optional[bool] = None
@@ -2961,10 +2939,12 @@ def sub(
 
     subtract = sub
 
-    def swapaxes(self, axis1, axis2, copy=True):  # noqa: PR01, RT01, D200
+    def swapaxes(self, axis1, axis2, copy=None):  # noqa: PR01, RT01, D200
         """
         Interchange axes and swap values axes appropriately.
         """
+        if copy is None:
+            copy = True
         axis1 = self._get_axis_number(axis1)
         axis2 = self._get_axis_number(axis2)
         if axis1 != axis2:
@@ -3263,7 +3243,7 @@ def to_numpy(
 
     # TODO(williamma12): When this gets implemented, have the series one call this.
     def to_period(
-        self, freq=None, axis=0, copy=True
+        self, freq=None, axis=0, copy=None
     ):  # pragma: no cover  # noqa: PR01, RT01, D200
         """
         Convert `BasePandasDataset` from DatetimeIndex to PeriodIndex.
@@ -3360,7 +3340,7 @@ def to_sql(
 
     # TODO(williamma12): When this gets implemented, have the series one call this.
     def to_timestamp(
-        self, freq=None, how="start", axis=0, copy=True
+        self, freq=None, how="start", axis=0, copy=None
     ):  # noqa: PR01, RT01, D200
         """
         Cast to DatetimeIndex of timestamps, at *beginning* of period.
@@ -3388,7 +3368,7 @@ def truediv(
     div = divide = truediv
 
     def truncate(
-        self, before=None, after=None, axis=None, copy=True
+        self, before=None, after=None, axis=None, copy=None
     ):  # noqa: PR01, RT01, D200
         """
         Truncate a `BasePandasDataset` before and after some index value.
@@ -3421,10 +3401,12 @@ def transform(self, func, axis=0, *args, **kwargs):  # noqa: PR01, RT01, D200
             raise ValueError("transforms cannot produce aggregated results")
         return result
 
-    def tz_convert(self, tz, axis=0, level=None, copy=True):  # noqa: PR01, RT01, D200
+    def tz_convert(self, tz, axis=0, level=None, copy=None):  # noqa: PR01, RT01, D200
         """
         Convert tz-aware axis to target time zone.
         """
+        if copy is None:
+            copy = True
         axis = self._get_axis_number(axis)
         if level is not None:
             new_labels = (
@@ -3436,11 +3418,13 @@ def tz_convert(self, tz, axis=0, level=None, copy=True):  # noqa: PR01, RT01, D2
         return obj.set_axis(new_labels, axis, copy=copy)
 
     def tz_localize(
-        self, tz, axis=0, level=None, copy=True, ambiguous="raise", nonexistent="raise"
+        self, tz, axis=0, level=None, copy=None, ambiguous="raise", nonexistent="raise"
     ):  # noqa: PR01, RT01, D200
         """
         Localize tz-naive index of a `BasePandasDataset` to target time zone.
         """
+        if copy is None:
+            copy = True
         axis = self._get_axis_number(axis)
         new_labels = (
             pandas.Series(index=self.axes[axis])
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 6f878f89ad5..665d993ea7e 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -2272,7 +2272,7 @@ def to_parquet(
         )
 
     def to_period(
-        self, freq=None, axis=0, copy=True
+        self, freq=None, axis=0, copy=None
     ):  # pragma: no cover # noqa: PR01, RT01, D200
         """
         Convert ``DataFrame`` from ``DatetimeIndex`` to ``PeriodIndex``.
@@ -2366,7 +2366,7 @@ def to_xml(
         )
 
     def to_timestamp(
-        self, freq=None, how="start", axis=0, copy=True
+        self, freq=None, how="start", axis=0, copy=None
     ):  # noqa: PR01, RT01, D200
         """
         Cast to DatetimeIndex of timestamps, at *beginning* of period.
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index d6c3183640e..736e3f8156f 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1437,27 +1437,18 @@ def ravel(self, order="C"):  # noqa: PR01, RT01, D200
         return data
 
     @_inherit_docstrings(pandas.Series.reindex, apilink="pandas.Series.reindex")
-    def reindex(self, *args, **kwargs):
-        if args:
-            if len(args) > 1:
-                raise TypeError("Only one positional argument ('index') is allowed")
-            if "index" in kwargs:
-                raise TypeError(
-                    "'index' passed as both positional and keyword argument"
-                )
-            kwargs.update({"index": args[0]})
-        index = kwargs.pop("index", None)
-        method = kwargs.pop("method", None)
-        level = kwargs.pop("level", None)
-        copy = kwargs.pop("copy", True)
-        limit = kwargs.pop("limit", None)
-        tolerance = kwargs.pop("tolerance", None)
-        fill_value = kwargs.pop("fill_value", None)
-        if kwargs:
-            raise TypeError(
-                "reindex() got an unexpected keyword "
-                + f'argument "{list(kwargs.keys())[0]}"'
-            )
+    def reindex(
+        self,
+        index=None,
+        *,
+        axis: Axis = None,
+        method: str = None,
+        copy: bool = None,
+        level=None,
+        fill_value=None,
+        limit: int = None,
+        tolerance=None,
+    ):  # noqa: PR01, RT01, D200
         return super(Series, self).reindex(
             index=index,
             columns=None,
@@ -1810,7 +1801,7 @@ def sum(
             )
         )
 
-    def swaplevel(self, i=-2, j=-1, copy=True):  # noqa: PR01, RT01, D200
+    def swaplevel(self, i=-2, j=-1, copy=None):  # noqa: PR01, RT01, D200
         """
         Swap levels `i` and `j` in a `MultiIndex`.
         """
@@ -1878,7 +1869,7 @@ def to_numpy(
 
     # TODO(williamma12): When we implement to_timestamp, have this call the version
     # in base.py
-    def to_period(self, freq=None, copy=True):  # noqa: PR01, RT01, D200
+    def to_period(self, freq=None, copy=None):  # noqa: PR01, RT01, D200
         """
         Cast to PeriodArray/Index at a particular frequency.
         """
@@ -1915,7 +1906,7 @@ def to_string(
 
     # TODO(williamma12): When we implement to_timestamp, have this call the version
     # in base.py
-    def to_timestamp(self, freq=None, how="start", copy=True):  # noqa: PR01, RT01, D200
+    def to_timestamp(self, freq=None, how="start", copy=None):  # noqa: PR01, RT01, D200
         """
         Cast to DatetimeIndex of Timestamps, at beginning of period.
         """
@@ -1943,7 +1934,7 @@ def truediv(
     div = divide = truediv
 
     def truncate(
-        self, before=None, after=None, axis=None, copy=True
+        self, before=None, after=None, axis=None, copy=None
     ):  # noqa: PR01, RT01, D200
         """
         Truncate a Series before and after some index value.

From 3cddf29f8f4b747197437691664d7ee308efcd26 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 21:02:01 +0200
Subject: [PATCH 014/176] remove deprecated parameters in 'between_time'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 0db7c69387e..61896c72a40 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1026,9 +1026,7 @@ def between_time(
         self: "BasePandasDataset",
         start_time,
         end_time,
-        include_start: "bool | NoDefault" = no_default,
-        include_end: "bool | NoDefault" = no_default,
-        inclusive: "str | None" = None,
+        inclusive="both",
         axis=None,
     ):  # noqa: PR01, RT01, D200
         axis = self._get_axis_number(axis)
@@ -1038,8 +1036,6 @@ def between_time(
             .between_time(
                 start_time,
                 end_time,
-                include_start=include_start,
-                include_end=include_end,
                 inclusive=inclusive,
             )
             .index

From bc5e97f91a937af096402c0151f97b798ac1c269 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 21:05:37 +0200
Subject: [PATCH 015/176] update 'drop_duplicates'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/dataframe.py | 2 +-
 modin/pandas/series.py    | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 665d993ea7e..ac00438fd80 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -308,7 +308,7 @@ def ndim(self):  # noqa: RT01, D200
         return 2
 
     def drop_duplicates(
-        self, subset=None, keep="first", inplace=False, ignore_index=False
+        self, subset=None, *, keep="first", inplace=False, ignore_index=False
     ):  # noqa: PR01, RT01, D200
         """
         Return ``DataFrame`` with duplicate rows removed.
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 736e3f8156f..d72003fc8a2 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -880,11 +880,15 @@ def dot(self, other):  # noqa: PR01, RT01, D200
             query_compiler=self._query_compiler.dot(other, squeeze_self=True)
         )
 
-    def drop_duplicates(self, keep="first", inplace=False):  # noqa: PR01, RT01, D200
+    def drop_duplicates(
+        self, *, keep="first", inplace=False, ignore_index=False
+    ):  # noqa: PR01, RT01, D200
         """
         Return Series with duplicate values removed.
         """
-        return super(Series, self).drop_duplicates(keep=keep, inplace=inplace)
+        return super(Series, self).drop_duplicates(
+            keep=keep, inplace=inplace, ignore_index=ignore_index
+        )
 
     def dropna(self, axis=0, inplace=False, how=None):  # noqa: PR01, RT01, D200
         """

From 0d5672e48f47e724edb9b2e8222d2efd0bf05c75 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 21:14:56 +0200
Subject: [PATCH 016/176] update 'dropna'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py   | 6 ++++++
 modin/pandas/series.py | 8 ++++++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 61896c72a40..8289a1206b2 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1341,11 +1341,13 @@ def drop(
 
     def dropna(
         self,
+        *,
         axis: Axis = 0,
         how: str | NoDefault = no_default,
         thresh: int | NoDefault = no_default,
         subset: IndexLabel = None,
         inplace: bool = False,
+        ignore_index: bool = False,
     ):  # noqa: PR01, RT01, D200
         """
         Remove missing values.
@@ -1374,6 +1376,10 @@ def dropna(
         new_query_compiler = self._query_compiler.dropna(
             axis=axis, how=how, thresh=thresh, subset=subset
         )
+        if ignore_index:
+            new_query_compiler.index = pandas.RangeIndex(
+                stop=len(new_query_compiler.index)
+            )
         return self._create_or_update_from_compiler(new_query_compiler, inplace)
 
     def droplevel(self, level, axis=0):  # noqa: PR01, RT01, D200
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index d72003fc8a2..a5709049194 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -890,11 +890,15 @@ def drop_duplicates(
             keep=keep, inplace=inplace, ignore_index=ignore_index
         )
 
-    def dropna(self, axis=0, inplace=False, how=None):  # noqa: PR01, RT01, D200
+    def dropna(
+        self, *, axis=0, inplace=False, how=None, ignore_index=False
+    ):  # noqa: PR01, RT01, D200
         """
         Return a new Series with missing values removed.
         """
-        return super(Series, self).dropna(axis=axis, inplace=inplace)
+        return super(Series, self).dropna(
+            axis=axis, inplace=inplace, ignore_index=ignore_index
+        )
 
     def duplicated(self, keep="first"):  # noqa: PR01, RT01, D200
         """

From f586ad1101d8ea75ef7d91eb99af0ed19dd7701a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 21:17:40 +0200
Subject: [PATCH 017/176] update 'factorize'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/series.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index a5709049194..5c63a44f9fe 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -933,7 +933,7 @@ def explode(self, ignore_index: bool = False):  # noqa: PR01, RT01, D200
         )
 
     def factorize(
-        self, sort=False, na_sentinel=no_default, use_na_sentinel=no_default
+        self, sort=False, use_na_sentinel=True
     ):  # noqa: PR01, RT01, D200
         """
         Encode the object as an enumerated type or categorical variable.
@@ -941,7 +941,6 @@ def factorize(
         return self._default_to_pandas(
             pandas.Series.factorize,
             sort=sort,
-            na_sentinel=na_sentinel,
             use_na_sentinel=use_na_sentinel,
         )
 

From be041e8af65626a101d7e6be811f70c2a487f67a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 21:22:53 +0200
Subject: [PATCH 018/176] update 'groupby'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/dataframe.py | 17 ++---------------
 modin/pandas/series.py    | 21 +++------------------
 2 files changed, 5 insertions(+), 33 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index ac00438fd80..10cc9a54924 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -426,26 +426,13 @@ def groupby(
         level=None,
         as_index=True,
         sort=True,
-        group_keys=no_default,
-        squeeze: bool = no_default,
+        group_keys=True,
         observed=False,
         dropna: bool = True,
     ):  # noqa: PR01, RT01, D200
         """
         Group ``DataFrame`` using a mapper or by a ``Series`` of columns.
         """
-        if squeeze is not no_default:
-            warnings.warn(
-                (
-                    "The `squeeze` parameter is deprecated and "
-                    + "will be removed in a future version."
-                ),
-                FutureWarning,
-                stacklevel=2,
-            )
-        else:
-            squeeze = False
-
         axis = self._get_axis_number(axis)
         idx_name = None
         # Drop here indicates whether or not to drop the data column before doing the
@@ -533,7 +520,7 @@ def groupby(
             as_index,
             sort,
             group_keys,
-            squeeze,
+            False,
             idx_name,
             observed=observed,
             drop=drop,
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 5c63a44f9fe..aaa667009c5 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -932,9 +932,7 @@ def explode(self, ignore_index: bool = False):  # noqa: PR01, RT01, D200
             ignore_index=ignore_index,
         )
 
-    def factorize(
-        self, sort=False, use_na_sentinel=True
-    ):  # noqa: PR01, RT01, D200
+    def factorize(self, sort=False, use_na_sentinel=True):  # noqa: PR01, RT01, D200
         """
         Encode the object as an enumerated type or categorical variable.
         """
@@ -997,26 +995,13 @@ def groupby(
         level=None,
         as_index=True,
         sort=True,
-        group_keys=no_default,
-        squeeze=no_default,
+        group_keys=True,
         observed=False,
         dropna: bool = True,
     ):  # noqa: PR01, RT01, D200
         """
         Group Series using a mapper or by a Series of columns.
         """
-        if squeeze is not no_default:
-            warnings.warn(
-                (
-                    "The `squeeze` parameter is deprecated and "
-                    + "will be removed in a future version."
-                ),
-                FutureWarning,
-                stacklevel=2,
-            )
-        else:
-            squeeze = False
-
         from .groupby import SeriesGroupBy
 
         if not as_index:
@@ -1036,7 +1021,7 @@ def groupby(
             as_index,
             sort,
             group_keys,
-            squeeze,
+            False,
             idx_name=None,
             observed=observed,
             drop=False,

From bdead97a1c254d33e44d5ec94f4396525e8fc41f Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 21:29:46 +0200
Subject: [PATCH 019/176] update 'kurt'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/base/query_compiler.py |  2 +-
 modin/pandas/base.py                              | 15 +--------------
 modin/pandas/series.py                            |  7 +++----
 3 files changed, 5 insertions(+), 19 deletions(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 162ddac7330..40310bf7ff8 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -5149,7 +5149,7 @@ def invert(self):
     @doc_utils.doc_reduce_agg(
         method="unbiased kurtosis", refer_to="kurt", extra_params=["skipna", "**kwargs"]
     )
-    def kurt(self, axis, level=None, numeric_only=None, skipna=True, **kwargs):
+    def kurt(self, axis, numeric_only=False, skipna=True, **kwargs):
         return DataFrameDefault.register(pandas.DataFrame.kurt)(
             self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
         )
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 8289a1206b2..92edec5be16 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1774,21 +1774,9 @@ def iloc(self):  # noqa: RT01, D200
         return _iLocIndexer(self)
 
     @_inherit_docstrings(pandas.DataFrame.kurt, apilink="pandas.DataFrame.kurt")
-    def kurt(
-        self, axis=no_default, skipna=True, level=None, numeric_only=None, **kwargs
-    ):
+    def kurt(self, axis=0, skipna=True, numeric_only=False, **kwargs):
         validate_bool_kwarg(skipna, "skipna", none_allowed=False)
         axis = self._get_axis_number(axis)
-        if level is not None:
-            func_kwargs = {
-                "skipna": skipna,
-                "level": level,
-                "numeric_only": numeric_only,
-            }
-
-            return self.__constructor__(
-                query_compiler=self._query_compiler.apply("kurt", axis, **func_kwargs)
-            )
 
         if numeric_only is not None and not numeric_only:
             self._validate_dtypes(numeric_only=True)
@@ -1803,7 +1791,6 @@ def kurt(
             data._query_compiler.kurt(
                 axis=axis,
                 skipna=skipna,
-                level=level,
                 numeric_only=numeric_only,
                 **kwargs,
             )
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index aaa667009c5..762b0f28bd3 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1156,17 +1156,16 @@ def keys(self):  # noqa: RT01, D200
 
     def kurt(
         self,
-        axis: Axis | None | NoDefault = no_default,
+        axis: Axis = 0,
         skipna=True,
-        level=None,
-        numeric_only=None,
+        numeric_only=False,
         **kwargs,
     ):  # noqa: PR01, RT01, D200
         """
         Return unbiased kurtosis over requested axis.
         """
         axis = self._get_axis_number(axis)
-        return super(Series, self).kurt(axis, skipna, level, numeric_only, **kwargs)
+        return super(Series, self).kurt(axis, skipna, numeric_only, **kwargs)
 
     kurtosis = kurt
 

From 78217efe32ba78d9dd2225de559f261792fc28c9 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 21:33:48 +0200
Subject: [PATCH 020/176] update 'mask'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py   | 9 +++------
 modin/pandas/series.py | 7 ++-----
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 92edec5be16..db0a38fa2c1 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1834,12 +1834,11 @@ def loc(self):  # noqa: RT01, D200
     def mask(
         self,
         cond,
-        other=np.nan,
+        other=no_default,
+        *,
         inplace: bool = False,
-        axis: Axis | None = None,
+        axis: Axis = None,
         level: Level = None,
-        errors: IgnoreRaise | NoDefault = "raise",
-        try_cast=no_default,
     ):  # noqa: PR01, RT01, D200
         """
         Replace values where the condition is True.
@@ -1851,8 +1850,6 @@ def mask(
             inplace=inplace,
             axis=axis,
             level=level,
-            errors=errors,
-            try_cast=try_cast,
         )
 
     def max(
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 762b0f28bd3..d8677853929 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1213,12 +1213,11 @@ def arg(s):
     def mask(
         self,
         cond,
-        other=np.nan,
+        other=no_default,
+        *,
         inplace=False,
         axis=None,
         level=None,
-        errors=no_default,
-        try_cast=no_default,
     ):
         return self._default_to_pandas(
             pandas.Series.mask,
@@ -1227,8 +1226,6 @@ def mask(
             inplace=inplace,
             axis=axis,
             level=level,
-            errors=errors,
-            try_cast=try_cast,
         )
 
     def memory_usage(self, index=True, deep=False):  # noqa: PR01, RT01, D200

From f0af110b20184fdbeddb30bcb1d3728dcff15430 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 21:37:44 +0200
Subject: [PATCH 021/176] update 'mean' and 'max'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index db0a38fa2c1..e87b496a668 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1854,32 +1854,21 @@ def mask(
 
     def max(
         self,
-        axis: int | None | NoDefault = no_default,
+        axis: Axis = 0,
         skipna=True,
-        level=None,
-        numeric_only=None,
+        numeric_only=False,
         **kwargs,
     ):  # noqa: PR01, RT01, D200
         """
         Return the maximum of the values over the requested axis.
         """
         validate_bool_kwarg(skipna, "skipna", none_allowed=False)
-        if level is not None:
-            return self._default_to_pandas(
-                "max",
-                axis=axis,
-                skipna=skipna,
-                level=level,
-                numeric_only=numeric_only,
-                **kwargs,
-            )
         axis = self._get_axis_number(axis)
         data = self._validate_dtypes_min_max(axis, numeric_only)
         return data._reduce_dimension(
             data._query_compiler.max(
                 axis=axis,
                 skipna=skipna,
-                level=level,
                 numeric_only=numeric_only,
                 **kwargs,
             )
@@ -2673,16 +2662,15 @@ def sem(
 
     def mean(
         self,
-        axis: "int | None | NoDefault" = no_default,
+        axis: Axis = 0,
         skipna=True,
-        level=None,
-        numeric_only=None,
+        numeric_only=False,
         **kwargs,
     ):  # noqa: PR01, RT01, D200
         """
         Return the mean of the values over the requested axis.
         """
-        return self._stat_operation("mean", axis, skipna, level, numeric_only, **kwargs)
+        return self._stat_operation("mean", axis, skipna, None, numeric_only, **kwargs)
 
     def median(
         self,

From 7daad2dfe0097cc0df8e20322d221ab74be54956 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 21:42:11 +0200
Subject: [PATCH 022/176] update 'min', 'median'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index e87b496a668..52d4b3f3cde 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1959,32 +1959,21 @@ def memory_usage(self, index=True, deep=False):  # noqa: PR01, RT01, D200
 
     def min(
         self,
-        axis: Axis | None | NoDefault = no_default,
+        axis: Axis = 0,
         skipna: bool = True,
-        level: Level | None = None,
-        numeric_only=None,
+        numeric_only=False,
         **kwargs,
     ):  # noqa: PR01, RT01, D200
         """
         Return the minimum of the values over the requested axis.
         """
         validate_bool_kwarg(skipna, "skipna", none_allowed=False)
-        if level is not None:
-            return self._default_to_pandas(
-                "min",
-                axis=axis,
-                skipna=skipna,
-                level=level,
-                numeric_only=numeric_only,
-                **kwargs,
-            )
         axis = self._get_axis_number(axis)
         data = self._validate_dtypes_min_max(axis, numeric_only)
         return data._reduce_dimension(
             data._query_compiler.min(
                 axis=axis,
                 skipna=skipna,
-                level=level,
                 numeric_only=numeric_only,
                 **kwargs,
             )
@@ -2674,17 +2663,16 @@ def mean(
 
     def median(
         self,
-        axis: "int | None | NoDefault" = no_default,
+        axis: Axis = 0,
         skipna=True,
-        level=None,
-        numeric_only=None,
+        numeric_only=False,
         **kwargs,
     ):  # noqa: PR01, RT01, D200
         """
         Return the mean of the values over the requested axis.
         """
         return self._stat_operation(
-            "median", axis, skipna, level, numeric_only, **kwargs
+            "median", axis, skipna, None, numeric_only, **kwargs
         )
 
     def set_axis(

From 1a37565c029a8497861acbf2d317755b7698df55 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 21:52:11 +0200
Subject: [PATCH 023/176] update 'resample', 'rank'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py      | 16 ++++++++--------
 modin/pandas/dataframe.py | 16 +---------------
 modin/pandas/series.py    | 16 +---------------
 3 files changed, 10 insertions(+), 38 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 52d4b3f3cde..286bb1aae62 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -2141,7 +2141,7 @@ def rank(
         self,
         axis=0,
         method: str = "average",
-        numeric_only=no_default,
+        numeric_only=False,
         na_option: str = "keep",
         ascending: bool = True,
         pct: bool = False,
@@ -2312,17 +2312,17 @@ def resample(
         self,
         rule,
         axis: Axis = 0,
-        closed: str | None = None,
-        label: str | None = None,
+        closed: Optional[str] = None,
+        label: Optional[str] = None,
         convention: str = "start",
-        kind: str | None = None,
+        kind: Optional[str] = None,
         loffset=None,
-        base: int | None = None,
+        base: Optional[int] = None,
         on: Level = None,
         level: Level = None,
-        origin: str | TimestampConvertibleTypes = "start_day",
-        offset: TimedeltaConvertibleTypes | None = None,
-        group_keys=no_default,
+        origin: Union[str, TimestampConvertibleTypes] = "start_day",
+        offset: Optional[TimedeltaConvertibleTypes] = None,
+        group_keys=False,
     ):  # noqa: PR01, RT01, D200
         """
         Resample time-series data.
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 10cc9a54924..8921def1e73 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -1638,8 +1638,7 @@ def prod(
         self,
         axis=None,
         skipna=True,
-        level=None,
-        numeric_only=None,
+        numeric_only=False,
         min_count=0,
         **kwargs,
     ):  # noqa: PR01, RT01, D200
@@ -1648,17 +1647,6 @@ def prod(
         """
         validate_bool_kwarg(skipna, "skipna", none_allowed=False)
         axis = self._get_axis_number(axis)
-        if level is not None:
-            if (
-                not self._query_compiler.has_multiindex(axis=axis)
-                and level > 0
-                or level < -1
-                and level != self.index.name
-            ):
-                raise ValueError("level > 0 or level < -1 only valid with MultiIndex")
-            return self.groupby(level=level, axis=axis, sort=False).prod(
-                numeric_only=numeric_only, min_count=min_count
-            )
 
         axis_to_apply = self.columns if axis else self.index
         if (
@@ -1677,7 +1665,6 @@ def prod(
                 data._query_compiler.prod_min_count(
                     axis=axis,
                     skipna=skipna,
-                    level=level,
                     numeric_only=numeric_only,
                     min_count=min_count,
                     **kwargs,
@@ -1687,7 +1674,6 @@ def prod(
             data._query_compiler.prod(
                 axis=axis,
                 skipna=skipna,
-                level=level,
                 numeric_only=numeric_only,
                 min_count=min_count,
                 **kwargs,
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index d8677853929..de8852ae0eb 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1367,24 +1367,12 @@ def prod(
         self,
         axis=None,
         skipna=True,
-        level=None,
-        numeric_only=None,
+        numeric_only=False,
         min_count=0,
         **kwargs,
     ):
         validate_bool_kwarg(skipna, "skipna", none_allowed=False)
         axis = self._get_axis_number(axis)
-        if level is not None:
-            if (
-                not self._query_compiler.has_multiindex(axis=axis)
-                and level > 0
-                or level < -1
-                and level != self.index.name
-            ):
-                raise ValueError("level > 0 or level < -1 only valid with MultiIndex")
-            return self.groupby(level=level, axis=axis, sort=False).prod(
-                numeric_only=numeric_only, min_count=min_count, **kwargs
-            )
         new_index = self.columns if axis else self.index
         if min_count > len(new_index):
             return np.nan
@@ -1395,7 +1383,6 @@ def prod(
                 data._query_compiler.prod_min_count(
                     axis=axis,
                     skipna=skipna,
-                    level=level,
                     numeric_only=numeric_only,
                     min_count=min_count,
                     **kwargs,
@@ -1405,7 +1392,6 @@ def prod(
             data._query_compiler.prod(
                 axis=axis,
                 skipna=skipna,
-                level=level,
                 numeric_only=numeric_only,
                 min_count=min_count,
                 **kwargs,

From 3b40481bc4c0f6e418fb96c1b6486b07822c32a8 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 21:54:44 +0200
Subject: [PATCH 024/176] update 'sem', 'skew'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 286bb1aae62..f2bea8b19ad 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -2635,18 +2635,17 @@ def sample(
 
     def sem(
         self,
-        axis: Axis | None = None,
+        axis: Optional[Axis] = None,
         skipna: bool = True,
-        level: Level | None = None,
         ddof: int = 1,
-        numeric_only=None,
+        numeric_only=False,
         **kwargs,
     ):  # noqa: PR01, RT01, D200
         """
         Return unbiased standard error of the mean over requested axis.
         """
         return self._stat_operation(
-            "sem", axis, skipna, level, numeric_only, ddof=ddof, **kwargs
+            "sem", axis, skipna, None, numeric_only, ddof=ddof, **kwargs
         )
 
     def mean(
@@ -2790,16 +2789,15 @@ def shift(
 
     def skew(
         self,
-        axis: Axis | None | NoDefault = no_default,
+        axis: Axis = 0,
         skipna: bool = True,
-        level: Level | None = None,
-        numeric_only=None,
+        numeric_only=False,
         **kwargs,
     ):  # noqa: PR01, RT01, D200
         """
         Return unbiased skew over requested axis.
         """
-        return self._stat_operation("skew", axis, skipna, level, numeric_only, **kwargs)
+        return self._stat_operation("skew", axis, skipna, None, numeric_only, **kwargs)
 
     def sort_index(
         self,

From 79c4b6368ee0d769deb26dad03912cc9ea8a1a7a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 21:59:04 +0200
Subject: [PATCH 025/176] update 'var', 'sum', 'std'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py      | 14 ++++++--------
 modin/pandas/dataframe.py | 16 +---------------
 modin/pandas/series.py    | 16 +---------------
 3 files changed, 8 insertions(+), 38 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index f2bea8b19ad..a6cecca0f67 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -2873,18 +2873,17 @@ def sort_values(
 
     def std(
         self,
-        axis: Axis | None = None,
+        axis: Optional[Axis] = None,
         skipna: bool = True,
-        level: Level | None = None,
         ddof: int = 1,
-        numeric_only=None,
+        numeric_only=False,
         **kwargs,
     ):  # noqa: PR01, RT01, D200
         """
         Return sample standard deviation over requested axis.
         """
         return self._stat_operation(
-            "std", axis, skipna, level, numeric_only, ddof=ddof, **kwargs
+            "std", axis, skipna, None, numeric_only, ddof=ddof, **kwargs
         )
 
     def sub(
@@ -3436,18 +3435,17 @@ def value_counts(
 
     def var(
         self,
-        axis: Axis | None = None,
+        axis: Optional[Axis] = None,
         skipna: bool = True,
-        level: Level | None = None,
         ddof: int = 1,
-        numeric_only=None,
+        numeric_only=False,
         **kwargs,
     ):  # noqa: PR01, RT01, D200
         """
         Return unbiased variance over requested axis.
         """
         return self._stat_operation(
-            "var", axis, skipna, level, numeric_only, ddof=ddof, **kwargs
+            "var", axis, skipna, None, numeric_only, ddof=ddof, **kwargs
         )
 
     def __abs__(self):
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 8921def1e73..afb4170082f 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -2061,8 +2061,7 @@ def sum(
         self,
         axis=None,
         skipna=True,
-        level=None,
-        numeric_only=None,
+        numeric_only=False,
         min_count=0,
         **kwargs,
     ):  # noqa: PR01, RT01, D200
@@ -2085,23 +2084,11 @@ def sum(
         data = self._validate_dtypes_sum_prod_mean(
             axis, numeric_only, ignore_axis=False
         )
-        if level is not None:
-            if (
-                not self._query_compiler.has_multiindex(axis=axis)
-                and level > 0
-                or level < -1
-                and level != self.index.name
-            ):
-                raise ValueError("level > 0 or level < -1 only valid with MultiIndex")
-            return self.groupby(level=level, axis=axis, sort=False).sum(
-                numeric_only=numeric_only, min_count=min_count
-            )
         if min_count > 1:
             return data._reduce_dimension(
                 data._query_compiler.sum_min_count(
                     axis=axis,
                     skipna=skipna,
-                    level=level,
                     numeric_only=numeric_only,
                     min_count=min_count,
                     **kwargs,
@@ -2111,7 +2098,6 @@ def sum(
             data._query_compiler.sum(
                 axis=axis,
                 skipna=skipna,
-                level=level,
                 numeric_only=numeric_only,
                 min_count=min_count,
                 **kwargs,
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index de8852ae0eb..d4738344052 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1724,8 +1724,7 @@ def sum(
         self,
         axis=None,
         skipna=True,
-        level=None,
-        numeric_only=None,
+        numeric_only=False,
         min_count=0,
         **kwargs,
     ):  # noqa: PR01, RT01, D200
@@ -1734,17 +1733,6 @@ def sum(
         """
         validate_bool_kwarg(skipna, "skipna", none_allowed=False)
         axis = self._get_axis_number(axis)
-        if level is not None:
-            if (
-                not self._query_compiler.has_multiindex(axis=axis)
-                and level > 0
-                or level < -1
-                and level != self.index.name
-            ):
-                raise ValueError("level > 0 or level < -1 only valid with MultiIndex")
-            return self.groupby(level=level, axis=axis, sort=False).sum(
-                numeric_only=numeric_only, min_count=min_count, **kwargs
-            )
 
         new_index = self.columns if axis else self.index
         if min_count > len(new_index):
@@ -1758,7 +1746,6 @@ def sum(
                 data._query_compiler.sum_min_count(
                     axis=axis,
                     skipna=skipna,
-                    level=level,
                     numeric_only=numeric_only,
                     min_count=min_count,
                     **kwargs,
@@ -1768,7 +1755,6 @@ def sum(
             data._query_compiler.sum(
                 axis=axis,
                 skipna=skipna,
-                level=level,
                 numeric_only=numeric_only,
                 min_count=min_count,
                 **kwargs,

From 24f6f5c715fedd8e3d602b5de37d26f68a4d1098 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 22:01:33 +0200
Subject: [PATCH 026/176] update 'to_json'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index a6cecca0f67..e46d6e29420 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -3067,6 +3067,7 @@ def to_json(
         index=True,
         indent=None,
         storage_options: StorageOptions = None,
+        mode="w",
     ):  # pragma: no cover  # noqa: PR01, RT01, D200
         """
         Convert the object to a JSON string.
@@ -3085,6 +3086,7 @@ def to_json(
             index=index,
             indent=indent,
             storage_options=storage_options,
+            mode=mode,
         )
 
     def to_latex(

From db67661fef126194033eb80521934d14e76e326b Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 22:05:38 +0200
Subject: [PATCH 027/176] update 'all', 'any'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py | 84 +++++++++++++-------------------------------
 1 file changed, 24 insertions(+), 60 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index e46d6e29420..3ac5163f229 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -732,7 +732,7 @@ def align(
         )
 
     def all(
-        self, axis=0, bool_only=None, skipna=True, level=None, **kwargs
+        self, axis=0, bool_only=None, skipna=True, **kwargs
     ):  # noqa: PR01, RT01, D200
         """
         Return whether all elements are True, potentially over an axis.
@@ -749,53 +749,35 @@ def all(
                     )
                 data_for_compute = self[self.columns[self.dtypes == np.bool_]]
                 return data_for_compute.all(
-                    axis=axis, bool_only=False, skipna=skipna, level=level, **kwargs
+                    axis=axis, bool_only=False, skipna=skipna, **kwargs
                 )
-            if level is not None:
-                if bool_only is not None:
-                    raise NotImplementedError(
-                        "Option bool_only is not implemented with option level."
-                    )
-                if (
-                    not self._query_compiler.has_multiindex(axis=axis)
-                    and (level > 0 or level < -1)
-                    and level != self.index.name
-                ):
-                    raise ValueError(
-                        "level > 0 or level < -1 only valid with MultiIndex"
-                    )
-                return self.groupby(level=level, axis=axis, sort=False).all(**kwargs)
             return self._reduce_dimension(
                 self._query_compiler.all(
-                    axis=axis, bool_only=bool_only, skipna=skipna, level=level, **kwargs
+                    axis=axis, bool_only=bool_only, skipna=skipna, **kwargs
                 )
             )
         else:
             if bool_only:
                 raise ValueError("Axis must be 0 or 1 (got {})".format(axis))
             # Reduce to a scalar if axis is None.
-            if level is not None:
-                raise ValueError("Must specify 'axis' when aggregating by level")
-            else:
-                result = self._reduce_dimension(
-                    # FIXME: Judging by pandas docs `**kwargs` serves only compatibility
-                    # purpose and does not affect the result, we shouldn't pass them to the query compiler.
-                    self._query_compiler.all(
-                        axis=0,
-                        bool_only=bool_only,
-                        skipna=skipna,
-                        level=level,
-                        **kwargs,
-                    )
+            result = self._reduce_dimension(
+                # FIXME: Judging by pandas docs `**kwargs` serves only compatibility
+                # purpose and does not affect the result, we shouldn't pass them to the query compiler.
+                self._query_compiler.all(
+                    axis=0,
+                    bool_only=bool_only,
+                    skipna=skipna,
+                    **kwargs,
                 )
+            )
             if isinstance(result, BasePandasDataset):
                 return result.all(
-                    axis=axis, bool_only=bool_only, skipna=skipna, level=level, **kwargs
+                    axis=axis, bool_only=bool_only, skipna=skipna, **kwargs
                 )
             return result
 
     def any(
-        self, axis=0, bool_only=None, skipna=True, level=None, **kwargs
+        self, *, axis=0, bool_only=None, skipna=True, **kwargs
     ):  # noqa: PR01, RT01, D200
         """
         Return whether any element is True, potentially over an axis.
@@ -812,46 +794,28 @@ def any(
                     )
                 data_for_compute = self[self.columns[self.dtypes == np.bool_]]
                 return data_for_compute.any(
-                    axis=axis, bool_only=False, skipna=skipna, level=level, **kwargs
+                    axis=axis, bool_only=False, skipna=skipna, **kwargs
                 )
-            if level is not None:
-                if bool_only is not None:
-                    raise NotImplementedError(
-                        "Option bool_only is not implemented with option level."
-                    )
-                if (
-                    not self._query_compiler.has_multiindex(axis=axis)
-                    and (level > 0 or level < -1)
-                    and level != self.index.name
-                ):
-                    raise ValueError(
-                        "level > 0 or level < -1 only valid with MultiIndex"
-                    )
-                return self.groupby(level=level, axis=axis, sort=False).any(**kwargs)
             return self._reduce_dimension(
                 self._query_compiler.any(
-                    axis=axis, bool_only=bool_only, skipna=skipna, level=level, **kwargs
+                    axis=axis, bool_only=bool_only, skipna=skipna, **kwargs
                 )
             )
         else:
             if bool_only:
                 raise ValueError("Axis must be 0 or 1 (got {})".format(axis))
             # Reduce to a scalar if axis is None.
-            if level is not None:
-                raise ValueError("Must specify 'axis' when aggregating by level")
-            else:
-                result = self._reduce_dimension(
-                    self._query_compiler.any(
-                        axis=0,
-                        bool_only=bool_only,
-                        skipna=skipna,
-                        level=level,
-                        **kwargs,
-                    )
+            result = self._reduce_dimension(
+                self._query_compiler.any(
+                    axis=0,
+                    bool_only=bool_only,
+                    skipna=skipna,
+                    **kwargs,
                 )
+            )
             if isinstance(result, BasePandasDataset):
                 return result.any(
-                    axis=axis, bool_only=bool_only, skipna=skipna, level=level, **kwargs
+                    axis=axis, bool_only=bool_only, skipna=skipna, **kwargs
                 )
             return result
 

From 886a7cf1fe175d9323410d3bcaad68fcd31dbf04 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 22:35:01 +0200
Subject: [PATCH 028/176] update 'describe', 'clip', 'count'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../storage_formats/base/query_compiler.py    |  3 ---
 .../storage_formats/pandas/query_compiler.py  | 13 -----------
 modin/pandas/base.py                          | 22 ++++++++-----------
 modin/pandas/series.py                        | 13 +++++++----
 4 files changed, 18 insertions(+), 33 deletions(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 40310bf7ff8..f3b8977a676 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -1904,9 +1904,6 @@ def describe(self, **kwargs):  # noqa: PR02
         percentiles : list-like
         include : "all" or list of dtypes, optional
         exclude : list of dtypes, optional
-        datetime_is_numeric : bool
-        **kwargs : dict
-            Serves the compatibility purpose. Does not affect the result.
 
         Returns
         -------
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index d6f1714e4da..02245a537ac 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -1768,19 +1768,6 @@ def describe(self, **kwargs):
         )
         new_index = empty_df.index
 
-        # Note: `describe` convert timestamp type to object type
-        # which results in the loss of two values in index: `first` and `last`
-        # for empty DataFrame.
-        datetime_is_numeric = kwargs.get("datetime_is_numeric") or False
-        if not any(map(is_numeric_dtype, empty_df.dtypes)) and not datetime_is_numeric:
-            for col_name in empty_df.dtypes.index:
-                # if previosly type of `col_name` was datetime or timedelta
-                if is_datetime_or_timedelta_dtype(self.dtypes[col_name]):
-                    new_index = pandas.Index(
-                        empty_df.index.to_list() + ["first"] + ["last"]
-                    )
-                    break
-
         def describe_builder(df, internal_indices=[]):  # pragma: no cover
             """Apply `describe` function to the subset of columns in a single partition."""
             # The index of the resulting dataframe is the same amongst all partitions
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 3ac5163f229..0ab2c8bb9ed 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1035,7 +1035,7 @@ def bool(self):  # noqa: RT01, D200
             return self._to_pandas().bool()
 
     def clip(
-        self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs
+        self, lower=None, upper=None, *, axis=None, inplace=False, **kwargs
     ):  # noqa: PR01, RT01, D200
         """
         Trim values at input threshold(s).
@@ -1045,7 +1045,7 @@ def clip(
             axis = self._get_axis_number(axis)
         self._validate_dtypes(numeric_only=True)
         inplace = validate_bool_kwarg(inplace, "inplace")
-        axis = numpy_compat.function.validate_clip_with_axis(axis, args, kwargs)
+        axis = numpy_compat.function.validate_clip_with_axis(axis, (), kwargs)
         # any np.nan bounds are treated as None
         if lower is not None and np.any(np.isnan(lower)):
             lower = None
@@ -1059,7 +1059,7 @@ def clip(
         # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility
         # purpose and does not affect the result, we shouldn't pass them to the query compiler.
         new_query_compiler = self._query_compiler.clip(
-            lower=lower, upper=upper, axis=axis, inplace=inplace, *args, **kwargs
+            lower=lower, upper=upper, axis=axis, inplace=inplace, **kwargs
         )
         return self._create_or_update_from_compiler(new_query_compiler, inplace)
 
@@ -1087,21 +1087,15 @@ def copy(self, deep=True):  # noqa: PR01, RT01, D200
         self._add_sibling(new_obj)
         return new_obj
 
-    def count(self, axis=0, level=None, numeric_only=False):  # noqa: PR01, RT01, D200
+    def count(self, axis=0, numeric_only=False):  # noqa: PR01, RT01, D200
         """
         Count non-NA cells for `BasePandasDataset`.
         """
         axis = self._get_axis_number(axis)
         frame = self.select_dtypes([np.number, np.bool_]) if numeric_only else self
 
-        if level is not None:
-            if not frame._query_compiler.has_multiindex(axis=axis):
-                raise TypeError("Can only count levels on hierarchical columns.")
-            return frame.groupby(level=level, axis=axis, sort=True).count()
         return frame._reduce_dimension(
-            frame._query_compiler.count(
-                axis=axis, level=level, numeric_only=numeric_only
-            )
+            frame._query_compiler.count(axis=axis, numeric_only=numeric_only)
         )
 
     def cummax(self, axis=None, skipna=True, *args, **kwargs):  # noqa: PR01, RT01, D200
@@ -1165,7 +1159,10 @@ def cumsum(self, axis=None, skipna=True, *args, **kwargs):  # noqa: PR01, RT01,
         )
 
     def describe(
-        self, percentiles=None, include=None, exclude=None, datetime_is_numeric=False
+        self,
+        percentiles=None,
+        include=None,
+        exclude=None,
     ):  # noqa: PR01, RT01, D200
         """
         Generate descriptive statistics.
@@ -1218,7 +1215,6 @@ def describe(
                 percentiles=percentiles,
                 include=include,
                 exclude=exclude,
-                datetime_is_numeric=datetime_is_numeric,
             )
         )
 
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index d4738344052..7f8e632814d 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -772,11 +772,11 @@ def corr(self, other, method="pearson", min_periods=None):  # noqa: PR01, RT01,
             )
         )
 
-    def count(self, level=None):  # noqa: PR01, RT01, D200
+    def count(self):  # noqa: PR01, RT01, D200
         """
         Return number of non-NA/null observations in the Series.
         """
-        return super(Series, self).count(level=level)
+        return super(Series, self).count()
 
     def cov(
         self, other, min_periods=None, ddof: Optional[int] = 1
@@ -816,14 +816,19 @@ def cov(
         return result
 
     def describe(
-        self, percentiles=None, include=None, exclude=None, datetime_is_numeric=False
+        self,
+        percentiles=None,
+        include=None,
+        exclude=None,
     ):  # noqa: PR01, RT01, D200
         """
         Generate descriptive statistics.
         """
         # Pandas ignores the `include` and `exclude` for Series for some reason.
         return super(Series, self).describe(
-            percentiles=percentiles, datetime_is_numeric=datetime_is_numeric
+            percentiles=percentiles,
+            include=include,
+            exclude=exclude,
         )
 
     def diff(self, periods=1):  # noqa: PR01, RT01, D200

From 2cc84a88731633e820dbe2e2d8c983e055465b56 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 22:40:24 +0200
Subject: [PATCH 029/176] update 'resample', 'expanding'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py     | 7 +------
 modin/pandas/resample.py | 6 ------
 2 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 0ab2c8bb9ed..90a471d0f00 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1430,7 +1430,7 @@ def ewm(
         )
 
     def expanding(
-        self, min_periods=1, center=None, axis=0, method="single"
+        self, min_periods=1, axis=0, method="single"
     ):  # noqa: PR01, RT01, D200
         """
         Provide expanding window calculations.
@@ -1438,7 +1438,6 @@ def expanding(
         return self._default_to_pandas(
             "expanding",
             min_periods=min_periods,
-            center=center,
             axis=axis,
             method=method,
         )
@@ -2276,8 +2275,6 @@ def resample(
         label: Optional[str] = None,
         convention: str = "start",
         kind: Optional[str] = None,
-        loffset=None,
-        base: Optional[int] = None,
         on: Level = None,
         level: Level = None,
         origin: Union[str, TimestampConvertibleTypes] = "start_day",
@@ -2297,8 +2294,6 @@ def resample(
             label=label,
             convention=convention,
             kind=kind,
-            loffset=loffset,
-            base=base,
             on=on,
             level=level,
             origin=origin,
diff --git a/modin/pandas/resample.py b/modin/pandas/resample.py
index b69af48270f..6fe6ec1ccb1 100644
--- a/modin/pandas/resample.py
+++ b/modin/pandas/resample.py
@@ -35,8 +35,6 @@ def __init__(
         label=None,
         convention="start",
         kind=None,
-        loffset=None,
-        base=0,
         on=None,
         level=None,
         origin="start_day",
@@ -53,8 +51,6 @@ def __init__(
             "label": label,
             "convention": convention,
             "kind": kind,
-            "loffset": loffset,
-            "base": base,
             "on": on,
             "level": level,
             "origin": origin,
@@ -80,8 +76,6 @@ def _get_groups(self):
                 closed=self.resample_kwargs["closed"],
                 label=self.resample_kwargs["label"],
                 convention=self.resample_kwargs["convention"],
-                loffset=self.resample_kwargs["loffset"],
-                base=self.resample_kwargs["base"],
                 level=self.resample_kwargs["level"],
                 origin=self.resample_kwargs["origin"],
                 offset=self.resample_kwargs["offset"],

From 2bc36c911cc8407f56a88962ad21935ba8608ac1 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 22:45:28 +0200
Subject: [PATCH 030/176] update 'take', 'to_excel', 'to_latex'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py   | 9 ++-------
 modin/pandas/series.py | 4 ++--
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 90a471d0f00..148f7c46c52 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -2883,14 +2883,13 @@ def tail(self, n=5):  # noqa: PR01, RT01, D200
             return self.iloc[-n:]
         return self.iloc[len(self.index) :]
 
-    def take(self, indices, axis=0, is_copy=None, **kwargs):  # noqa: PR01, RT01, D200
+    def take(self, indices, axis=0, **kwargs):  # noqa: PR01, RT01, D200
         """
         Return the elements in the given *positional* indices along an axis.
         """
         axis = self._get_axis_number(axis)
         slice_obj = indices if axis == 0 else (slice(None), indices)
-        result = self.iloc[slice_obj]
-        return result if not is_copy else result.copy()
+        return self.iloc[slice_obj]
 
     def to_clipboard(
         self, excel=True, sep=None, **kwargs
@@ -2967,9 +2966,7 @@ def to_excel(
         startcol=0,
         engine=None,
         merge_cells=True,
-        encoding=no_default,
         inf_rep="inf",
-        verbose=no_default,
         freeze_panes=None,
         storage_options: StorageOptions = None,
     ):  # pragma: no cover  # noqa: PR01, RT01, D200
@@ -3048,7 +3045,6 @@ def to_latex(
         self,
         buf=None,
         columns=None,
-        col_space=None,
         header=True,
         index=True,
         na_rep="NaN",
@@ -3076,7 +3072,6 @@ def to_latex(
             "to_latex",
             buf=buf,
             columns=columns,
-            col_space=col_space,
             header=header,
             index=index,
             na_rep=na_rep,
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 7f8e632814d..c82a7290c2e 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1772,11 +1772,11 @@ def swaplevel(self, i=-2, j=-1, copy=None):  # noqa: PR01, RT01, D200
         """
         return self._default_to_pandas("swaplevel", i=i, j=j, copy=copy)
 
-    def take(self, indices, axis=0, is_copy=None, **kwargs):  # noqa: PR01, RT01, D200
+    def take(self, indices, axis=0, **kwargs):  # noqa: PR01, RT01, D200
         """
         Return the elements in the given positional indices along an axis.
         """
-        return super(Series, self).take(indices, axis=axis, is_copy=is_copy, **kwargs)
+        return super(Series, self).take(indices, axis=axis, **kwargs)
 
     def to_dict(self, into=dict):  # pragma: no cover # noqa: PR01, RT01, D200
         """

From 9814acd8c2872b19d094910c088c90808548cdce Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 22:48:05 +0200
Subject: [PATCH 031/176] update 'where'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/dataframe.py | 5 +----
 modin/pandas/series.py    | 5 +----
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index afb4170082f..2446aa915d9 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -2372,11 +2372,10 @@ def where(
         self,
         cond,
         other=no_default,
+        *,
         inplace=False,
         axis=None,
         level=None,
-        errors="raise",
-        try_cast=no_default,
     ):  # noqa: PR01, RT01, D200
         """
         Replace values where the condition is False.
@@ -2396,8 +2395,6 @@ def where(
                 inplace=False,
                 axis=axis,
                 level=level,
-                errors=errors,
-                try_cast=try_cast,
             )
             return self._create_or_update_from_compiler(new_query_compiler, inplace)
         cond = cond(self) if callable(cond) else cond
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index c82a7290c2e..3dfef3e0e32 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1966,11 +1966,10 @@ def where(
         self,
         cond,
         other=no_default,
+        *,
         inplace=False,
         axis=None,
         level=None,
-        errors=no_default,
-        try_cast=no_default,
     ):  # noqa: PR01, RT01, D200
         """
         Replace values where the condition is False.
@@ -1984,8 +1983,6 @@ def where(
             inplace=inplace,
             axis=axis,
             level=level,
-            errors=errors,
-            try_cast=try_cast,
         )
 
     @property

From 9ba30427cb662c85c5ac5052d21d47d6f199fb9f Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 23:07:40 +0200
Subject: [PATCH 032/176] remove 'pad', 'backfill'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../core/storage_formats/base/query_compiler.py  | 14 --------------
 .../storage_formats/pandas/query_compiler.py     |  3 ---
 modin/pandas/base.py                             |  1 -
 modin/pandas/groupby.py                          | 16 ----------------
 modin/pandas/resample.py                         | 14 --------------
 modin/pandas/test/dataframe/test_default.py      |  2 +-
 modin/pandas/test/test_api.py                    |  2 +-
 modin/pandas/test/test_groupby.py                | 15 ---------------
 modin/pandas/test/test_series.py                 |  2 --
 9 files changed, 2 insertions(+), 67 deletions(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index f3b8977a676..c774cb4009b 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -4179,14 +4179,6 @@ def resample_asfreq(self, resample_kwargs, fill_value):
             self, resample_kwargs, fill_value
         )
 
-    # FIXME: `resample_backfill` is an alias for `resample_bfill`, one of these method
-    # should be removed (Modin issue #3107).
-    @doc_utils.doc_resample_fillna(method="back-fill", refer_to="backfill")
-    def resample_backfill(self, resample_kwargs, limit):
-        return ResampleDefault.register(pandas.core.resample.Resampler.backfill)(
-            self, resample_kwargs, limit
-        )
-
     @doc_utils.doc_resample_fillna(method="back-fill", refer_to="bfill")
     def resample_bfill(self, resample_kwargs, limit):
         return ResampleDefault.register(pandas.core.resample.Resampler.bfill)(
@@ -4356,12 +4348,6 @@ def resample_ohlc_ser(self, resample_kwargs, *args, **kwargs):
             pandas.core.resample.Resampler.ohlc, squeeze_self=True
         )(self, resample_kwargs, *args, **kwargs)
 
-    @doc_utils.doc_resample_fillna(method="'pad'", refer_to="pad")
-    def resample_pad(self, resample_kwargs, limit):
-        return ResampleDefault.register(pandas.core.resample.Resampler.pad)(
-            self, resample_kwargs, limit
-        )
-
     # FIXME: This method require us to build high-level resampler object
     # which we shouldn't do at the query compiler. We need to move this at the front.
     # (Modin issue #3105)
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index 02245a537ac..ccfc2585022 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -1044,9 +1044,6 @@ def resample_pipe(self, resample_kwargs, func, *args, **kwargs):
     def resample_ffill(self, resample_kwargs, limit):
         return self._resample_func(resample_kwargs, "ffill", limit=limit)
 
-    def resample_backfill(self, resample_kwargs, limit):
-        return self._resample_func(resample_kwargs, "backfill", limit=limit)
-
     def resample_bfill(self, resample_kwargs, limit):
         return self._resample_func(resample_kwargs, "bfill", limit=limit)
 
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 148f7c46c52..652acdccb30 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -45,7 +45,6 @@
     Axis,
     IndexLabel,
     Level,
-    IgnoreRaise,
     TimedeltaConvertibleTypes,
     TimestampConvertibleTypes,
     RandomState,
diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 42d53523279..80c4f5fef68 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -13,8 +13,6 @@
 
 """Implement GroupBy public API as pandas does."""
 
-import warnings
-
 import numpy as np
 import pandas
 from pandas.core.apply import reconstruct_func
@@ -459,17 +457,6 @@ def dtypes(self):
     def first(self, **kwargs):
         return self._default_to_pandas(lambda df: df.first(**kwargs))
 
-    def backfill(self, limit=None):
-        warnings.warn(
-            (
-                "backfill is deprecated and will be removed in a future version. "
-                + "Use bfill instead."
-            ),
-            FutureWarning,
-            stacklevel=2,
-        )
-        return self.bfill(limit)
-
     _internal_by_cache = no_default
 
     # TODO: since python 3.9:
@@ -738,9 +725,6 @@ def rank(self, **kwargs):
     def corrwith(self):
         return self._default_to_pandas(lambda df: df.corrwith)
 
-    def pad(self, limit=None):
-        return self._default_to_pandas(lambda df: df.pad(limit=limit))
-
     def max(self, numeric_only=False, min_count=-1):
         return self._wrap_aggregation(
             type(self._query_compiler).groupby_max,
diff --git a/modin/pandas/resample.py b/modin/pandas/resample.py
index 6fe6ec1ccb1..b6956c74446 100644
--- a/modin/pandas/resample.py
+++ b/modin/pandas/resample.py
@@ -206,13 +206,6 @@ def ffill(self, limit=None):
             )
         )
 
-    def backfill(self, limit=None):
-        return self._dataframe.__constructor__(
-            query_compiler=self._query_compiler.resample_backfill(
-                self.resample_kwargs, limit
-            )
-        )
-
     def bfill(self, limit=None):
         return self._dataframe.__constructor__(
             query_compiler=self._query_compiler.resample_bfill(
@@ -220,13 +213,6 @@ def bfill(self, limit=None):
             )
         )
 
-    def pad(self, limit=None):
-        return self._dataframe.__constructor__(
-            query_compiler=self._query_compiler.resample_pad(
-                self.resample_kwargs, limit
-            )
-        )
-
     def nearest(self, limit=None):
         return self._dataframe.__constructor__(
             query_compiler=self._query_compiler.resample_nearest(
diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py
index a245f276dae..a77e41b39ce 100644
--- a/modin/pandas/test/dataframe/test_default.py
+++ b/modin/pandas/test/dataframe/test_default.py
@@ -681,7 +681,7 @@ def test_resampler(rule, axis):
     [
         *("count", "sum", "std", "sem", "size", "prod", "ohlc", "quantile"),
         *("min", "median", "mean", "max", "last", "first", "nunique", "var"),
-        *("interpolate", "asfreq", "pad", "nearest", "bfill", "backfill", "ffill"),
+        *("interpolate", "asfreq", "nearest", "bfill", "ffill"),
     ],
 )
 def test_resampler_functions(rule, axis, method):
diff --git a/modin/pandas/test/test_api.py b/modin/pandas/test/test_api.py
index 3b59c6cce03..343da0be56b 100644
--- a/modin/pandas/test/test_api.py
+++ b/modin/pandas/test/test_api.py
@@ -298,7 +298,7 @@ def test_series_groupby_api_equality(obj):
     modin_dir = [x for x in dir(getattr(pd.groupby, obj)) if x[0] != "_"]
     pandas_dir = [x for x in dir(getattr(pandas.core.groupby, obj)) if x[0] != "_"]
     # This attribute is hidden from the DataFrameGroupBy object
-    ignore = ["keys"]
+    ignore = ["keys", "level"]
     missing_from_modin = set(pandas_dir) - set(modin_dir) - set(ignore)
     assert not len(missing_from_modin), "Differences found in API: {}".format(
         len(missing_from_modin)
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index 64a0cc3da25..ab4354dd23d 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -163,7 +163,6 @@ def test_mixed_dtypes_groupby(as_index):
 
         eval_dtypes(modin_groupby, pandas_groupby)
         eval_general(modin_groupby, pandas_groupby, lambda df: df.first())
-        eval_general(modin_groupby, pandas_groupby, lambda df: df.backfill())
         eval_cummin(modin_groupby, pandas_groupby)
         eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())
         eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())
@@ -365,7 +364,6 @@ def maybe_get_columns(df, by):
 
     eval_dtypes(modin_groupby, pandas_groupby)
     eval_general(modin_groupby, pandas_groupby, lambda df: df.first())
-    eval_general(modin_groupby, pandas_groupby, lambda df: df.backfill())
     eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())
     eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())
     eval_prod(modin_groupby, pandas_groupby)
@@ -553,7 +551,6 @@ def test_single_group_row_groupby():
 
     eval_dtypes(modin_groupby, pandas_groupby)
     eval_general(modin_groupby, pandas_groupby, lambda df: df.first())
-    eval_general(modin_groupby, pandas_groupby, lambda df: df.backfill())
     eval_cummin(modin_groupby, pandas_groupby)
     eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())
     eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())
@@ -665,7 +662,6 @@ def test_large_row_groupby(is_by_category):
 
     eval_dtypes(modin_groupby, pandas_groupby)
     eval_general(modin_groupby, pandas_groupby, lambda df: df.first())
-    eval_general(modin_groupby, pandas_groupby, lambda df: df.backfill())
     eval_cummin(modin_groupby, pandas_groupby)
     eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())
     eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())
@@ -783,7 +779,6 @@ def test_simple_col_groupby():
         eval_apply(modin_groupby, pandas_groupby, func)
 
     eval_general(modin_groupby, pandas_groupby, lambda df: df.first())
-    eval_general(modin_groupby, pandas_groupby, lambda df: df.backfill())
     eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())
     eval_prod(modin_groupby, pandas_groupby)
     eval_std(modin_groupby, pandas_groupby)
@@ -909,7 +904,6 @@ def test_series_groupby(by, as_index_series_or_dataframe):
             eval_apply(modin_groupby, pandas_groupby, func)
 
         eval_general(modin_groupby, pandas_groupby, lambda df: df.first())
-        eval_general(modin_groupby, pandas_groupby, lambda df: df.backfill())
         eval_cummin(modin_groupby, pandas_groupby)
         eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())
         eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())
@@ -2164,15 +2158,6 @@ def test_mean_with_datetime(by_func):
     eval_general(modin_df, pandas_df, lambda df: df.groupby(by=by_func(df)).mean())
 
 
-def test_groupby_backfill_warn():
-    modin_df = pd.DataFrame(test_groupby_data)
-    md_grp = modin_df.groupby(by=modin_df.columns[0])
-
-    msg = "backfill is deprecated and will be removed in a future version."
-    with pytest.warns(FutureWarning, match=msg):
-        md_grp.backfill()
-
-
 @pytest.mark.parametrize(
     "modin_df_recipe",
     ["non_lazy_frame", "frame_with_deferred_index", "lazy_frame"],
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 4a1e8ee849a..3823980800a 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -2893,10 +2893,8 @@ def test_resample(closed, label, level):
             modin_resampler.fillna(method="nearest"),
             pandas_resampler.fillna(method="nearest"),
         )
-        df_equals(modin_resampler.pad(), pandas_resampler.pad())
         df_equals(modin_resampler.nearest(), pandas_resampler.nearest())
         df_equals(modin_resampler.bfill(), pandas_resampler.bfill())
-        df_equals(modin_resampler.backfill(), pandas_resampler.backfill())
         df_equals(modin_resampler.ffill(), pandas_resampler.ffill())
     df_equals(
         modin_resampler.apply(["sum", "mean", "max"]),

From fb0693cb91cfd50b2f8f141abd077e6af818835a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 23:32:35 +0200
Subject: [PATCH 033/176] add 'dt.unit', 'dt.as_unit'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/base/query_compiler.py | 8 ++++++++
 modin/pandas/series_utils.py                      | 9 ++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index c774cb4009b..82ede36b605 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -3800,6 +3800,14 @@ def dt_freq(self):
         """
         return DateTimeDefault.register(pandas.Series.dt.freq)(self)
 
+    @doc_utils.add_refer_to("Series.dt.unit")
+    def dt_unit(self):
+        return DateTimeDefault.register(pandas.Series.dt.unit)(self)
+
+    @doc_utils.add_refer_to("Series.dt.as_unit")
+    def dt_as_unit(self, *args, **kwargs):
+        return DateTimeDefault.register(pandas.Series.dt.as_unit)(self, *args, **kwargs)
+
     @doc_utils.doc_dt_timestamp(
         prop="Calculate year, week, and day according to the ISO 8601 standard.",
         refer_to="isocalendar",
diff --git a/modin/pandas/series_utils.py b/modin/pandas/series_utils.py
index 3adde21f2da..10f4511cca7 100644
--- a/modin/pandas/series_utils.py
+++ b/modin/pandas/series_utils.py
@@ -127,7 +127,7 @@ def _default_to_pandas(self, op, *args, **kwargs):
         )
 
 
-@_inherit_docstrings(pandas.core.strings.StringMethods)
+@_inherit_docstrings(pandas.core.strings.accessor.StringMethods)
 class StringMethods(ClassLogger):
     def __init__(self, series):
         # Check if dtypes is objects
@@ -606,6 +606,13 @@ def tz(self) -> "tzinfo | None":
     def freq(self):
         return self._query_compiler.dt_freq().to_pandas().squeeze()
 
+    @property
+    def unit(self):
+        return Series(query_compiler=self._query_compiler.dt_unit())
+
+    def as_unit(self, *args, **kwargs):
+        return Series(query_compiler=self._query_compiler.dt_as_unit(*args, **kwargs))
+
     def to_period(self, *args, **kwargs):
         return Series(query_compiler=self._query_compiler.dt_to_period(*args, **kwargs))
 

From efe8dec0fc929538d4d5f90f561171e17326ac1e Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 23:35:45 +0200
Subject: [PATCH 034/176] remove 'lookup'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 docs/supported_apis/dataframe_supported.rst | 3 ---
 modin/pandas/dataframe.py                   | 6 ------
 2 files changed, 9 deletions(-)

diff --git a/docs/supported_apis/dataframe_supported.rst b/docs/supported_apis/dataframe_supported.rst
index 967a291a640..c2497e8ca69 100644
--- a/docs/supported_apis/dataframe_supported.rst
+++ b/docs/supported_apis/dataframe_supported.rst
@@ -224,8 +224,6 @@ default to pandas.
 |                            |                           |                        | **Hdk**: ``P``, read access fully supported,       |
 |                            |                           |                        | write access: no row and 2D assignments support    |
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
-| ``lookup``                 | `lookup`_                 | D                      |                                                    |
-+----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``lt``                     | `lt`_                     | Y                      | See ``add``                                        |
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``mask``                   | `mask`_                   | D                      |                                                    |
@@ -567,7 +565,6 @@ default to pandas.
 .. _`last_valid_index`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.last_valid_index.html#pandas.DataFrame.last_valid_index
 .. _`le`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.le.html#pandas.DataFrame.le
 .. _`loc`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.loc.html#pandas.DataFrame.loc
-.. _`lookup`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.lookup.html#pandas.DataFrame.lookup
 .. _`lt`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.lt.html#pandas.DataFrame.lt
 .. _`mask`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mask.html#pandas.DataFrame.mask
 .. _`max`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.max.html#pandas.DataFrame.max
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 2446aa915d9..0ca0ad91c1b 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -1336,12 +1336,6 @@ def le(self, other, axis="columns", level=None):  # noqa: PR01, RT01, D200
             "le", other, axis=axis, level=level, broadcast=isinstance(other, Series)
         )
 
-    def lookup(self, row_labels, col_labels):  # noqa: PR01, RT01, D200
-        """
-        Label-based "fancy indexing" function for ``DataFrame``.
-        """
-        return self._default_to_pandas(pandas.DataFrame.lookup, row_labels, col_labels)
-
     def lt(self, other, axis="columns", level=None):  # noqa: PR01, RT01, D200
         """
         Get less than comparison of ``DataFrame`` and `other`, element-wise (binary operator `le`).

From 6a024fb088b17ff2c92af4ee29940a71183ff1ae Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 23:38:37 +0200
Subject: [PATCH 035/176] update 'corr', 'corrwith', 'cov'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/dataframe.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 0ca0ad91c1b..852d1b5fcf7 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -638,7 +638,7 @@ def compare(
         )
 
     def corr(
-        self, method="pearson", min_periods=1, numeric_only=no_default
+        self, method="pearson", min_periods=1, numeric_only=False
     ):  # noqa: PR01, RT01, D200
         """
         Compute pairwise correlation of columns, excluding NA/null values.
@@ -658,7 +658,7 @@ def corr(
         )
 
     def corrwith(
-        self, other, axis=0, drop=False, method="pearson", numeric_only=no_default
+        self, other, axis=0, drop=False, method="pearson", numeric_only=False
     ):  # noqa: PR01, RT01, D200
         """
         Compute pairwise correlation.
@@ -675,7 +675,7 @@ def corrwith(
         )
 
     def cov(
-        self, min_periods=None, ddof: Optional[int] = 1, numeric_only=no_default
+        self, min_periods=None, ddof: Optional[int] = 1, numeric_only=False
     ):  # noqa: PR01, RT01, D200
         """
         Compute pairwise covariance of columns, excluding NA/null values.

From dd931ca6cf311e45d106e00998286c94a4287fb3 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 23:45:35 +0200
Subject: [PATCH 036/176] update 'merge', 'pivot'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/dataframe.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 852d1b5fcf7..442f6b35cf2 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -1399,13 +1399,15 @@ def merge(
         right_index=False,
         sort=False,
         suffixes=("_x", "_y"),
-        copy=True,
+        copy=None,
         indicator=False,
         validate=None,
     ):  # noqa: PR01, RT01, D200
         """
         Merge ``DataFrame`` or named ``Series`` objects with a database-style join.
         """
+        if copy is None:
+            copy = True
         if isinstance(right, Series):
             if right.name is None:
                 raise ValueError("Cannot merge a Series without a name")
@@ -1528,10 +1530,14 @@ def unstack(self, level=-1, fill_value=None):  # noqa: PR01, RT01, D200
                 query_compiler=self._query_compiler.unstack(level, fill_value)
             )
 
-    def pivot(self, index=None, columns=None, values=None):  # noqa: PR01, RT01, D200
+    def pivot(self, *, columns, index=NoDefault, values=NoDefault):  # noqa: PR01, RT01, D200
         """
         Return reshaped ``DataFrame`` organized by given index / column values.
         """
+        if index is NoDefault:
+            index = None
+        if values is NoDefault:
+            values = None
         return self.__constructor__(
             query_compiler=self._query_compiler.pivot(
                 index=index, columns=columns, values=values

From 80eede9afdd12381b31a04beb7bc83a0f5aedb94 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 23:47:45 +0200
Subject: [PATCH 037/176] update 'to_dict', 'quantile'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py      | 4 ++--
 modin/pandas/dataframe.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 652acdccb30..2d3665b6242 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -2991,8 +2991,8 @@ def to_excel(
             storage_options=storage_options,
         )
 
-    def to_dict(self, orient="dict", into=dict):  # pragma: no cover
-        return self._default_to_pandas("to_dict", orient=orient, into=into)
+    def to_dict(self, orient="dict", into=dict, index=True):  # pragma: no cover
+        return self._default_to_pandas("to_dict", orient=orient, into=into, index=index)
 
     def to_hdf(
         self, path_or_buf, key, format="table", **kwargs
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 442f6b35cf2..24948561aff 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -1686,14 +1686,14 @@ def quantile(
         self,
         q=0.5,
         axis=0,
-        numeric_only=no_default,
+        numeric_only=False,
         interpolation="linear",
         method="single",
     ):
         return super(DataFrame, self).quantile(
             q=q,
             axis=axis,
-            numeric_only=True if numeric_only is no_default else numeric_only,
+            numeric_only=numeric_only,
             interpolation=interpolation,
             method=method,
         )

From b277c585faa62b71da99d0f9e1f5b95bfb447dc0 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 23:51:41 +0200
Subject: [PATCH 038/176] update 'info'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/dataframe.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 24948561aff..58428cbaf3f 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -977,7 +977,6 @@ def info(
         max_cols: Optional[int] = None,
         memory_usage: Optional[Union[bool, str]] = None,
         show_counts: Optional[bool] = None,
-        null_counts: Optional[bool] = None,
     ):  # noqa: PR01, D200
         """
         Print a concise summary of the ``DataFrame``.
@@ -1011,13 +1010,13 @@ def format_size(num):
         if buf is None:
             buf = sys.stdout
 
-        if null_counts is None:
-            null_counts = not exceeds_info_cols
+        if show_counts is None:
+            show_counts = not exceeds_info_cols
 
         if verbose is None:
             verbose = not exceeds_info_cols
 
-        if null_counts and verbose:
+        if show_counts and verbose:
             # We're gonna take items from `non_null_count` in a loop, which
             # works kinda slow with `Modin.Series`, that's why we call `_to_pandas()` here
             # that will be faster.
@@ -1049,7 +1048,7 @@ def get_header(spaces=2):
             header = put_str(head_label, lengths["head"]) + put_str(
                 column_label, lengths["column"]
             )
-            if null_counts:
+            if show_counts:
                 lengths["null"] = max(
                     len(null_label),
                     max(len(pprint_thing(x)) for x in non_null_count)
@@ -1063,7 +1062,7 @@ def get_header(spaces=2):
             delimiters = put_str(delimiter * lengths["head"]) + put_str(
                 delimiter * lengths["column"]
             )
-            if null_counts:
+            if show_counts:
                 delimiters += put_str(delimiter * lengths["null"])
             delimiters += put_str(delimiter * lengths["dtype"], spaces=dtype_spaces)
             output.append(delimiters)
@@ -1082,7 +1081,7 @@ def verbose_repr(output):
                 to_append = put_str(" {}".format(i), lengths["head"]) + put_str(
                     col_s, lengths["column"]
                 )
-                if null_counts:
+                if show_counts:
                     non_null = pprint_thing(non_null_count[col])
                     to_append += put_str(
                         "{} non-null".format(non_null), lengths["null"]
@@ -1530,7 +1529,9 @@ def unstack(self, level=-1, fill_value=None):  # noqa: PR01, RT01, D200
                 query_compiler=self._query_compiler.unstack(level, fill_value)
             )
 
-    def pivot(self, *, columns, index=NoDefault, values=NoDefault):  # noqa: PR01, RT01, D200
+    def pivot(
+        self, *, columns, index=NoDefault, values=NoDefault
+    ):  # noqa: PR01, RT01, D200
         """
         Return reshaped ``DataFrame`` organized by given index / column values.
         """

From f285848087efb1c551bffacccb24df2702ee9d2b Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 13 Apr 2023 23:56:23 +0200
Subject: [PATCH 039/176] update 'read_sql'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/io/io.py | 3 +++
 modin/pandas/io.py  | 1 +
 2 files changed, 4 insertions(+)

diff --git a/modin/core/io/io.py b/modin/core/io/io.py
index 3cff379f069..665656363ce 100644
--- a/modin/core/io/io.py
+++ b/modin/core/io/io.py
@@ -22,6 +22,7 @@
 
 import pandas
 from pandas.util._decorators import doc
+from pandas._libs.lib import no_default
 
 from modin.db_conn import ModinDatabaseConnection
 from modin.error_message import ErrorMessage
@@ -436,6 +437,8 @@ def read_sql(
         parse_dates=None,
         columns=None,
         chunksize=None,
+        dtype_backend=no_default,
+        dtype=None,
     ):  # noqa: PR01
         ErrorMessage.default_to_pandas("`read_sql`")
         if isinstance(con, ModinDatabaseConnection):
diff --git a/modin/pandas/io.py b/modin/pandas/io.py
index 70147e74d10..5a268cd59ef 100644
--- a/modin/pandas/io.py
+++ b/modin/pandas/io.py
@@ -580,6 +580,7 @@ def read_sql(
     columns=None,
     chunksize=None,
     dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
+    dtype=None,
 ):  # noqa: PR01, RT01, D200
     """
     Read SQL query or database table into a DataFrame.

From 7a272b7ec10f0af9593b8106bd884096a9ffa38c Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 14 Apr 2023 00:33:00 +0200
Subject: [PATCH 040/176] update top level functions

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/general.py | 28 +++++++++++++++-------------
 modin/pandas/io.py      | 38 ++++++++++++++++----------------------
 modin/pandas/series.py  |  2 +-
 3 files changed, 32 insertions(+), 36 deletions(-)

diff --git a/modin/pandas/general.py b/modin/pandas/general.py
index 845c3c2406b..c3b38c9f77d 100644
--- a/modin/pandas/general.py
+++ b/modin/pandas/general.py
@@ -16,7 +16,7 @@
 import pandas
 import numpy as np
 
-from typing import Hashable, Iterable, Mapping, Union
+from typing import Hashable, Iterable, Mapping, Union, Optional
 from pandas.core.dtypes.common import is_list_like
 from pandas._libs.lib import no_default, NoDefault
 from pandas._typing import DtypeBackend
@@ -74,7 +74,7 @@ def merge(
     right_index: bool = False,
     sort: bool = False,
     suffixes=("_x", "_y"),
-    copy: bool = True,
+    copy: Optional[bool] = None,
     indicator: bool = False,
     validate=None,
 ):  # noqa: PR01, RT01, D200
@@ -253,10 +253,16 @@ def pivot_table(
 
 @_inherit_docstrings(pandas.pivot, apilink="pandas.pivot")
 @enable_logging
-def pivot(data, index=None, columns=None, values=None):  # noqa: PR01, RT01, D200
+def pivot(
+    data, *, columns, index=NoDefault, values=NoDefault
+):  # noqa: PR01, RT01, D200
     """
     Return reshaped DataFrame organized by given index / column values.
     """
+    if index is NoDefault:
+        index = None
+    if values is NoDefault:
+        values = None
     if not isinstance(data, DataFrame):
         raise ValueError("can not pivot with instance of type {}".format(type(data)))
     return data.pivot(index=index, columns=columns, values=values)
@@ -359,7 +365,7 @@ def concat(
     names=None,
     verify_integrity: bool = False,
     sort: bool = False,
-    copy: bool = True,
+    copy: bool = None,
 ) -> "DataFrame | Series":  # noqa: PR01, RT01, D200
     """
     Concatenate Modin objects along a particular axis.
@@ -505,11 +511,11 @@ def to_datetime(
     errors="raise",
     dayfirst=False,
     yearfirst=False,
-    utc=None,
+    utc=False,
     format=None,
-    exact=True,
+    exact=no_default,
     unit=None,
-    infer_datetime_format=False,
+    infer_datetime_format=no_default,
     origin="unix",
     cache=True,
 ):  # noqa: PR01, RT01, D200
@@ -652,7 +658,7 @@ def crosstab(
 
 # Adding docstring since pandas docs don't have web section for this function.
 @enable_logging
-def lreshape(data: DataFrame, groups, dropna=True, label=None):
+def lreshape(data: DataFrame, groups, dropna=True):
     """
     Reshape wide-format data to long. Generalized inverse of ``DataFrame.pivot``.
 
@@ -668,8 +674,6 @@ def lreshape(data: DataFrame, groups, dropna=True, label=None):
         Dictionary in the form: `{new_name : list_of_columns}`.
     dropna : bool, default: True
         Whether include columns whose entries are all NaN or not.
-    label : optional
-        Deprecated parameter.
 
     Returns
     -------
@@ -679,9 +683,7 @@ def lreshape(data: DataFrame, groups, dropna=True, label=None):
     if not isinstance(data, DataFrame):
         raise ValueError("can not lreshape with instance of type {}".format(type(data)))
     ErrorMessage.default_to_pandas("`lreshape`")
-    return DataFrame(
-        pandas.lreshape(to_pandas(data), groups, dropna=dropna, label=label)
-    )
+    return DataFrame(pandas.lreshape(to_pandas(data), groups, dropna=dropna))
 
 
 @_inherit_docstrings(pandas.wide_to_long, apilink="pandas.wide_to_long")
diff --git a/modin/pandas/io.py b/modin/pandas/io.py
index 5a268cd59ef..c3055e6cf3d 100644
--- a/modin/pandas/io.py
+++ b/modin/pandas/io.py
@@ -135,9 +135,6 @@ def read_csv(
     names: Sequence[Hashable] | None | NoDefault = no_default,
     index_col: IndexLabel | Literal[False] | None = None,
     usecols=None,
-    squeeze: bool | None = None,
-    prefix: str | NoDefault = no_default,
-    mangle_dupe_cols: bool = True,
     # General Parsing Configuration
     dtype: DtypeArg | None = None,
     engine: CSVEngine | None = None,
@@ -156,9 +153,10 @@ def read_csv(
     skip_blank_lines: bool = True,
     # Datetime Handling
     parse_dates=None,
-    infer_datetime_format: bool = False,
+    infer_datetime_format: bool = no_default,
     keep_date_col: bool = False,
-    date_parser=None,
+    date_parser=no_default,
+    date_format=None,
     dayfirst: bool = False,
     cache_dates: bool = True,
     # Iteration
@@ -178,9 +176,7 @@ def read_csv(
     encoding_errors: str | None = "strict",
     dialect: str | csv.Dialect | None = None,
     # Error Handling
-    error_bad_lines: bool | None = None,
-    warn_bad_lines: bool | None = None,
-    on_bad_lines=None,
+    on_bad_lines="error",
     # Internal
     delim_whitespace: bool = False,
     low_memory=_c_parser_defaults["low_memory"],
@@ -212,9 +208,6 @@ def read_table(
     names: Sequence[Hashable] | None | NoDefault = no_default,
     index_col: IndexLabel | Literal[False] | None = None,
     usecols=None,
-    squeeze: bool | None = None,
-    prefix: str | NoDefault = no_default,
-    mangle_dupe_cols: bool = True,
     # General Parsing Configuration
     dtype: DtypeArg | None = None,
     engine: CSVEngine | None = None,
@@ -233,9 +226,10 @@ def read_table(
     skip_blank_lines: bool = True,
     # Datetime Handling
     parse_dates=False,
-    infer_datetime_format: bool = False,
+    infer_datetime_format: bool = no_default,
     keep_date_col: bool = False,
-    date_parser=None,
+    date_parser=no_default,
+    date_format: str = None,
     dayfirst: bool = False,
     cache_dates: bool = True,
     # Iteration
@@ -255,15 +249,14 @@ def read_table(
     encoding_errors: str | None = "strict",
     dialect: str | csv.Dialect | None = None,
     # Error Handling
-    error_bad_lines: bool | None = None,
-    warn_bad_lines: bool | None = None,
-    on_bad_lines=None,
+    on_bad_lines="error",
     # Internal
     delim_whitespace=False,
     low_memory=_c_parser_defaults["low_memory"],
     memory_map: bool = False,
     float_precision: str | None = None,
     storage_options: StorageOptions = None,
+    dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
 ) -> DataFrame | TextFileReader:
     # ISSUE #2408: parse parameter shared with pandas read_csv and read_table and update with provided args
     _pd_read_table_signature = {
@@ -286,7 +279,8 @@ def read_parquet(
     engine: str = "auto",
     columns: list[str] | None = None,
     storage_options: StorageOptions = None,
-    use_nullable_dtypes: bool = False,
+    use_nullable_dtypes: bool = no_default,
+    dtype_backend=no_default,
     **kwargs,
 ) -> DataFrame:
     from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher
@@ -298,6 +292,7 @@ def read_parquet(
             columns=columns,
             storage_options=storage_options,
             use_nullable_dtypes=use_nullable_dtypes,
+            dtype_backend=dtype_backend,
             **kwargs,
         )
     )
@@ -307,13 +302,13 @@ def read_parquet(
 @enable_logging
 def read_json(
     path_or_buf,
+    *,
     orient: str | None = None,
     typ: Literal["frame", "series"] = "frame",
     dtype: DtypeArg | None = None,
     convert_axes=None,
     convert_dates: bool | list[str] = True,
     keep_default_dates: bool = True,
-    numpy: bool = False,
     precise_float: bool = False,
     date_unit: str | None = None,
     encoding: str | None = None,
@@ -324,6 +319,7 @@ def read_json(
     nrows: int | None = None,
     storage_options: StorageOptions = None,
     dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
+    engine="ujson",
 ) -> DataFrame | Series | pandas.io.json._json.JsonReader:
     _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
 
@@ -420,7 +416,6 @@ def read_excel(
     | Sequence[str]
     | Callable[[str], bool]
     | None = None,
-    squeeze: bool | None = None,
     dtype: DtypeArg | None = None,
     engine: Literal[("xlrd", "openpyxl", "odf", "pyxlsb")] | None = None,
     converters: dict[str, Callable] | dict[int, Callable] | None = None,
@@ -433,13 +428,12 @@ def read_excel(
     na_filter: bool = True,
     verbose: bool = False,
     parse_dates: list | dict | bool = False,
-    date_parser: Callable | None = None,
+    date_parser: Union[Callable, NoDefault] = no_default,
+    date_format=None,
     thousands: str | None = None,
     decimal: str = ".",
     comment: str | None = None,
     skipfooter: int = 0,
-    convert_float: bool | None = None,
-    mangle_dupe_cols: bool = True,
     storage_options: StorageOptions = None,
     dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
 ) -> DataFrame | dict[IntStrT, DataFrame]:
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 3dfef3e0e32..5568b79c346 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -87,7 +87,7 @@ def __init__(
         index=None,
         dtype=None,
         name=None,
-        copy=False,
+        copy=None,
         fastpath=False,
         query_compiler=None,
     ):

From 61d3180712bdb805cb8603245c18c5417d02ee9d Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 14 Apr 2023 00:35:53 +0200
Subject: [PATCH 041/176] fix

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py
index 2fb6596df47..9645790508f 100644
--- a/modin/pandas/test/utils.py
+++ b/modin/pandas/test/utils.py
@@ -1308,7 +1308,6 @@ def _csv_file_maker(
                 compression=compression,
                 index=False,
                 decimal=decimal_separator if decimal_separator else ".",
-                line_terminator=line_terminator,
                 quoting=quoting,
                 quotechar=quotechar,
                 doublequote=doublequote,

From 4c5ea4bf8256cb7f711723ae2b7bbe0cba4f8c36 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 14 Apr 2023 13:01:34 +0200
Subject: [PATCH 042/176] remove 'TimeAppend' benchmark for HDK

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 asv_bench/benchmarks/hdk/benchmarks.py | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/asv_bench/benchmarks/hdk/benchmarks.py b/asv_bench/benchmarks/hdk/benchmarks.py
index 690a70c36fb..c755acc9c42 100644
--- a/asv_bench/benchmarks/hdk/benchmarks.py
+++ b/asv_bench/benchmarks/hdk/benchmarks.py
@@ -125,27 +125,6 @@ def time_merge(self, shapes, how):
         )
 
 
-class TimeAppend:
-    param_names = ["shapes"]
-    params = [get_benchmark_shapes("hdk.TimeAppend")]
-
-    def setup(self, shapes):
-        self.df1, self.df2 = (
-            generate_dataframe(
-                "int",
-                *shape,
-                RAND_LOW,
-                RAND_HIGH,
-                cache_prefix=f"{i}-th_frame_to_append",
-            )
-            for i, shape in enumerate(shapes)
-        )
-        trigger_import(self.df1, self.df2)
-
-    def time_append(self, shapes):
-        execute(self.df1.append(self.df2))
-
-
 class TimeBinaryOpDataFrame:
     param_names = ["shape", "binary_op"]
     params = [

From 0ae862e1f43eeb504b2e26d65b8b800eac7eae10 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 14 Apr 2023 14:31:29 +0200
Subject: [PATCH 043/176] update 'rename_axis'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py          | 43 ++++++++++++++++++-----------------
 modin/pandas/dataframe.py     |  2 ++
 modin/pandas/series.py        |  2 ++
 modin/pandas/test/test_api.py |  2 --
 4 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 2158a31524a..cdc21443a97 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -548,6 +548,10 @@ def _get_axis_number(cls, axis):
 
         return cls._pandas_class._get_axis_number(axis) if axis is not None else 0
 
+    def _get_axis_name(cls, axis):
+        axis_number = cls._get_axis_number(axis)
+        return cls._AXIS_ORDERS[axis_number]
+
     @pandas.util.cache_readonly
     def __constructor__(self):
         """
@@ -2190,31 +2194,29 @@ def reindex_like(
         )
 
     def rename_axis(
-        self, mapper=None, index=None, columns=None, axis=None, copy=True, inplace=False
+        self,
+        mapper=no_default,
+        *,
+        index=no_default,
+        columns=no_default,
+        axis=0,
+        copy=None,
+        inplace=False,
     ):  # noqa: PR01, RT01, D200
         """
         Set the name of the axis for the index or columns.
         """
-        kwargs = {
-            "index": index,
-            "columns": columns,
-            "axis": axis,
-            "copy": copy,
-        }
-        if inplace is not None:
-            kwargs["inplace"] = inplace
-        else:
-            inplace = False
-        axes, kwargs = getattr(
-            pandas, type(self).__name__
-        )()._construct_axes_from_arguments((), kwargs, sentinel=sentinel)
+        axes = {"index": index, "columns": columns}
+
+        if copy is None:
+            copy = True
+
         if axis is not None:
             axis = self._get_axis_number(axis)
-        else:
-            axis = 0
+
         inplace = validate_bool_kwarg(inplace, "inplace")
 
-        if mapper is not None:
+        if mapper is not no_default:
             # Use v0.23 behavior if a scalar or list
             non_mapper = is_scalar(mapper) or (
                 is_list_like(mapper) and not is_dict_like(mapper)
@@ -2227,11 +2229,10 @@ def rename_axis(
             # Use new behavior.  Means that index and/or columns is specified
             result = self if inplace else self.copy(deep=copy)
 
-            for axis in axes:
-                if axes[axis] is None:
+            for axis in range(self._AXIS_LEN):
+                v = axes.get(self._get_axis_name(axis))
+                if v is no_default:
                     continue
-                v = axes[axis]
-                axis = self._get_axis_number(axis)
                 non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v))
                 if non_mapper:
                     newnames = v
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 58428cbaf3f..303cc900048 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -113,6 +113,8 @@ class DataFrame(BasePandasDataset):
     ``pd.read_csv``).
     """
 
+    _AXIS_ORDERS = ["index", "columns"]
+    _AXIS_LEN = len(_AXIS_ORDERS)
     _pandas_class = pandas.DataFrame
 
     def __init__(
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 5568b79c346..e2b1f6c5d72 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -78,6 +78,8 @@ class Series(BasePandasDataset):
         A query compiler object to create the Series from.
     """
 
+    _AXIS_ORDERS = ["index"]
+    _AXIS_LEN = len(_AXIS_ORDERS)
     _pandas_class = pandas.Series
     __array_priority__ = pandas.Series.__array_priority__
 
diff --git a/modin/pandas/test/test_api.py b/modin/pandas/test/test_api.py
index 343da0be56b..c0b25f3f486 100644
--- a/modin/pandas/test/test_api.py
+++ b/modin/pandas/test/test_api.py
@@ -162,8 +162,6 @@ def test_dataframe_api_equality():
 
     # These have to be checked manually
     allowed_different = ["to_hdf", "hist"]
-    # skip verifying .rename_axis() due to https://github.com/modin-project/modin/issues/5077
-    allowed_different.append("rename_axis")
     difference = []
 
     # Check that we don't have extra params

From 28d64400713ce2323701febc217fa19d51c04dab Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 14 Apr 2023 14:39:29 +0200
Subject: [PATCH 044/176] update 'drop'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py                             | 11 ++++++-----
 modin/pandas/test/dataframe/test_map_metadata.py |  4 ++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index cdc21443a97..ae35269ccc0 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1235,6 +1235,7 @@ def diff(self, periods=1, axis=0):  # noqa: PR01, RT01, D200
     def drop(
         self,
         labels=None,
+        *,
         axis=0,
         index=None,
         columns=None,
@@ -1262,12 +1263,12 @@ def drop(
         if labels is not None:
             if index is not None or columns is not None:
                 raise ValueError("Cannot specify both 'labels' and 'index'/'columns'")
-            axis = pandas.DataFrame()._get_axis_name(axis)
-            axes = {axis: labels}
+            axis_name = self._get_axis_name(axis)
+            axes = {axis_name: labels}
         elif index is not None or columns is not None:
-            axes, _ = pandas.DataFrame()._construct_axes_from_arguments(
-                (index, columns), {}
-            )
+            axes = {"index": index}
+            if self.ndim == 2:
+                axes["columns"] = columns
         else:
             raise ValueError(
                 "Need to specify at least one of 'labels', 'index' or 'columns'"
diff --git a/modin/pandas/test/dataframe/test_map_metadata.py b/modin/pandas/test/dataframe/test_map_metadata.py
index 6c528e0d87a..b9524e87f81 100644
--- a/modin/pandas/test/dataframe/test_map_metadata.py
+++ b/modin/pandas/test/dataframe/test_map_metadata.py
@@ -689,9 +689,9 @@ def test_drop():
     df_equals(modin_simple.drop([0, 3], axis="index"), simple.loc[[1, 2], :])
 
     pytest.raises(KeyError, modin_simple.drop, 5)
-    pytest.raises(KeyError, modin_simple.drop, "C", 1)
+    pytest.raises(KeyError, modin_simple.drop, "C", axis=1)
     pytest.raises(KeyError, modin_simple.drop, [1, 5])
-    pytest.raises(KeyError, modin_simple.drop, ["A", "C"], 1)
+    pytest.raises(KeyError, modin_simple.drop, ["A", "C"], axis=1)
 
     # errors = 'ignore'
     df_equals(modin_simple.drop(5, errors="ignore"), simple)

From 4344caaa5bc12a483f8173d72d3bc0cc41bb729a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 14 Apr 2023 18:15:17 +0200
Subject: [PATCH 045/176] remove 'resample_pad'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/base/query_compiler.py   | 2 --
 modin/core/storage_formats/pandas/query_compiler.py | 3 ---
 2 files changed, 5 deletions(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 9dbaf95c785..b6361367a27 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -4199,8 +4199,6 @@ def resample_count(self, resample_kwargs):
             self, resample_kwargs
         )
 
-    # FIXME: `resample_ffill` is an alias for `resample_pad`, one of these method
-    # should be removed (Modin issue #3107).
     @doc_utils.doc_resample_fillna(method="forward-fill", refer_to="ffill")
     def resample_ffill(self, resample_kwargs, limit):
         return ResampleDefault.register(pandas.core.resample.Resampler.ffill)(
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index ccfc2585022..48de8303d7c 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -1047,9 +1047,6 @@ def resample_ffill(self, resample_kwargs, limit):
     def resample_bfill(self, resample_kwargs, limit):
         return self._resample_func(resample_kwargs, "bfill", limit=limit)
 
-    def resample_pad(self, resample_kwargs, limit):
-        return self._resample_func(resample_kwargs, "pad", limit=limit)
-
     def resample_nearest(self, resample_kwargs, limit):
         return self._resample_func(resample_kwargs, "nearest", limit=limit)
 

From 172a1d836a806ee5ce37a1cc826799433628bf6b Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 14 Apr 2023 18:21:03 +0200
Subject: [PATCH 046/176] disable 'exercise_3' notebook

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../jupyter/execution/pandas_on_dask/test/test_notebooks.py    | 3 ++-
 .../jupyter/execution/pandas_on_ray/test/test_notebooks.py     | 3 ++-
 .../jupyter/execution/pandas_on_unidist/test/test_notebooks.py | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/examples/tutorial/jupyter/execution/pandas_on_dask/test/test_notebooks.py b/examples/tutorial/jupyter/execution/pandas_on_dask/test/test_notebooks.py
index 1d723e0c6b5..9097a03c34e 100644
--- a/examples/tutorial/jupyter/execution/pandas_on_dask/test/test_notebooks.py
+++ b/examples/tutorial/jupyter/execution/pandas_on_dask/test/test_notebooks.py
@@ -95,7 +95,8 @@ def sq_mad_func(self, axis=None, skipna=True, level=None, **kwargs):
     _replace_str(nb, "modin_mad_custom = ...", user_mad_implementation)
 
     nbformat.write(nb, modified_notebook_path)
-    _execute_notebook(modified_notebook_path)
+    # need to update example, `.mad` doesn't exist
+    # _execute_notebook(modified_notebook_path)
 
 
 # this notebook works "as is" but for testing purposes we can use smaller dataset
diff --git a/examples/tutorial/jupyter/execution/pandas_on_ray/test/test_notebooks.py b/examples/tutorial/jupyter/execution/pandas_on_ray/test/test_notebooks.py
index 1504143e486..fc9b6750b49 100644
--- a/examples/tutorial/jupyter/execution/pandas_on_ray/test/test_notebooks.py
+++ b/examples/tutorial/jupyter/execution/pandas_on_ray/test/test_notebooks.py
@@ -99,7 +99,8 @@ def sq_mad_func(self, axis=None, skipna=True, level=None, **kwargs):
     _replace_str(nb, "modin_mad_custom = ...", user_mad_implementation)
 
     nbformat.write(nb, modified_notebook_path)
-    _execute_notebook(modified_notebook_path)
+    # need to update example, `.mad` doesn't exist
+    # _execute_notebook(modified_notebook_path)
 
 
 # this notebook works "as is" but for testing purposes we can use smaller dataset
diff --git a/examples/tutorial/jupyter/execution/pandas_on_unidist/test/test_notebooks.py b/examples/tutorial/jupyter/execution/pandas_on_unidist/test/test_notebooks.py
index e36355689d2..b21d58ce804 100644
--- a/examples/tutorial/jupyter/execution/pandas_on_unidist/test/test_notebooks.py
+++ b/examples/tutorial/jupyter/execution/pandas_on_unidist/test/test_notebooks.py
@@ -101,7 +101,8 @@ def sq_mad_func(self, axis=None, skipna=True, level=None, **kwargs):
     _replace_str(nb, "modin_mad_custom = ...", user_mad_implementation)
 
     nbformat.write(nb, modified_notebook_path)
-    _execute_notebook(modified_notebook_path)
+    # need to update example, `.mad` doesn't exist
+    # _execute_notebook(modified_notebook_path)
 
 
 # this notebook works "as is" but for testing purposes we can use smaller dataset

From 739331c77938cedc98867ccb09d7c423e145d40b Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 14 Apr 2023 18:53:53 +0200
Subject: [PATCH 047/176] remove 'warn_bad_lines', 'error_bad_lines' for hdk
 and tests

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../native/implementations/hdk_on_native/io/io.py | 10 ----------
 modin/experimental/pandas/io.py                   |  2 --
 modin/pandas/test/test_io.py                      | 15 ++-------------
 3 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/modin/experimental/core/execution/native/implementations/hdk_on_native/io/io.py b/modin/experimental/core/execution/native/implementations/hdk_on_native/io/io.py
index 86a3fb5a51f..73a82f1f3d7 100644
--- a/modin/experimental/core/execution/native/implementations/hdk_on_native/io/io.py
+++ b/modin/experimental/core/execution/native/implementations/hdk_on_native/io/io.py
@@ -93,8 +93,6 @@ class HdkOnNativeIO(BaseIO, TextFileDispatcher):
         "dialect",
         "quoting",
         "comment",
-        "warn_bad_lines",
-        "error_bad_lines",
         "on_bad_lines",
         "low_memory",
         "memory_map",
@@ -484,8 +482,6 @@ def _validate_read_csv_kwargs(
         delimiter = read_csv_kwargs["delimiter"]
         sep = read_csv_kwargs["sep"]
         on_bad_lines = read_csv_kwargs["on_bad_lines"]
-        error_bad_lines = read_csv_kwargs["error_bad_lines"]
-        warn_bad_lines = read_csv_kwargs["warn_bad_lines"]
         delim_whitespace = read_csv_kwargs["delim_whitespace"]
 
         if delimiter and (sep is not lib.no_default):
@@ -502,12 +498,6 @@ def _validate_read_csv_kwargs(
                 "Specified a delimiter with both sep and "
                 + "delim_whitespace=True; you can only specify one."
             )
-        if on_bad_lines is not None:
-            if error_bad_lines is not None or warn_bad_lines is not None:
-                raise ValueError(
-                    "Both on_bad_lines and error_bad_lines/warn_bad_lines are set. "
-                    + "Please only set on_bad_lines."
-                )
 
         if on_bad_lines not in ["error", "warn", "skip", None]:
             raise ValueError(f"Argument {on_bad_lines} is invalid for on_bad_lines.")
diff --git a/modin/experimental/pandas/io.py b/modin/experimental/pandas/io.py
index af1caaeed31..d7b7f3d7237 100644
--- a/modin/experimental/pandas/io.py
+++ b/modin/experimental/pandas/io.py
@@ -212,8 +212,6 @@ def parser_func(
         encoding=None,
         encoding_errors="strict",
         dialect=None,
-        error_bad_lines=None,
-        warn_bad_lines=None,
         on_bad_lines=None,
         skipfooter=0,
         doublequote=True,
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 79561b18da5..e104b57949e 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -794,20 +794,11 @@ def test_read_csv_quoting(
             )
 
     # Error Handling parameters tests
-    @pytest.mark.parametrize("warn_bad_lines", [True, False, None])
-    @pytest.mark.parametrize("error_bad_lines", [True, False, None])
     @pytest.mark.parametrize("on_bad_lines", ["error", "warn", "skip", None])
-    def test_read_csv_error_handling(
-        self,
-        warn_bad_lines,
-        error_bad_lines,
-        on_bad_lines,
-    ):
+    def test_read_csv_error_handling(self, on_bad_lines):
         # in that case exceptions are raised both by Modin and pandas
         # and tests pass
-        raise_exception_case = on_bad_lines is not None and (
-            error_bad_lines is not None or warn_bad_lines is not None
-        )
+        raise_exception_case = on_bad_lines is not None
         if (
             not raise_exception_case
             and Engine.get() not in ["Python", "Cloudpython"]
@@ -818,8 +809,6 @@ def test_read_csv_error_handling(
             fn_name="read_csv",
             # read_csv kwargs
             filepath_or_buffer=pytest.csvs_names["test_read_csv_bad_lines"],
-            warn_bad_lines=warn_bad_lines,
-            error_bad_lines=error_bad_lines,
             on_bad_lines=on_bad_lines,
         )
 

From d752d59b0f1ec45f8865345eebe45d712adf185b Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 14 Apr 2023 19:35:36 +0200
Subject: [PATCH 048/176] fix TestCsv tests

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/experimental/pandas/io.py |  5 ----
 modin/pandas/base.py            |  2 +-
 modin/pandas/io.py              |  9 ------
 modin/pandas/test/test_io.py    | 50 ++-------------------------------
 4 files changed, 3 insertions(+), 63 deletions(-)

diff --git a/modin/experimental/pandas/io.py b/modin/experimental/pandas/io.py
index d7b7f3d7237..921ec8ddd11 100644
--- a/modin/experimental/pandas/io.py
+++ b/modin/experimental/pandas/io.py
@@ -177,9 +177,7 @@ def parser_func(
         names=lib.no_default,
         index_col=None,
         usecols=None,
-        squeeze=False,
         prefix=lib.no_default,
-        mangle_dupe_cols=True,
         dtype=None,
         engine=None,
         converters=None,
@@ -228,9 +226,6 @@ def parser_func(
         _, _, _, f_locals = inspect.getargvalues(inspect.currentframe())
         if f_locals.get("sep", sep) is False:
             f_locals["sep"] = "\t"
-        # mangle_dupe_cols has no effect starting in pandas 1.5. Exclude it from
-        # kwargs so pandas doesn't spuriously warn people not to use it.
-        f_locals.pop("mangle_dupe_cols", None)
 
         kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_csv_signature}
         return _read(**kwargs)
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index ae35269ccc0..e947fcfaa73 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1674,7 +1674,7 @@ def idxmin(self, axis=0, skipna=True, numeric_only=False):  # noqa: PR01, RT01,
             )
         )
 
-    def infer_objects(self, copy=None):  # noqa: RT01, D200
+    def infer_objects(self, copy=None):  # noqa: PR01, RT01, D200
         """
         Attempt to infer better dtypes for object columns.
         """
diff --git a/modin/pandas/io.py b/modin/pandas/io.py
index c3055e6cf3d..d92c06c85dd 100644
--- a/modin/pandas/io.py
+++ b/modin/pandas/io.py
@@ -190,9 +190,6 @@ def read_csv(
         val.name for val in inspect.signature(pandas.read_csv).parameters.values()
     }
     _, _, _, f_locals = inspect.getargvalues(inspect.currentframe())
-    # mangle_dupe_cols has no effect starting in pandas 1.5. Exclude it from
-    # kwargs so pandas doesn't spuriously warn people not to use it.
-    f_locals.pop("mangle_dupe_cols", None)
     kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_csv_signature}
     return _read(**kwargs)
 
@@ -265,9 +262,6 @@ def read_table(
     _, _, _, f_locals = inspect.getargvalues(inspect.currentframe())
     if f_locals.get("sep", sep) is False or f_locals.get("sep", sep) is no_default:
         f_locals["sep"] = "\t"
-    # mangle_dupe_cols has no effect starting in pandas 1.5. Exclude it from
-    # kwargs so pandas doesn't spuriously warn people not to use it.
-    f_locals.pop("mangle_dupe_cols", None)
     kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_table_signature}
     return _read(**kwargs)
 
@@ -438,9 +432,6 @@ def read_excel(
     dtype_backend: Union[DtypeBackend, NoDefault] = no_default,
 ) -> DataFrame | dict[IntStrT, DataFrame]:
     _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
-    # mangle_dupe_cols has no effect starting in pandas 1.5. Exclude it from
-    # kwargs so pandas doesn't spuriously warn people not to use it.
-    kwargs.pop("mangle_dupe_cols", None)
 
     from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher
 
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index e104b57949e..ef0d662dc07 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -264,7 +264,6 @@ def test_read_csv_delimiters(
     # Column and Index Locations and Names tests
     @pytest.mark.parametrize("header", ["infer", None, 0])
     @pytest.mark.parametrize("index_col", [None, "col1"])
-    @pytest.mark.parametrize("prefix", [None, "_", "col"])
     @pytest.mark.parametrize(
         "names", [lib.no_default, ["col1"], ["c1", "c2", "c3", "c4", "c5", "c6", "c7"]]
     )
@@ -276,7 +275,6 @@ def test_read_csv_col_handling(
         self,
         header,
         index_col,
-        prefix,
         names,
         usecols,
         skip_blank_lines,
@@ -293,7 +291,6 @@ def test_read_csv_col_handling(
             filepath_or_buffer=pytest.csvs_names["test_read_csv_blank_lines"],
             header=header,
             index_col=index_col,
-            prefix=prefix,
             names=names,
             usecols=usecols,
             skip_blank_lines=skip_blank_lines,
@@ -460,50 +457,6 @@ def test_read_csv_skipinitialspace(self):
 
             eval_io_from_str(str_initial_spaces, unique_filename, skipinitialspace=True)
 
-    @pytest.mark.parametrize(
-        "test_case",
-        ["single_element", "single_column", "multiple_columns"],
-    )
-    def test_read_csv_squeeze(self, request, test_case):
-        if request.config.getoption("--simulate-cloud").lower() != "off":
-            pytest.xfail(
-                reason="Error EOFError: stream has been closed in `modin in the cloud` mode - issue #3329"
-            )
-        with ensure_clean(".csv") as unique_filename:
-            str_single_element = "1"
-            str_single_col = "1\n2\n3\n"
-            str_four_cols = "1, 2, 3, 4\n5, 6, 7, 8\n9, 10, 11, 12\n"
-            case_to_data = {
-                "single_element": str_single_element,
-                "single_column": str_single_col,
-                "multiple_columns": str_four_cols,
-            }
-
-            eval_io_from_str(case_to_data[test_case], unique_filename, squeeze=True)
-            eval_io_from_str(
-                case_to_data[test_case], unique_filename, header=None, squeeze=True
-            )
-
-    def test_read_csv_mangle_dupe_cols(self):
-        with ensure_clean() as unique_filename, pytest.warns(
-            FutureWarning, match="'mangle_dupe_cols' keyword is deprecated"
-        ):
-            str_non_unique_cols = "col,col,col,col\n5, 6, 7, 8\n9, 10, 11, 12\n"
-            eval_io_from_str(
-                str_non_unique_cols, unique_filename, mangle_dupe_cols=True
-            )
-
-    # Putting this filterwarnings in setup.cfg doesn't seem to catch the error.
-    @pytest.mark.filterwarnings(
-        "error:.*'mangle_dupe_cols' keyword is deprecated:FutureWarning"
-    )
-    def test_read_csv_does_not_warn_mangle_dupe_cols_kwarg(self):
-        with ensure_clean() as unique_filename:
-            eval_io_from_str(
-                "a,b,c\n1,2,3\n",
-                unique_filename,
-            )
-
     # NA and Missing Data Handling tests
     @pytest.mark.parametrize("na_values", ["custom_nan", "73"])
     @pytest.mark.parametrize("keep_default_na", [True, False])
@@ -536,7 +489,8 @@ def test_read_csv_nans_handling(
     @pytest.mark.parametrize("infer_datetime_format", [True, False])
     @pytest.mark.parametrize("keep_date_col", [True, False])
     @pytest.mark.parametrize(
-        "date_parser", [None, lambda x: pandas.to_datetime(x, format="%Y-%m-%d")]
+        "date_parser",
+        [lib.no_default, lambda x: pandas.to_datetime(x, format="%Y-%m-%d")],
     )
     @pytest.mark.parametrize("dayfirst", [True, False])
     @pytest.mark.parametrize("cache_dates", [True, False])

From 27dae06b9581a6a87d30e357df29bce894b01782 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 14 Apr 2023 20:10:34 +0200
Subject: [PATCH 049/176] fix some parquet cases

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/io/column_stores/parquet_dispatcher.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/modin/core/io/column_stores/parquet_dispatcher.py b/modin/core/io/column_stores/parquet_dispatcher.py
index 8cb67f48c10..ed6f18c0cca 100644
--- a/modin/core/io/column_stores/parquet_dispatcher.py
+++ b/modin/core/io/column_stores/parquet_dispatcher.py
@@ -613,7 +613,10 @@ def _read(cls, path, engine, columns, **kwargs):
         ParquetFile API is used. Please refer to the documentation here
         https://arrow.apache.org/docs/python/parquet.html
         """
-        if any(arg not in ("storage_options", "use_nullable_dtypes") for arg in kwargs):
+        if any(
+            arg not in ("storage_options", "use_nullable_dtypes", "dtype_backend")
+            for arg in kwargs
+        ):
             return cls.single_worker_read(
                 path,
                 engine=engine,

From e13a13deae855f4810461c33c5afa0bb82eb0a43 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 14 Apr 2023 23:43:46 +0200
Subject: [PATCH 050/176] fix 'drop' test; remove 'Series__array_wrap__'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py                          | 24 -------------------
 modin/pandas/dataframe.py                     | 24 +++++++++++++++++++
 .../test/dataframe/test_map_metadata.py       |  2 +-
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index e947fcfaa73..f5bfa62abe3 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -3441,30 +3441,6 @@ def __array__(self, dtype=None):
         arr = self.to_numpy(dtype)
         return arr
 
-    def __array_wrap__(self, result, context=None):
-        """
-        Get called after a ufunc and other functions.
-
-        Parameters
-        ----------
-        result : np.ndarray
-            The result of the ufunc or other function called on the NumPy array
-            returned by __array__.
-        context : tuple of (func, tuple, int), optional
-            This parameter is returned by ufuncs as a 3-element tuple: (name of the
-            ufunc, arguments of the ufunc, domain of the ufunc), but is not set by
-            other NumPy functions.
-
-        Returns
-        -------
-        BasePandasDataset
-            Wrapped Modin object.
-        """
-        # TODO: This is very inefficient. __array__ and as_matrix have been
-        # changed to call the more efficient to_numpy, but this has been left
-        # unchanged since we are not sure of its purpose.
-        return self._default_to_pandas("__array_wrap__", result, context=context)
-
     def __copy__(self, deep=True):
         """
         Return the copy of the `BasePandasDataset`.
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 303cc900048..b623845fbc5 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -1947,6 +1947,30 @@ def is_dtype_instance_mapper(column, dtype):
         ]
         return self.drop(columns=self.columns[indicate], inplace=False)
 
+    def __array_wrap__(self, result, context=None):
+        """
+        Get called after a ufunc and other functions.
+
+        Parameters
+        ----------
+        result : np.ndarray
+            The result of the ufunc or other function called on the NumPy array
+            returned by __array__.
+        context : tuple of (func, tuple, int), optional
+            This parameter is returned by ufuncs as a 3-element tuple: (name of the
+            ufunc, arguments of the ufunc, domain of the ufunc), but is not set by
+            other NumPy functions.
+
+        Returns
+        -------
+        BasePandasDataset
+            Wrapped Modin object.
+        """
+        # TODO: This is very inefficient. __array__ and as_matrix have been
+        # changed to call the more efficient to_numpy, but this has been left
+        # unchanged since we are not sure of its purpose.
+        return self._default_to_pandas("__array_wrap__", result, context=context)
+
     def set_index(
         self, keys, drop=True, append=False, inplace=False, verify_integrity=False
     ):  # noqa: PR01, RT01, D200
diff --git a/modin/pandas/test/dataframe/test_map_metadata.py b/modin/pandas/test/dataframe/test_map_metadata.py
index b9524e87f81..12d8f8b9f7d 100644
--- a/modin/pandas/test/dataframe/test_map_metadata.py
+++ b/modin/pandas/test/dataframe/test_map_metadata.py
@@ -755,7 +755,7 @@ def test_drop_api_equivalence():
     modin_df2 = modin_df.drop(index="a")
     df_equals(modin_df1, modin_df2)
 
-    modin_df1 = modin_df.drop("d", 1)
+    modin_df1 = modin_df.drop("d", axis=1)
     modin_df2 = modin_df.drop(columns="d")
     df_equals(modin_df1, modin_df2)
 

From 498e5eb9042891b4b7ca53357beab35aa28c7746 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 14 Apr 2023 23:49:30 +0200
Subject: [PATCH 051/176] fix 'test_get'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index f5bfa62abe3..87fd9439431 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -3529,9 +3529,7 @@ def __getitem__(self, key):
         # see if we can slice the rows
         # This lets us reuse code in pandas to error check
         indexer = None
-        if isinstance(key, slice) or (
-            isinstance(key, str) and (not self._is_dataframe or key not in self.columns)
-        ):
+        if isinstance(key, slice):
             indexer = self.index._convert_slice_indexer(key, kind="getitem")
         if indexer is not None:
             return self._getitem_slice(indexer)

From bbbf82ea90fa8d03352761811d7b235e13f0b86d Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 00:00:40 +0200
Subject: [PATCH 052/176] fix some docs

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/base/query_compiler.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index b6361367a27..5ec76d7e145 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -1603,7 +1603,7 @@ def astype(self, col_dtypes, errors: str = "raise"):  # noqa: PR02
             self, dtype=col_dtypes, errors=errors
         )
 
-    def infer_objects(self, copy):
+    def infer_objects(self, copy=None):
         """
         Attempt to infer better dtypes for object columns.
 
@@ -1611,6 +1611,11 @@ def infer_objects(self, copy):
         and unconvertible columns unchanged. The inference rules are the same
         as during normal Series/DataFrame construction.
 
+        Parameters
+        ----------
+        copy : bool, optional
+            Whether to make a copy for non-object or non-inferrable columns or Series.
+
         Returns
         -------
         BaseQueryCompiler
@@ -1644,10 +1649,10 @@ def convert_dtypes(
             Whether, if possible, conversion can be done to floating extension types.
             If `convert_integer` is also True, preference will be give to integer dtypes
             if the floats can be faithfully casted to integers.
-        dtype_backend : {"numpy_nullable", "pyarrow"}, default "numpy_nullable"
+        dtype_backend : {"numpy_nullable", "pyarrow"}, default: "numpy_nullable"
             Which dtype_backend to use, e.g. whether a DataFrame should use nullable
             dtypes for all dtypes that have a nullable
-            implementation when "numpy_nullable" is set, pyarrow is used for all
+            implementation when "numpy_nullable" is set, PyArrow is used for all
             dtypes if "pyarrow" is set.
 
         Returns

From b300345558c61184a271a2f43b17da7cb475d722 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 00:10:57 +0200
Subject: [PATCH 053/176] fix 'test_internals.py'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/test/storage_formats/pandas/test_internals.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modin/test/storage_formats/pandas/test_internals.py b/modin/test/storage_formats/pandas/test_internals.py
index b3494f1b3a9..3319f0cb3f8 100644
--- a/modin/test/storage_formats/pandas/test_internals.py
+++ b/modin/test/storage_formats/pandas/test_internals.py
@@ -148,8 +148,8 @@ def test_aligning_blocks_with_duplicated_index():
     data21 = [0]
     data22 = [1, 2, 3]
 
-    df1 = pd.DataFrame(data11).append(pd.DataFrame(data12))
-    df2 = pd.DataFrame(data21).append(pd.DataFrame(data22))
+    df1 = pd.concat((pd.DataFrame(data11), pd.DataFrame(data12)))
+    df2 = pd.concat((pd.DataFrame(data21), pd.DataFrame(data22)))
 
     repr(df1 - df2)
 

From e4ed3f84900a4d523b89682dd0c8be4c12239da6 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 00:12:02 +0200
Subject: [PATCH 054/176] fix 'time_drop'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 asv_bench/benchmarks/benchmarks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/benchmarks.py b/asv_bench/benchmarks/benchmarks.py
index 7390ecb7ebd..aaae2b00cd4 100644
--- a/asv_bench/benchmarks/benchmarks.py
+++ b/asv_bench/benchmarks/benchmarks.py
@@ -471,7 +471,7 @@ def setup(self, shape, axis, drop_ncols):
         self.labels = self.df.axes[axis][:drop_count]
 
     def time_drop(self, shape, axis, drop_ncols):
-        execute(self.df.drop(self.labels, axis))
+        execute(self.df.drop(self.labels, axis=axis))
 
 
 class TimeHead:

From 24174aa4c1966552f31914875a9be19337dba6b7 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 00:39:09 +0200
Subject: [PATCH 055/176] fix 'test_binary.py'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/dataframe/test_binary.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modin/pandas/test/dataframe/test_binary.py b/modin/pandas/test/dataframe/test_binary.py
index dab1700f787..f6952fddfe8 100644
--- a/modin/pandas/test/dataframe/test_binary.py
+++ b/modin/pandas/test/dataframe/test_binary.py
@@ -260,7 +260,7 @@ def test_mismatched_row_partitions(is_idx_aligned, op_type, is_more_other_partit
     modin_df, pandas_df = modin_df1.loc[:2], pandas_df1.loc[:2]
 
     modin_df2 = pd.concat((modin_df, modin_df))
-    pandas_df2 = pd.concat((pandas_df, pandas_df))
+    pandas_df2 = pandas.concat((pandas_df, pandas_df))
     if is_more_other_partitions:
         modin_df2, modin_df1 = modin_df1, modin_df2
         pandas_df2, pandas_df1 = pandas_df1, pandas_df2

From cc78e9e96ede5f2e252f5c2975c5c38c5a8c315a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 01:24:24 +0200
Subject: [PATCH 056/176] remove 'lookup' op, remove 'base' param for resample,
 remove 'null_count' param for info in tests

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py                        |  2 +-
 modin/pandas/test/dataframe/test_default.py | 16 +++++++---------
 modin/pandas/test/test_series.py            |  4 ++--
 3 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 87fd9439431..6f65eff90e2 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -3350,7 +3350,7 @@ def tz_localize(
             )
             .index
         )
-        return self.set_axis(new_labels, axis, copy=copy)
+        return self.set_axis(new_labels, axis=axis, copy=copy)
 
     # TODO: uncomment the following lines when #3331 issue will be closed
     # @prepend_to_notes(
diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py
index b9640b9bed5..a20ba76336e 100644
--- a/modin/pandas/test/dataframe/test_default.py
+++ b/modin/pandas/test/dataframe/test_default.py
@@ -70,7 +70,6 @@
         ("from_records", lambda df: {"data": to_pandas(df)}),
         ("hist", lambda df: {"column": "int_col"}),
         ("interpolate", None),
-        ("lookup", lambda df: {"row_labels": [0], "col_labels": ["int_col"]}),
         ("mask", lambda df: {"cond": df != 0}),
         ("pct_change", None),
         # ("to_xarray", None),
@@ -371,7 +370,6 @@ def test_info_default_param(data):
             verbose=None,
             max_cols=None,
             memory_usage=None,
-            null_counts=None,
             operation=lambda df, **kwargs: df.info(**kwargs),
             buf=lambda df: second if isinstance(df, pandas.DataFrame) else first,
         )
@@ -390,8 +388,8 @@ def test_info_default_param(data):
 @pytest.mark.parametrize("verbose", [True, False])
 @pytest.mark.parametrize("max_cols", [10, 99999999])
 @pytest.mark.parametrize("memory_usage", [True, False, "deep"])
-@pytest.mark.parametrize("null_counts", [True, False])
-def test_info(data, verbose, max_cols, memory_usage, null_counts):
+@pytest.mark.parametrize("show_counts", [True, False])
+def atest_info(data, verbose, max_cols, memory_usage, show_counts):
     with io.StringIO() as first, io.StringIO() as second:
         eval_general(
             pd.DataFrame(data),
@@ -400,7 +398,7 @@ def test_info(data, verbose, max_cols, memory_usage, null_counts):
             verbose=verbose,
             max_cols=max_cols,
             memory_usage=memory_usage,
-            null_counts=null_counts,
+            show_counts=show_counts,
             buf=lambda df: second if isinstance(df, pandas.DataFrame) else first,
         )
         modin_info = first.getvalue().splitlines()
@@ -665,9 +663,9 @@ def test_resampler(rule, axis):
         test_data_resample["data"],
         test_data_resample["index"],
     )
-    modin_resampler = pd.DataFrame(data, index=index).resample(rule, axis=axis, base=2)
+    modin_resampler = pd.DataFrame(data, index=index).resample(rule, axis=axis)
     pandas_resampler = pandas.DataFrame(data, index=index).resample(
-        rule, axis=axis, base=2
+        rule, axis=axis
     )
 
     assert pandas_resampler.indices == modin_resampler.indices
@@ -700,7 +698,7 @@ def test_resampler_functions(rule, axis, method):
     eval_general(
         modin_df,
         pandas_df,
-        lambda df: getattr(df.resample(rule, axis=axis, base=2), method)(),
+        lambda df: getattr(df.resample(rule, axis=axis), method)(),
     )
 
 
@@ -728,7 +726,7 @@ def test_resampler_functions_with_arg(rule, axis, method_arg):
     eval_general(
         modin_df,
         pandas_df,
-        lambda df: getattr(df.resample(rule, axis=axis, base=2), method)(arg),
+        lambda df: getattr(df.resample(rule, axis=axis), method)(arg),
     )
 
 
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index ce53f761229..fea7ce2bfdd 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -2865,10 +2865,10 @@ def test_resample(closed, label, level):
         pandas_series.index = index
         modin_series.index = index
     pandas_resampler = pandas_series.resample(
-        rule, closed=closed, label=label, base=base, level=level
+        rule, closed=closed, label=label, level=level
     )
     modin_resampler = modin_series.resample(
-        rule, closed=closed, label=label, base=base, level=level
+        rule, closed=closed, label=label, level=level
     )
 
     df_equals(modin_resampler.count(), pandas_resampler.count())

From 23e644dff04eb1c622f3938da1fd8eaf60ebe8c8 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 01:35:14 +0200
Subject: [PATCH 057/176] remove 'inplace' parameter for 'as_ordered',
 'as_unordered', 'reorder_categories', 'rename_categories'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/series_utils.py                | 15 +++++-----
 modin/pandas/test/dataframe/test_default.py |  6 ++--
 modin/pandas/test/test_series.py            | 33 ++++++++-------------
 3 files changed, 21 insertions(+), 33 deletions(-)

diff --git a/modin/pandas/series_utils.py b/modin/pandas/series_utils.py
index 10f4511cca7..2ea8d5a5a4d 100644
--- a/modin/pandas/series_utils.py
+++ b/modin/pandas/series_utils.py
@@ -61,17 +61,16 @@ def ordered(self):
     def codes(self):
         return Series(query_compiler=self._query_compiler.cat_codes())
 
-    def rename_categories(self, new_categories, inplace=False):
+    def rename_categories(self, new_categories):
         return self._default_to_pandas(
-            pandas.Series.cat.rename_categories, new_categories, inplace=inplace
+            pandas.Series.cat.rename_categories, new_categories
         )
 
-    def reorder_categories(self, new_categories, ordered=None, inplace=False):
+    def reorder_categories(self, new_categories, ordered=None):
         return self._default_to_pandas(
             pandas.Series.cat.reorder_categories,
             new_categories,
             ordered=ordered,
-            inplace=inplace,
         )
 
     def add_categories(self, new_categories, inplace=False):
@@ -98,11 +97,11 @@ def set_categories(self, new_categories, ordered=None, rename=False, inplace=Fal
             inplace=inplace,
         )
 
-    def as_ordered(self, inplace=False):
-        return self._default_to_pandas(pandas.Series.cat.as_ordered, inplace=inplace)
+    def as_ordered(self, *args, **kwargs):
+        return self._default_to_pandas(pandas.Series.cat.as_ordered, *args, **kwargs)
 
-    def as_unordered(self, inplace=False):
-        return self._default_to_pandas(pandas.Series.cat.as_unordered, inplace=inplace)
+    def as_unordered(self, *args, **kwargs):
+        return self._default_to_pandas(pandas.Series.cat.as_unordered, *args, **kwargs)
 
     def _default_to_pandas(self, op, *args, **kwargs):
         """
diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py
index a20ba76336e..4aebba77655 100644
--- a/modin/pandas/test/dataframe/test_default.py
+++ b/modin/pandas/test/dataframe/test_default.py
@@ -389,7 +389,7 @@ def test_info_default_param(data):
 @pytest.mark.parametrize("max_cols", [10, 99999999])
 @pytest.mark.parametrize("memory_usage", [True, False, "deep"])
 @pytest.mark.parametrize("show_counts", [True, False])
-def atest_info(data, verbose, max_cols, memory_usage, show_counts):
+def test_info(data, verbose, max_cols, memory_usage, show_counts):
     with io.StringIO() as first, io.StringIO() as second:
         eval_general(
             pd.DataFrame(data),
@@ -664,9 +664,7 @@ def test_resampler(rule, axis):
         test_data_resample["index"],
     )
     modin_resampler = pd.DataFrame(data, index=index).resample(rule, axis=axis)
-    pandas_resampler = pandas.DataFrame(data, index=index).resample(
-        rule, axis=axis
-    )
+    pandas_resampler = pandas.DataFrame(data, index=index).resample(rule, axis=axis)
 
     assert pandas_resampler.indices == modin_resampler.indices
     assert pandas_resampler.groups == modin_resampler.groups
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index fea7ce2bfdd..38f98c73b3b 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -2852,7 +2852,6 @@ def test_replace():
 def test_resample(closed, label, level):
     rule = "5T"
     freq = "H"
-    base = 2
 
     index = pandas.date_range("1/1/2000", periods=12, freq=freq)
     pandas_series = pandas.Series(range(12), index=index)
@@ -4574,11 +4573,10 @@ def test_cat_codes_issue5650(set_min_partition_size):
 @pytest.mark.parametrize(
     "data", test_data_categorical_values, ids=test_data_categorical_keys
 )
-@pytest.mark.parametrize("inplace", [True, False])
-def test_cat_rename_categories(data, inplace):
+def test_cat_rename_categories(data):
     modin_series, pandas_series = create_test_series(data.copy())
-    pandas_result = pandas_series.cat.rename_categories(list("qwert"), inplace=inplace)
-    modin_result = modin_series.cat.rename_categories(list("qwert"), inplace=inplace)
+    pandas_result = pandas_series.cat.rename_categories(list("qwert"))
+    modin_result = modin_series.cat.rename_categories(list("qwert"))
     df_equals(modin_series, pandas_series)
     df_equals(modin_result, pandas_result)
 
@@ -4587,15 +4585,10 @@ def test_cat_rename_categories(data, inplace):
     "data", test_data_categorical_values, ids=test_data_categorical_keys
 )
 @pytest.mark.parametrize("ordered", bool_arg_values, ids=bool_arg_keys)
-@pytest.mark.parametrize("inplace", [True, False])
-def test_cat_reorder_categories(data, ordered, inplace):
+def test_cat_reorder_categories(data, ordered):
     modin_series, pandas_series = create_test_series(data.copy())
-    pandas_result = pandas_series.cat.reorder_categories(
-        list("tades"), ordered=ordered, inplace=inplace
-    )
-    modin_result = modin_series.cat.reorder_categories(
-        list("tades"), ordered=ordered, inplace=inplace
-    )
+    pandas_result = pandas_series.cat.reorder_categories(list("tades"), ordered=ordered)
+    modin_result = modin_series.cat.reorder_categories(list("tades"), ordered=ordered)
     df_equals(modin_series, pandas_series)
     df_equals(modin_result, pandas_result)
 
@@ -4659,11 +4652,10 @@ def test_cat_set_categories(data, ordered, rename, inplace):
 @pytest.mark.parametrize(
     "data", test_data_categorical_values, ids=test_data_categorical_keys
 )
-@pytest.mark.parametrize("inplace", [True, False])
-def test_cat_as_ordered(data, inplace):
+def test_cat_as_ordered(data):
     modin_series, pandas_series = create_test_series(data.copy())
-    pandas_result = pandas_series.cat.as_ordered(inplace=inplace)
-    modin_result = modin_series.cat.as_ordered(inplace=inplace)
+    pandas_result = pandas_series.cat.as_ordered()
+    modin_result = modin_series.cat.as_ordered()
     df_equals(modin_series, pandas_series)
     df_equals(modin_result, pandas_result)
 
@@ -4671,11 +4663,10 @@ def test_cat_as_ordered(data, inplace):
 @pytest.mark.parametrize(
     "data", test_data_categorical_values, ids=test_data_categorical_keys
 )
-@pytest.mark.parametrize("inplace", [True, False])
-def test_cat_as_unordered(data, inplace):
+def test_cat_as_unordered(data):
     modin_series, pandas_series = create_test_series(data.copy())
-    pandas_result = pandas_series.cat.as_unordered(inplace=inplace)
-    modin_result = modin_series.cat.as_unordered(inplace=inplace)
+    pandas_result = pandas_series.cat.as_unordered()
+    modin_result = modin_series.cat.as_unordered()
     df_equals(modin_series, pandas_series)
     df_equals(modin_result, pandas_result)
 

From 8e6fa4f5cf3ea64e91519f9c0e94900f69436c01 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 01:43:27 +0200
Subject: [PATCH 058/176] more fixes for 'test_groupby.py'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py    | 2 +-
 modin/pandas/groupby.py | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 6f65eff90e2..398d6568839 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -3327,7 +3327,7 @@ def tz_convert(self, tz, axis=0, level=None, copy=None):  # noqa: PR01, RT01, D2
         else:
             new_labels = self.axes[axis].tz_convert(tz)
         obj = self.copy() if copy else self
-        return obj.set_axis(new_labels, axis, copy=copy)
+        return obj.set_axis(new_labels, axis=axis, copy=copy)
 
     def tz_localize(
         self, tz, axis=0, level=None, copy=None, ambiguous="raise", nonexistent="raise"
diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 80c4f5fef68..8a1121eb5d0 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -1278,9 +1278,7 @@ def _default_to_pandas(self, f, *args, **kwargs):
         by = GroupBy.validate_by(by)
 
         def groupby_on_multiple_columns(df, *args, **kwargs):
-            groupby_obj = df.groupby(
-                by=by, axis=self._axis, squeeze=self._squeeze, **self._kwargs
-            )
+            groupby_obj = df.groupby(by=by, axis=self._axis, **self._kwargs)
 
             if callable(f):
                 return f(groupby_obj, *args, **kwargs)

From 103ca002b92b01ce3f5603490ef10da1aad428ea Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 15:51:10 +0200
Subject: [PATCH 059/176] Disallow passing non-keyword arguments to
 'interpolate'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/base/query_compiler.py | 12 ++++++------
 modin/pandas/dataframe.py                         |  1 +
 modin/pandas/resample.py                          | 13 +++++++------
 modin/pandas/series.py                            |  1 +
 4 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 5ec76d7e145..2acf3572514 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -4284,12 +4284,12 @@ def resample_interpolate(
             self,
             resample_kwargs,
             method,
-            axis,
-            limit,
-            inplace,
-            limit_direction,
-            limit_area,
-            downcast,
+            axis=axis,
+            limit=limit,
+            inplace=inplace,
+            limit_direction=limit_direction,
+            limit_area=limit_area,
+            downcast=downcast,
             **kwargs,
         )
 
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index b623845fbc5..d1b1cc37ace 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -1177,6 +1177,7 @@ def insert(
     def interpolate(
         self,
         method="linear",
+        *,
         axis=0,
         limit=None,
         inplace=False,
diff --git a/modin/pandas/resample.py b/modin/pandas/resample.py
index b6956c74446..d0e906ff0cc 100644
--- a/modin/pandas/resample.py
+++ b/modin/pandas/resample.py
@@ -237,6 +237,7 @@ def asfreq(self, fill_value=None):
     def interpolate(
         self,
         method="linear",
+        *,
         axis=0,
         limit=None,
         inplace=False,
@@ -249,12 +250,12 @@ def interpolate(
             query_compiler=self._query_compiler.resample_interpolate(
                 self.resample_kwargs,
                 method,
-                axis,
-                limit,
-                inplace,
-                limit_direction,
-                limit_area,
-                downcast,
+                axis=axis,
+                limit=limit,
+                inplace=inplace,
+                limit_direction=limit_direction,
+                limit_area=limit_area,
+                downcast=downcast,
                 **kwargs,
             )
         )
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index e2b1f6c5d72..280a1bec157 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1108,6 +1108,7 @@ def info(
     def interpolate(
         self,
         method="linear",
+        *,
         axis=0,
         limit=None,
         inplace=False,

From eccb01825e2ccdbf3062117752dec29bc14cffee Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 16:37:48 +0200
Subject: [PATCH 060/176] fix for 'test_indexing'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/dataframe/test_indexing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modin/pandas/test/dataframe/test_indexing.py b/modin/pandas/test/dataframe/test_indexing.py
index 73dc309530a..4365b887a6b 100644
--- a/modin/pandas/test/dataframe/test_indexing.py
+++ b/modin/pandas/test/dataframe/test_indexing.py
@@ -2424,8 +2424,8 @@ def test_index_order():
 
     for func in ["all", "any", "count"]:
         df_equals(
-            getattr(df_modin, func)(level=0).index,
-            getattr(df_pandas, func)(level=0).index,
+            getattr(df_modin, func)().index,
+            getattr(df_pandas, func)().index,
         )
 
 

From bbdf297994e2936b79f217852925a5ad6e202bd3 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 17:41:17 +0200
Subject: [PATCH 061/176] fix 'test_loc_series'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/indexing.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/modin/pandas/indexing.py b/modin/pandas/indexing.py
index 34682e6aef0..b0e4f37c569 100644
--- a/modin/pandas/indexing.py
+++ b/modin/pandas/indexing.py
@@ -876,6 +876,11 @@ def _set_item_existing_loc(self, row_loc, col_loc, item):
             return
 
         row_lookup, col_lookup = self.qc.get_positions_from_labels(row_loc, col_loc)
+        if isinstance(item, np.ndarray) and is_boolean_array(row_loc):
+            # fix for 'test_loc_series'; np.log(Series) returns nd.array instead
+            # of Series as it was before (`Series.__array_wrap__` is removed)
+            # otherwise incompatible shapes are obtained
+            item = item.take(row_lookup)
         self._setitem_positional(
             row_lookup,
             col_lookup,

From 7932315dfd5ccd7a20d51903ad9a10fa6792d78a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 20:57:35 +0200
Subject: [PATCH 062/176] fixes for 'test_reduce.py'; affects usage of 'level',
 'numeric_only' parameters

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../storage_formats/pandas/query_compiler.py  |  2 +-
 modin/pandas/base.py                          | 62 +++++--------------
 modin/pandas/dataframe.py                     | 14 ++---
 modin/pandas/groupby.py                       | 18 ++----
 modin/pandas/series.py                        |  2 +
 modin/pandas/test/dataframe/test_default.py   | 16 -----
 modin/pandas/test/dataframe/test_reduce.py    | 54 ++--------------
 modin/pandas/test/dataframe/test_window.py    |  2 +-
 modin/pandas/test/test_series.py              | 16 -----
 9 files changed, 33 insertions(+), 153 deletions(-)

diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index 48de8303d7c..bbf00eb44a4 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -3195,7 +3195,7 @@ def compute_groupby(df, drop=False, partition_idx=0):
 
         # that means that exception in `compute_groupby` was raised
         # in every partition, so we also should raise it
-        # TODO: we should be able to drop this logic with pandas 2.0 as it removes `numeric_only=None`
+        # TODO: we should be able to drop this logic with pandas 2.0.0 as it removes `numeric_only=None`
         # parameter for groupby thus making the behavior of processing of non-numeric columns more
         # predictable (we can decide whether to raise an exception before actually executing groupby)
         if len(result.columns) == 0 and len(self.columns) != 0:
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 398d6568839..652e77862d9 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1741,14 +1741,10 @@ def kurt(self, axis=0, skipna=True, numeric_only=False, **kwargs):
         validate_bool_kwarg(skipna, "skipna", none_allowed=False)
         axis = self._get_axis_number(axis)
 
-        if numeric_only is not None and not numeric_only:
+        if not numeric_only:
             self._validate_dtypes(numeric_only=True)
 
-        data = (
-            self._get_numeric_data(axis)
-            if numeric_only is None or numeric_only
-            else self
-        )
+        data = self._get_numeric_data(axis) if numeric_only else self
 
         return self._reduce_dimension(
             data._query_compiler.kurt(
@@ -1842,8 +1838,7 @@ def _stat_operation(
         op_name: str,
         axis: Union[int, str],
         skipna: bool,
-        level: Optional[Union[int, str]],
-        numeric_only: Optional[bool] = None,
+        numeric_only: Optional[bool] = False,
         **kwargs,
     ):
         """
@@ -1857,10 +1852,7 @@ def _stat_operation(
             Axis to apply method on.
         skipna : bool
             Exclude NA/null values when computing the result.
-        level : int or str
-            If specified `axis` is a MultiIndex, applying method along a particular
-            level, collapsing into a Series.
-        numeric_only : bool, optional
+        numeric_only : bool, default: False
             Include only float, int, boolean columns. If None, will attempt
             to use everything, then use only numeric data.
         **kwargs : dict
@@ -1876,37 +1868,15 @@ def _stat_operation(
         """
         axis = self._get_axis_number(axis)
         validate_bool_kwarg(skipna, "skipna", none_allowed=False)
-        if level is not None:
-            return self._default_to_pandas(
-                op_name,
-                axis=axis,
-                skipna=skipna,
-                level=level,
-                numeric_only=numeric_only,
-                **kwargs,
-            )
-        # If `numeric_only` is None, then we can do this precheck to whether or not
-        # frame contains non-numeric columns, if it doesn't, then we can pass to a query compiler
-        # `numeric_only=False` parameter and make its work easier in that case, rather than
-        # performing under complicate `numeric_only=None` parameter
+
         if not numeric_only:
-            try:
-                self._validate_dtypes(numeric_only=True)
-            except TypeError:
-                if numeric_only is not None:
-                    raise
-            else:
-                numeric_only = False
+            # fix for 'test_reduce_specific'
+            self._validate_dtypes(numeric_only=True)
 
-        data = (
-            self._get_numeric_data(axis)
-            if numeric_only is None or numeric_only
-            else self
-        )
+        data = self._get_numeric_data(axis) if numeric_only else self
         result_qc = getattr(data._query_compiler, op_name)(
             axis=axis,
             skipna=skipna,
-            level=level,
             numeric_only=numeric_only,
             **kwargs,
         )
@@ -2601,7 +2571,7 @@ def sem(
         Return unbiased standard error of the mean over requested axis.
         """
         return self._stat_operation(
-            "sem", axis, skipna, None, numeric_only, ddof=ddof, **kwargs
+            "sem", axis, skipna, numeric_only, ddof=ddof, **kwargs
         )
 
     def mean(
@@ -2614,7 +2584,7 @@ def mean(
         """
         Return the mean of the values over the requested axis.
         """
-        return self._stat_operation("mean", axis, skipna, None, numeric_only, **kwargs)
+        return self._stat_operation("mean", axis, skipna, numeric_only, **kwargs)
 
     def median(
         self,
@@ -2626,9 +2596,7 @@ def median(
         """
         Return the mean of the values over the requested axis.
         """
-        return self._stat_operation(
-            "median", axis, skipna, None, numeric_only, **kwargs
-        )
+        return self._stat_operation("median", axis, skipna, numeric_only, **kwargs)
 
     def set_axis(
         self,
@@ -2753,7 +2721,7 @@ def skew(
         """
         Return unbiased skew over requested axis.
         """
-        return self._stat_operation("skew", axis, skipna, None, numeric_only, **kwargs)
+        return self._stat_operation("skew", axis, skipna, numeric_only, **kwargs)
 
     def sort_index(
         self,
@@ -2839,7 +2807,7 @@ def std(
         Return sample standard deviation over requested axis.
         """
         return self._stat_operation(
-            "std", axis, skipna, None, numeric_only, ddof=ddof, **kwargs
+            "std", axis, skipna, numeric_only, ddof=ddof, **kwargs
         )
 
     def sub(
@@ -3384,6 +3352,8 @@ def value_counts(
         #     counted_values.index = pandas.MultiIndex.from_arrays(
         #         [counted_values.index], names=counted_values.index.names
         #     )
+        # https://pandas.pydata.org/pandas-docs/version/2.0/whatsnew/v2.0.0.html#value-counts-sets-the-resulting-name-to-count
+        counted_values.name = "proportion" if normalize else "count"
         return counted_values
 
     def var(
@@ -3398,7 +3368,7 @@ def var(
         Return unbiased variance over requested axis.
         """
         return self._stat_operation(
-            "var", axis, skipna, None, numeric_only, ddof=ddof, **kwargs
+            "var", axis, skipna, numeric_only, ddof=ddof, **kwargs
         )
 
     def __abs__(self):
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index d1b1cc37ace..7bf1cbf529d 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -1655,6 +1655,7 @@ def prod(
         axis_to_apply = self.columns if axis else self.index
         if (
             skipna is not False
+            # potential place to remove
             and numeric_only is None
             and min_count > len(axis_to_apply)
         ):
@@ -2101,6 +2102,7 @@ def sum(
         axis_to_apply = self.columns if axis else self.index
         if (
             skipna is not False
+            # potential place to remove
             and numeric_only is None
             and min_count > len(axis_to_apply)
         ):
@@ -2915,11 +2917,7 @@ def _validate_dtypes_min_max(self, axis, numeric_only):
             ):
                 raise TypeError("Cannot compare Numeric and Non-Numeric Types")
 
-        return (
-            self._get_numeric_data(axis)
-            if numeric_only is None or numeric_only
-            else self
-        )
+        return self._get_numeric_data(axis) if numeric_only else self
 
     def _validate_dtypes_sum_prod_mean(self, axis, numeric_only, ignore_axis=False):
         """
@@ -2970,11 +2968,7 @@ def _validate_dtypes_sum_prod_mean(self, axis, numeric_only, ignore_axis=False):
             ):
                 raise TypeError("Cannot operate on Numeric and Non-Numeric Types")
 
-        return (
-            self._get_numeric_data(axis)
-            if numeric_only is None or numeric_only
-            else self
-        )
+        return self._get_numeric_data(axis) if numeric_only else self
 
     def _to_pandas(self):
         """
diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 8a1121eb5d0..aeb70d0603d 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -254,7 +254,7 @@ def value_counts(
             )
         )
 
-    def mean(self, numeric_only=None):
+    def mean(self, numeric_only=False):
         return self._check_index(
             self._wrap_aggregation(
                 type(self._query_compiler).groupby_mean,
@@ -580,7 +580,7 @@ def bfill(self, limit=None):
     def idxmin(self):
         return self._default_to_pandas(lambda df: df.idxmin())
 
-    def prod(self, numeric_only=None, min_count=0):
+    def prod(self, numeric_only=False, min_count=0):
         return self._wrap_aggregation(
             type(self._query_compiler).groupby_prod,
             agg_kwargs=dict(min_count=min_count),
@@ -790,7 +790,7 @@ def size(self):
             result.name = None
         return result.fillna(0)
 
-    def sum(self, numeric_only=None, min_count=0):
+    def sum(self, numeric_only=False, min_count=0):
         return self._wrap_aggregation(
             type(self._query_compiler).groupby_sum,
             agg_kwargs=dict(min_count=min_count),
@@ -843,7 +843,7 @@ def nunique(self, dropna=True):
     def resample(self, rule, *args, **kwargs):
         return self._default_to_pandas(lambda df: df.resample(rule, *args, **kwargs))
 
-    def median(self, numeric_only=None):
+    def median(self, numeric_only=False):
         return self._check_index(
             self._wrap_aggregation(
                 type(self._query_compiler).groupby_median,
@@ -1129,7 +1129,7 @@ def _compute_index_grouped(self, numerical=False):
     def _wrap_aggregation(
         self,
         qc_method,
-        numeric_only=None,
+        numeric_only=False,
         agg_args=None,
         agg_kwargs=None,
         **kwargs,
@@ -1162,14 +1162,6 @@ def _wrap_aggregation(
         agg_args = tuple() if agg_args is None else agg_args
         agg_kwargs = dict() if agg_kwargs is None else agg_kwargs
 
-        if numeric_only is None:
-            # pandas behavior: if `numeric_only` wasn't explicitly specified then
-            # the parameter is considered to be `False` if there are no numeric types
-            # in the frame and `True` otherwise.
-            numeric_only = any(
-                is_numeric_dtype(dtype) for dtype in self._query_compiler.dtypes
-            )
-
         if numeric_only and self.ndim == 2:
             by_cols = self._internal_by
             mask_cols = [
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 280a1bec157..b98d1078cca 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1955,6 +1955,8 @@ def value_counts(
         )
         # pandas sets output index names to None because the Series name already contains it
         counted_values._query_compiler.set_index_name(None)
+        # https://pandas.pydata.org/pandas-docs/version/2.0/whatsnew/v2.0.0.html#value-counts-sets-the-resulting-name-to-count
+        counted_values.name = "proportion" if normalize else "count"
         return counted_values
 
     def view(self, dtype=None):  # noqa: PR01, RT01, D200
diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py
index 4aebba77655..c1206e41654 100644
--- a/modin/pandas/test/dataframe/test_default.py
+++ b/modin/pandas/test/dataframe/test_default.py
@@ -424,22 +424,6 @@ def test_kurt_kurtosis(axis, skipna, numeric_only, method):
     )
 
 
-@pytest.mark.parametrize("level", [-1, 0, 1])
-def test_kurt_kurtosis_level(level):
-    data = test_data["int_data"]
-    df_modin, df_pandas = pd.DataFrame(data), pandas.DataFrame(data)
-
-    index = generate_multiindex(len(data.keys()))
-    df_modin.columns = index
-    df_pandas.columns = index
-
-    eval_general(
-        df_modin,
-        df_pandas,
-        lambda df: df.kurtosis(axis=1, level=level),
-    )
-
-
 def test_last():
     modin_index = pd.date_range("2010-04-09", periods=400, freq="2D")
     pandas_index = pandas.date_range("2010-04-09", periods=400, freq="2D")
diff --git a/modin/pandas/test/dataframe/test_reduce.py b/modin/pandas/test/dataframe/test_reduce.py
index 6395e9b8a92..41b0d48b87f 100644
--- a/modin/pandas/test/dataframe/test_reduce.py
+++ b/modin/pandas/test/dataframe/test_reduce.py
@@ -33,7 +33,6 @@
     int_arg_values,
     eval_general,
     create_test_dfs,
-    generate_multiindex,
     test_data_diff_dtype,
     df_equals_with_non_stable_indices,
     test_data_large_categorical_dataframe,
@@ -79,29 +78,6 @@ def test_all_any_specific(bool_only, method):
     )
 
 
-@pytest.mark.parametrize("method", ["all", "any"])
-@pytest.mark.parametrize("level", [-1, 0, 1])
-@pytest.mark.parametrize("axis", [0, 1])
-@pytest.mark.parametrize("data", [test_data["int_data"]])
-def test_all_any_level(data, axis, level, method):
-    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)
-
-    if axis == 0:
-        new_idx = generate_multiindex(len(modin_df.index))
-        modin_df.index = new_idx
-        pandas_df.index = new_idx
-    else:
-        new_col = generate_multiindex(len(modin_df.columns))
-        modin_df.columns = new_col
-        pandas_df.columns = new_col
-
-    eval_general(
-        modin_df,
-        pandas_df,
-        lambda df: getattr(df, method)(axis=axis, level=level),
-    )
-
-
 @pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
 @pytest.mark.parametrize(
     "data", [test_data["float_nan_data"], test_data_large_categorical_dataframe]
@@ -121,28 +97,6 @@ def test_count_specific(numeric_only):
     )
 
 
-@pytest.mark.parametrize("level", [-1, 0, 1])
-@pytest.mark.parametrize("axis", [0, 1])
-@pytest.mark.parametrize("data", [test_data["int_data"]])
-def test_count_level(data, axis, level):
-    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)
-
-    if axis == 0:
-        new_idx = generate_multiindex(len(modin_df.index))
-        modin_df.index = new_idx
-        pandas_df.index = new_idx
-    else:
-        new_col = generate_multiindex(len(modin_df.columns))
-        modin_df.columns = new_col
-        pandas_df.columns = new_col
-
-    eval_general(
-        modin_df,
-        pandas_df,
-        lambda df: df.count(axis=axis, level=level),
-    )
-
-
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 def test_count_dtypes(data):
     modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)
@@ -316,8 +270,8 @@ def test_prod(
     pandas_df = pandas.DataFrame(
         [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], index=arrays
     )
-    modin_result = modin_df.prod(level=0)
-    pandas_result = pandas_df.prod(level=0)
+    modin_result = modin_df.prod()
+    pandas_result = pandas_df.prod()
     df_equals(modin_result, pandas_result)
 
 
@@ -344,8 +298,8 @@ def test_sum(data, axis, skipna, is_transposed):
     pandas_df = pandas.DataFrame(
         [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], index=arrays
     )
-    modin_result = modin_df.sum(level=0)
-    pandas_result = pandas_df.sum(level=0)
+    modin_result = modin_df.sum()
+    pandas_result = pandas_df.sum()
     df_equals(modin_result, pandas_result)
 
 
diff --git a/modin/pandas/test/dataframe/test_window.py b/modin/pandas/test/dataframe/test_window.py
index eb3d989b5cb..3831e1355a7 100644
--- a/modin/pandas/test/dataframe/test_window.py
+++ b/modin/pandas/test/dataframe/test_window.py
@@ -505,7 +505,7 @@ def test_median_skew_std_var_sem_1953(method):
     # These shouldn't default to pandas: follow up on
     # https://github.com/modin-project/modin/issues/1953
     with warns_that_defaulting_to_pandas():
-        eval_general(modin_df, pandas_df, lambda df: getattr(df, method)(level=0))
+        eval_general(modin_df, pandas_df, lambda df: getattr(df, method)())
 
 
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 38f98c73b3b..af1df980958 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -2212,22 +2212,6 @@ def test_kurtosis_numeric_only(axis, numeric_only):
     )
 
 
-@pytest.mark.parametrize("level", [-1, 0, 1])
-def test_kurtosis_level(level):
-    data = test_data["int_data"]
-    modin_s, pandas_s = create_test_series(data)
-
-    index = generate_multiindex(len(data.keys()))
-    modin_s.columns = index
-    pandas_s.columns = index
-
-    eval_general(
-        modin_s,
-        pandas_s,
-        lambda s: s.kurtosis(axis=1, level=level),
-    )
-
-
 def test_last():
     modin_index = pd.date_range("2010-04-09", periods=400, freq="2D")
     pandas_index = pandas.date_range("2010-04-09", periods=400, freq="2D")

From 5b47230e9e400c66c50c8e5941c4f3e95442ee96 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 22:09:13 +0200
Subject: [PATCH 063/176] fix for 'test_udf.py'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/dataframe/test_udf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modin/pandas/test/dataframe/test_udf.py b/modin/pandas/test/dataframe/test_udf.py
index a7330afaf20..ea792a1379f 100644
--- a/modin/pandas/test/dataframe/test_udf.py
+++ b/modin/pandas/test/dataframe/test_udf.py
@@ -352,7 +352,7 @@ def h(x):
 
     def g(x, arg1=0):
         for _ in range(arg1):
-            x = x.append(x)
+            x = (pd if isinstance(x, pd.DataFrame) else pandas).concat((x, x))
         return x
 
     def f(x, arg2=0, arg3=0):

From ff9821c7f19eab47507efcf7d0a19a73e0de992a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 22:16:25 +0200
Subject: [PATCH 064/176] remove 'dt.week', 'dt.weekofyear'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/base/query_compiler.py | 8 --------
 modin/pandas/series_utils.py                      | 8 --------
 2 files changed, 16 deletions(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 2acf3572514..3aa0ad384e1 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -4096,18 +4096,10 @@ def dt_tz_localize(self, tz, ambiguous="raise", nonexistent="raise"):
             self, tz, ambiguous, nonexistent
         )
 
-    @doc_utils.doc_dt_timestamp(prop="week component", refer_to="week")
-    def dt_week(self):
-        return DateTimeDefault.register(pandas.Series.dt.week)(self)
-
     @doc_utils.doc_dt_timestamp(prop="integer day of week", refer_to="weekday")
     def dt_weekday(self):
         return DateTimeDefault.register(pandas.Series.dt.weekday)(self)
 
-    @doc_utils.doc_dt_timestamp(prop="week of year", refer_to="weekofyear")
-    def dt_weekofyear(self):
-        return DateTimeDefault.register(pandas.Series.dt.weekofyear)(self)
-
     @doc_utils.doc_dt_timestamp(prop="year component", refer_to="year")
     def dt_year(self):
         return DateTimeDefault.register(pandas.Series.dt.year)(self)
diff --git a/modin/pandas/series_utils.py b/modin/pandas/series_utils.py
index 2ea8d5a5a4d..8c28ab8603a 100644
--- a/modin/pandas/series_utils.py
+++ b/modin/pandas/series_utils.py
@@ -530,14 +530,6 @@ def microsecond(self):
     def nanosecond(self):
         return Series(query_compiler=self._query_compiler.dt_nanosecond())
 
-    @property
-    def week(self):
-        return Series(query_compiler=self._query_compiler.dt_week())
-
-    @property
-    def weekofyear(self):
-        return Series(query_compiler=self._query_compiler.dt_weekofyear())
-
     @property
     def dayofweek(self):
         return Series(query_compiler=self._query_compiler.dt_dayofweek())

From 98f99cf8ab3a6cc5c06b64cbd37fe466b3fd9e3c Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 23:29:25 +0200
Subject: [PATCH 065/176] fix 'Series.describe' and 'test_between_time'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/series.py           | 4 ++--
 modin/pandas/test/test_series.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index b98d1078cca..99bdfbc367b 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -829,8 +829,8 @@ def describe(
         # Pandas ignores the `include` and `exclude` for Series for some reason.
         return super(Series, self).describe(
             percentiles=percentiles,
-            include=include,
-            exclude=exclude,
+            include=None,
+            exclude=None,
         )
 
     def diff(self, periods=1):  # noqa: PR01, RT01, D200
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index af1df980958..5145926ef9b 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -1213,8 +1213,8 @@ def test_between_time():
         pandas_series.between_time("3:00", "8:00"),
     )
     df_equals(
-        modin_series.between_time("3:00", "8:00", False),
-        pandas_series.between_time("3:00", "8:00", False),
+        modin_series.between_time("3:00", "8:00", inclusive="right"),
+        pandas_series.between_time("3:00", "8:00", inclusive="right"),
     )
 
 
@@ -1240,7 +1240,7 @@ def test_bfill(data):
 
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 def test_bool(data):
-    modin_series, pandas_series = create_test_series(data)
+    modin_series, _ = create_test_series(data)
 
     with pytest.raises(ValueError):
         modin_series.bool()

From 5f3279d1155726e2396dcecb6f80c4caa0d8f61a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 23:34:39 +0200
Subject: [PATCH 066/176] remove 'inplace' parameter for some 'cat.' functions

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/series_utils.py     | 21 +++++++--------------
 modin/pandas/test/test_series.py | 28 ++++++++++++----------------
 2 files changed, 19 insertions(+), 30 deletions(-)

diff --git a/modin/pandas/series_utils.py b/modin/pandas/series_utils.py
index 8c28ab8603a..99df8a0c348 100644
--- a/modin/pandas/series_utils.py
+++ b/modin/pandas/series_utils.py
@@ -73,28 +73,21 @@ def reorder_categories(self, new_categories, ordered=None):
             ordered=ordered,
         )
 
-    def add_categories(self, new_categories, inplace=False):
-        return self._default_to_pandas(
-            pandas.Series.cat.add_categories, new_categories, inplace=inplace
-        )
+    def add_categories(self, new_categories):
+        return self._default_to_pandas(pandas.Series.cat.add_categories, new_categories)
 
-    def remove_categories(self, removals, inplace=False):
-        return self._default_to_pandas(
-            pandas.Series.cat.remove_categories, removals, inplace=inplace
-        )
+    def remove_categories(self, removals):
+        return self._default_to_pandas(pandas.Series.cat.remove_categories, removals)
 
-    def remove_unused_categories(self, inplace=False):
-        return self._default_to_pandas(
-            pandas.Series.cat.remove_unused_categories, inplace=inplace
-        )
+    def remove_unused_categories(self):
+        return self._default_to_pandas(pandas.Series.cat.remove_unused_categories)
 
-    def set_categories(self, new_categories, ordered=None, rename=False, inplace=False):
+    def set_categories(self, new_categories, ordered=None, rename=False):
         return self._default_to_pandas(
             pandas.Series.cat.set_categories,
             new_categories,
             ordered=ordered,
             rename=rename,
-            inplace=inplace,
         )
 
     def as_ordered(self, *args, **kwargs):
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 5145926ef9b..402bee8d106 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -4580,11 +4580,10 @@ def test_cat_reorder_categories(data, ordered):
 @pytest.mark.parametrize(
     "data", test_data_categorical_values, ids=test_data_categorical_keys
 )
-@pytest.mark.parametrize("inplace", [True, False])
-def test_cat_add_categories(data, inplace):
+def test_cat_add_categories(data):
     modin_series, pandas_series = create_test_series(data.copy())
-    pandas_result = pandas_series.cat.add_categories(list("qw"), inplace=inplace)
-    modin_result = modin_series.cat.add_categories(list("qw"), inplace=inplace)
+    pandas_result = pandas_series.cat.add_categories(list("qw"))
+    modin_result = modin_series.cat.add_categories(list("qw"))
     df_equals(modin_series, pandas_series)
     df_equals(modin_result, pandas_result)
 
@@ -4592,11 +4591,10 @@ def test_cat_add_categories(data, inplace):
 @pytest.mark.parametrize(
     "data", test_data_categorical_values, ids=test_data_categorical_keys
 )
-@pytest.mark.parametrize("inplace", [True, False])
-def test_cat_remove_categories(data, inplace):
+def test_cat_remove_categories(data):
     modin_series, pandas_series = create_test_series(data.copy())
-    pandas_result = pandas_series.cat.remove_categories(list("at"), inplace=inplace)
-    modin_result = modin_series.cat.remove_categories(list("at"), inplace=inplace)
+    pandas_result = pandas_series.cat.remove_categories(list("at"))
+    modin_result = modin_series.cat.remove_categories(list("at"))
     df_equals(modin_series, pandas_series)
     df_equals(modin_result, pandas_result)
 
@@ -4604,13 +4602,12 @@ def test_cat_remove_categories(data, inplace):
 @pytest.mark.parametrize(
     "data", test_data_categorical_values, ids=test_data_categorical_keys
 )
-@pytest.mark.parametrize("inplace", [True, False])
-def test_cat_remove_unused_categories(data, inplace):
+def test_cat_remove_unused_categories(data):
     modin_series, pandas_series = create_test_series(data.copy())
     pandas_series[1] = np.nan
-    pandas_result = pandas_series.cat.remove_unused_categories(inplace=inplace)
+    pandas_result = pandas_series.cat.remove_unused_categories()
     modin_series[1] = np.nan
-    modin_result = modin_series.cat.remove_unused_categories(inplace=inplace)
+    modin_result = modin_series.cat.remove_unused_categories()
     df_equals(modin_series, pandas_series)
     df_equals(modin_result, pandas_result)
 
@@ -4620,14 +4617,13 @@ def test_cat_remove_unused_categories(data, inplace):
 )
 @pytest.mark.parametrize("ordered", bool_arg_values, ids=bool_arg_keys)
 @pytest.mark.parametrize("rename", [True, False])
-@pytest.mark.parametrize("inplace", [True, False])
-def test_cat_set_categories(data, ordered, rename, inplace):
+def test_cat_set_categories(data, ordered, rename):
     modin_series, pandas_series = create_test_series(data.copy())
     pandas_result = pandas_series.cat.set_categories(
-        list("qwert"), ordered=ordered, rename=rename, inplace=inplace
+        list("qwert"), ordered=ordered, rename=rename
     )
     modin_result = modin_series.cat.set_categories(
-        list("qwert"), ordered=ordered, rename=rename, inplace=inplace
+        list("qwert"), ordered=ordered, rename=rename
     )
     df_equals(modin_series, pandas_series)
     df_equals(modin_result, pandas_result)

From 48ef8b3d006e0294439cfd2d15059f5bb31fd820 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 23:44:48 +0200
Subject: [PATCH 067/176] update 'str.split' and 'str.rsplit' methods

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/base/query_compiler.py | 12 ++++++++----
 modin/pandas/series_utils.py                      | 15 +++++++++++----
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 3aa0ad384e1..fcc5651729e 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -4747,8 +4747,10 @@ def str_rpartition(self, sep=" ", expand=True):
         n : int, default: -1
         expand : bool, default: False""",
     )
-    def str_rsplit(self, pat=None, n=-1, expand=False):
-        return StrDefault.register(pandas.Series.str.rsplit)(self, pat, n, expand)
+    def str_rsplit(self, pat=None, *, n=-1, expand=False):
+        return StrDefault.register(pandas.Series.str.rsplit)(
+            self, pat, n=n, expand=expand
+        )
 
     @doc_utils.doc_str_method(refer_to="rstrip", params="to_strip : str, optional")
     def str_rstrip(self, to_strip=None):
@@ -4783,8 +4785,10 @@ def str_slice_replace(self, start=None, stop=None, repl=None):
         n : int, default: -1
         expand : bool, default: False""",
     )
-    def str_split(self, pat=None, n=-1, expand=False):
-        return StrDefault.register(pandas.Series.str.split)(self, pat, n, expand)
+    def str_split(self, pat=None, *, n=-1, expand=False, regex=None):
+        return StrDefault.register(pandas.Series.str.split)(
+            self, pat, n=n, expand=expand, regex=regex
+        )
 
     @doc_utils.doc_str_method(
         refer_to="startswith",
diff --git a/modin/pandas/series_utils.py b/modin/pandas/series_utils.py
index 99df8a0c348..bebcc2a8fc6 100644
--- a/modin/pandas/series_utils.py
+++ b/modin/pandas/series_utils.py
@@ -142,22 +142,29 @@ def decode(self, encoding, errors="strict"):
             pandas.Series.str.decode, encoding, errors=errors
         )
 
-    def split(self, pat=None, n=-1, expand=False):
+    def split(self, pat=None, *, n=-1, expand=False, regex=None):
         if not pat and pat is not None:
             raise ValueError("split() requires a non-empty pattern match.")
 
         if expand:
             return self._default_to_pandas(
-                pandas.Series.str.split, pat=pat, n=n, expand=expand
+                pandas.Series.str.split,
+                pat=pat,
+                n=n,
+                expand=expand,
+                regex=regex,
             )
         else:
             return Series(
                 query_compiler=self._query_compiler.str_split(
-                    pat=pat, n=n, expand=expand
+                    pat=pat,
+                    n=n,
+                    expand=expand,
+                    regex=regex,
                 )
             )
 
-    def rsplit(self, pat=None, n=-1, expand=False):
+    def rsplit(self, pat=None, *, n=-1, expand=False):
         if not pat and pat is not None:
             raise ValueError("rsplit() requires a non-empty pattern match.")
 

From 26c50bfbcf55e096163893b18dd748def23afa64 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 15 Apr 2023 23:51:31 +0200
Subject: [PATCH 068/176] remove '.week' and 'weekofyear' in tests; remove
 workarounds for #3142

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_series.py | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 402bee8d106..bc60a75e50d 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -542,13 +542,7 @@ def test___repr__(name, dt_index, data):
         )
         pandas_series.index = modin_series.index = index
 
-    if get_current_execution() == "BaseOnPython" and data == "empty":
-        # TODO: Remove this when default `dtype` of empty Series will be `object` in pandas (see #3142).
-        assert modin_series.dtype == np.object_
-        assert pandas_series.dtype == np.float64
-        df_equals(modin_series.index, pandas_series.index)
-    else:
-        assert repr(modin_series) == repr(pandas_series)
+    assert repr(modin_series) == repr(pandas_series)
 
 
 def test___repr__4186():
@@ -1690,12 +1684,7 @@ def test_dropna_inplace(data):
 
 def test_dtype_empty():
     modin_series, pandas_series = pd.Series(), pandas.Series()
-    if get_current_execution() == "BaseOnPython":
-        # TODO: Remove this when default `dtype` of empty Series will be `object` in pandas (see #3142).
-        assert modin_series.dtype == np.object_
-        assert pandas_series.dtype == np.float64
-    else:
-        assert modin_series.dtype == pandas_series.dtype
+    assert modin_series.dtype == pandas_series.dtype
 
 
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@@ -1740,8 +1729,6 @@ def test_dt(timezone):
     df_equals(modin_series.dt.second, pandas_series.dt.second)
     df_equals(modin_series.dt.microsecond, pandas_series.dt.microsecond)
     df_equals(modin_series.dt.nanosecond, pandas_series.dt.nanosecond)
-    df_equals(modin_series.dt.week, pandas_series.dt.week)
-    df_equals(modin_series.dt.weekofyear, pandas_series.dt.weekofyear)
     df_equals(modin_series.dt.dayofweek, pandas_series.dt.dayofweek)
     df_equals(modin_series.dt.day_of_week, pandas_series.dt.day_of_week)
     df_equals(modin_series.dt.weekday, pandas_series.dt.weekday)

From 46ccf5b14346019ca94fcc0544e4fbf80a32a49a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sun, 16 Apr 2023 00:14:10 +0200
Subject: [PATCH 069/176] small fixes for 'test_series.py'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py             |  1 +
 modin/pandas/series.py           |  1 +
 modin/pandas/test/test_series.py | 23 +++++++++++++++--------
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 652e77862d9..220ef51c5a9 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -2275,6 +2275,7 @@ def resample(
     def reset_index(
         self,
         level: IndexLabel = None,
+        *,
         drop: bool = False,
         inplace: bool = False,
         col_level: Hashable = 0,
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 99bdfbc367b..a35e8cc525e 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1486,6 +1486,7 @@ def repeat(self, repeats, axis=None):  # noqa: PR01, RT01, D200
     def reset_index(
         self,
         level=None,
+        *,
         drop=False,
         name=no_default,
         inplace=False,
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index bc60a75e50d..22d05593644 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -18,6 +18,7 @@
 from pandas._testing import assert_series_equal
 from pandas.errors import SpecificationError
 from pandas.core.indexing import IndexingError
+import pandas._libs.lib as lib
 import matplotlib
 import modin.pandas as pd
 from numpy.testing import assert_array_equal
@@ -2220,9 +2221,10 @@ def test_index_order(func):
     s_modin.index = index
     s_pandas.index = index
 
+    # The result of the operation is not a Series, `.index` is missed
     df_equals(
-        getattr(s_modin, func)(level=0).index,
-        getattr(s_pandas, func)(level=0).index,
+        getattr(s_modin, func)(),
+        getattr(s_pandas, func)(),
     )
 
 
@@ -2376,7 +2378,7 @@ def test_median_skew_std_sum_var_prod_sem_1953(method):
     ]
     modin_s = pd.Series(data, index=arrays)
     pandas_s = pandas.Series(data, index=arrays)
-    eval_general(modin_s, pandas_s, lambda s: getattr(s, method)(level=0))
+    eval_general(modin_s, pandas_s, lambda s: getattr(s, method)())
 
 
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@@ -2505,7 +2507,7 @@ def h(x):
 
     def g(x, arg1=0):
         for _ in range(arg1):
-            x = x.append(x)
+            x = (pd if isinstance(x, pd.Series) else pandas).concat((x, x))
         return x
 
     def f(x, arg2=0, arg3=0):
@@ -2905,7 +2907,7 @@ def test_resample(closed, label, level):
 
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 @pytest.mark.parametrize("drop", [True, False], ids=["True", "False"])
-@pytest.mark.parametrize("name", [None, "Custom name"])
+@pytest.mark.parametrize("name", [lib.no_default, "Custom name"])
 @pytest.mark.parametrize("inplace", [True, False])
 def test_reset_index(data, drop, name, inplace):
     eval_general(
@@ -4493,9 +4495,14 @@ def test_hasattr_sparse(is_sparse_data):
 def test_cat_categories(data):
     modin_series, pandas_series = create_test_series(data.copy())
     df_equals(modin_series.cat.categories, pandas_series.cat.categories)
-    pandas_series.cat.categories = list("qwert")
-    modin_series.cat.categories = list("qwert")
-    df_equals(modin_series, pandas_series)
+
+    def set_categories(ser):
+        ser.cat.categories = list("qwert")
+        return ser
+
+    # pandas 2.0.0: Removed setting Categorical.categories directly (GH47834)
+    # Just check the exception
+    eval_general(modin_series, pandas_series, set_categories)
 
 
 @pytest.mark.parametrize(

From 7a3953183e57bbf075dd9c26cb6551633ff83f18 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sun, 16 Apr 2023 00:30:25 +0200
Subject: [PATCH 070/176] fix 'test_series_dt_api_equality'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/pandas/query_compiler.py | 2 --
 modin/pandas/test/test_api.py                       | 5 ++++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index bbf00eb44a4..de556b0d993 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -1656,8 +1656,6 @@ def searchsorted(df):
     dt_second = Map.register(_dt_prop_map("second"), dtypes=np.int64)
     dt_microsecond = Map.register(_dt_prop_map("microsecond"), dtypes=np.int64)
     dt_nanosecond = Map.register(_dt_prop_map("nanosecond"), dtypes=np.int64)
-    dt_week = Map.register(_dt_prop_map("week"), dtypes=np.int64)
-    dt_weekofyear = Map.register(_dt_prop_map("weekofyear"), dtypes=np.int64)
     dt_dayofweek = Map.register(_dt_prop_map("dayofweek"), dtypes=np.int64)
     dt_weekday = Map.register(_dt_prop_map("weekday"), dtypes=np.int64)
     dt_dayofyear = Map.register(_dt_prop_map("dayofyear"), dtypes=np.int64)
diff --git a/modin/pandas/test/test_api.py b/modin/pandas/test/test_api.py
index c0b25f3f486..c0515bdb09b 100644
--- a/modin/pandas/test/test_api.py
+++ b/modin/pandas/test/test_api.py
@@ -252,7 +252,10 @@ def test_series_dt_api_equality():
     modin_dir = [obj for obj in dir(pd.Series().dt) if obj[0] != "_"]
     pandas_dir = [obj for obj in dir(pandas.Series.dt) if obj[0] != "_"]
 
-    missing_from_modin = set(pandas_dir) - set(modin_dir)
+    # should be deleted, but for some reason the check fails
+    # https://github.com/pandas-dev/pandas/pull/33595
+    ignore = ["week", "weekofyear"]
+    missing_from_modin = set(pandas_dir) - set(modin_dir) - set(ignore)
     assert not len(missing_from_modin), "Differences found in API: {}".format(
         missing_from_modin
     )

From 95787095743e2588a12eeed6d79e53f701dd30fa Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 17 Apr 2023 21:20:13 +0200
Subject: [PATCH 071/176] remove 'squeeze' field for groupby objects

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/dataframe.py |  1 -
 modin/pandas/groupby.py   | 13 +------------
 modin/pandas/series.py    |  1 -
 3 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 7bf1cbf529d..ef9452f6127 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -522,7 +522,6 @@ def groupby(
             as_index,
             sort,
             group_keys,
-            False,
             idx_name,
             observed=observed,
             drop=drop,
diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index aeb70d0603d..4329aed53de 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -70,7 +70,6 @@
     "_pandas_class",
     "_query_compiler",
     "_sort",
-    "_squeeze",
     "_wrap_aggregation",
 }
 
@@ -88,7 +87,6 @@ def __init__(
         as_index,
         sort,
         group_keys,
-        squeeze,
         idx_name,
         drop,
         **kwargs,
@@ -128,7 +126,6 @@ def __init__(
             "as_index": as_index,
             "group_keys": group_keys,
         }
-        self._squeeze = squeeze
         self._kwargs.update(kwargs)
 
     def __getattr__(self, key):
@@ -200,7 +197,6 @@ def skew(self, *args, **kwargs):
                     axis=self._axis,
                     idx_name=self._idx_name,
                     drop=self._drop,
-                    squeeze=self._squeeze,
                     **self._kwargs,
                 )
             else:
@@ -512,7 +508,6 @@ def __getitem__(self, key):
             "by": self._by,
             "axis": self._axis,
             "idx_name": self._idx_name,
-            "squeeze": self._squeeze,
         }
         # The rules of type deduction for the resulted object is the following:
         #   1. If `key` is a list-like or `as_index is False`, then the resulted object is a DataFrameGroupBy
@@ -760,7 +755,6 @@ def size(self):
                 0,
                 drop=self._drop,
                 idx_name=self._idx_name,
-                squeeze=self._squeeze,
                 **self._kwargs,
             ).size()
         work_object = type(self)(
@@ -769,7 +763,6 @@ def size(self):
             self._axis,
             drop=False,
             idx_name=None,
-            squeeze=self._squeeze,
             **self._kwargs,
         )
         result = work_object._wrap_aggregation(
@@ -894,7 +887,6 @@ def fillna(self, *args, **kwargs):
             axis=self._axis,
             idx_name=self._idx_name,
             drop=self._drop,
-            squeeze=self._squeeze,
             **new_groupby_kwargs,
         )
         return work_object._check_index_name(
@@ -1180,7 +1172,7 @@ def _wrap_aggregation(
         else:
             groupby_qc = self._query_compiler
 
-        result = type(self._df)(
+        return type(self._df)(
             query_compiler=qc_method(
                 groupby_qc,
                 by=self._by,
@@ -1192,9 +1184,6 @@ def _wrap_aggregation(
                 **kwargs,
             )
         )
-        if self._squeeze:
-            return result.squeeze()
-        return result
 
     def _check_index(self, result):
         """
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index a35e8cc525e..6143e6c453f 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1028,7 +1028,6 @@ def groupby(
             as_index,
             sort,
             group_keys,
-            False,
             idx_name=None,
             observed=observed,
             drop=False,

From 5b0295cbbc57b8c2f54584c6c457e1c074df75ab Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 17 Apr 2023 23:23:56 +0200
Subject: [PATCH 072/176] fixes for gropby.skew/__iter__; use 'eval_general'
 when op doesn't suuport category type

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/dataframe.py         |  3 ++
 modin/pandas/groupby.py           | 53 +++++++++---------------
 modin/pandas/test/test_groupby.py | 68 +++++++++++++++++++++++++++----
 3 files changed, 82 insertions(+), 42 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index ef9452f6127..fb150ccc1d2 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -444,12 +444,14 @@ def groupby(
         # groupby takes place.
         drop = False
 
+        return_tuple_when_iterating = None
         if (
             not isinstance(by, (pandas.Series, Series))
             and is_list_like(by)
             and len(by) == 1
         ):
             by = by[0]
+            return_tuple_when_iterating = True
 
         if callable(by):
             by = self.index.map(by)
@@ -526,6 +528,7 @@ def groupby(
             observed=observed,
             drop=drop,
             dropna=dropna,
+            return_tuple_when_iterating=return_tuple_when_iterating,
         )
 
     def keys(self):  # noqa: RT01, D200
diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 4329aed53de..9b6a45aef03 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -77,6 +77,7 @@
 @_inherit_docstrings(pandas.core.groupby.DataFrameGroupBy)
 class DataFrameGroupBy(ClassLogger):
     _pandas_class = pandas.core.groupby.DataFrameGroupBy
+    _return_tuple_when_iterating = None
 
     def __init__(
         self,
@@ -98,6 +99,12 @@ def __init__(
         self._columns = self._query_compiler.columns
         self._by = by
         self._drop = drop
+        # When providing a list of columns of length one to DataFrame.groupby(),
+        # the keys that are returned by iterating over the resulting DataFrameGroupBy
+        # object will now be tuples of length one (pandas#GH47761)
+        self._return_tuple_when_iterating = kwargs.pop(
+            "return_tuple_when_iterating", None
+        )
 
         if (
             level is None
@@ -178,38 +185,18 @@ def default_handler(*args, **kwargs):
     def ngroups(self):
         return len(self)
 
-    def skew(self, *args, **kwargs):
-        # The 'skew' aggregation is less tolerant to non-numeric columns than others
-        # (i.e. it doesn't allow numeric categoricals), thus dropping non-numeric
-        # columns here since `._wrap_aggregation(numeric_only=True, ...)` is not enough
-        if self.ndim == 2:
-            by_cols = self._internal_by
-            mask_cols = [
-                col
-                for col, dtype in self._df.dtypes.items()
-                if is_numeric_dtype(dtype) or col in by_cols
-            ]
-            if not self._df.columns.equals(mask_cols):
-                masked_df = self._df[mask_cols]
-                masked_obj = type(self)(
-                    df=masked_df,
-                    by=self._by,
-                    axis=self._axis,
-                    idx_name=self._idx_name,
-                    drop=self._drop,
-                    **self._kwargs,
-                )
-            else:
-                masked_obj = self
-        else:
-            masked_obj = self
+    def skew(self, axis=0, skipna=True, numeric_only=False, **kwargs):
+        agg_kwargs = dict(
+            axis=axis,
+            skipna=skipna,
+            numeric_only=numeric_only,
+        )
+        agg_kwargs.update(kwargs)
 
-        return masked_obj._wrap_aggregation(
-            type(masked_obj._query_compiler).groupby_skew,
-            agg_args=args,
-            agg_kwargs=kwargs,
-            # Don't want to try to drop non-numeric columns for the second time
-            numeric_only=False,
+        return self._wrap_aggregation(
+            type(self._query_compiler).groupby_skew,
+            agg_kwargs=agg_kwargs,
+            numeric_only=numeric_only,
         )
 
     def ffill(self, limit=None):
@@ -1004,7 +991,7 @@ def _iter(self):
         if self._axis == 0:
             return (
                 (
-                    k,
+                    (k,) if self._return_tuple_when_iterating else k,
                     DataFrame(
                         query_compiler=self._query_compiler.getitem_row_array(
                             indices[k]
@@ -1016,7 +1003,7 @@ def _iter(self):
         else:
             return (
                 (
-                    k,
+                    (k,) if self._return_tuple_when_iterating else k,
                     DataFrame(
                         query_compiler=self._query_compiler.getitem_column_array(
                             indices[k], numeric=True
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index ab4354dd23d..ba9f93fb4ed 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -333,7 +333,14 @@ def maybe_get_columns(df, by):
         lambda df: df.sem(),
         modin_df_almost_equals_pandas,
     )
-    eval_mean(modin_groupby, pandas_groupby)
+    # TypeError: 'Categorical' with dtype category does not support reduction 'mean'
+    eval_general(
+        modin_groupby,
+        pandas_groupby,
+        lambda df: df.mean(),
+        modin_df_almost_equals_pandas,
+    )
+
     eval_any(modin_groupby, pandas_groupby)
     eval_min(modin_groupby, pandas_groupby)
     eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmax())
@@ -360,17 +367,41 @@ def maybe_get_columns(df, by):
         min,
     ]
     for func in apply_functions:
-        eval_apply(modin_groupby, pandas_groupby, func)
+        # TypeError: 'Categorical' with dtype category does not support reduction 'sum'
+        eval_general(
+            modin_groupby,
+            pandas_groupby,
+            lambda grp: grp.apply(func),
+        )
 
     eval_dtypes(modin_groupby, pandas_groupby)
     eval_general(modin_groupby, pandas_groupby, lambda df: df.first())
     eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())
     eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())
-    eval_prod(modin_groupby, pandas_groupby)
+    # TypeError: category type does not support prod operations
+    eval_general(
+        modin_groupby,
+        pandas_groupby,
+        lambda grp: grp.prod(),
+    )
+
     if as_index:
         eval_std(modin_groupby, pandas_groupby)
-        eval_var(modin_groupby, pandas_groupby)
-        eval_skew(modin_groupby, pandas_groupby)
+        # TypeError: 'Categorical' with dtype category does not support reduction 'var'
+        eval_general(
+            modin_groupby,
+            pandas_groupby,
+            lambda df: df.var(),
+            modin_df_almost_equals_pandas,
+        )
+
+        # TypeError: 'Categorical' with dtype category does not support reduction 'skew'
+        eval_general(
+            modin_groupby,
+            pandas_groupby,
+            lambda df: df.skew(),
+            modin_df_almost_equals_pandas,
+        )
 
     agg_functions = [
         lambda df: df.sum(),
@@ -406,7 +437,13 @@ def maybe_get_columns(df, by):
     eval_general(modin_groupby, pandas_groupby, lambda df: df.rank())
     eval_max(modin_groupby, pandas_groupby)
     eval_len(modin_groupby, pandas_groupby)
-    eval_sum(modin_groupby, pandas_groupby)
+    # TypeError: category type does not support sum operations
+    eval_general(
+        modin_groupby,
+        pandas_groupby,
+        lambda df: df.sum(),
+    )
+
     eval_ngroup(modin_groupby, pandas_groupby)
     # Pandas raising exception when 'by' contains categorical key and `as_index=False`
     # because of a bug: https://github.com/pandas-dev/pandas/issues/36698
@@ -417,7 +454,14 @@ def maybe_get_columns(df, by):
         lambda df: df.nunique(),
         check_exception_type=None if (col1_category and not as_index) else True,
     )
-    eval_median(modin_groupby, pandas_groupby)
+    # TypeError: category type does not support median operations
+    eval_general(
+        modin_groupby,
+        pandas_groupby,
+        lambda df: df.median(),
+        modin_df_almost_equals_pandas,
+    )
+
     eval_general(modin_groupby, pandas_groupby, lambda df: df.head(n))
     eval_general(
         modin_groupby,
@@ -439,7 +483,12 @@ def maybe_get_columns(df, by):
 
     pipe_functions = [lambda dfgb: dfgb.sum()]
     for func in pipe_functions:
-        eval_pipe(modin_groupby, pandas_groupby, func)
+        # TypeError: category type does not support sum operations
+        eval_general(
+            modin_groupby,
+            pandas_groupby,
+            lambda df: df.pipe(func),
+        )
 
     eval_general(
         modin_groupby,
@@ -464,7 +513,8 @@ def maybe_get_columns(df, by):
     ):
         # Not yet supported for non-original-column-from-dataframe Series in by:
         eval___getattr__(modin_groupby, pandas_groupby, "col3")
-        eval___getitem__(modin_groupby, pandas_groupby, "col3")
+        # TODO: Potentially a bug in pandas
+        # eval___getitem__(modin_groupby, pandas_groupby, "col3")
     eval_groups(modin_groupby, pandas_groupby)
     # Intersection of the selection and 'by' columns is not yet supported
     non_by_cols = (

From 8127bbc0927dd1f0341743828562cfa6b8d46970 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 18 Apr 2023 00:15:45 +0200
Subject: [PATCH 073/176] update 'std' and 'var' signatures; use
 'numeric_only=True' explicitly in 'test_mixed_dtypes_groupby'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/groupby.py           |  12 ++--
 modin/pandas/test/test_groupby.py | 109 +++++++++++++++++++++---------
 2 files changed, 82 insertions(+), 39 deletions(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 9b6a45aef03..1246008667e 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -569,11 +569,11 @@ def prod(self, numeric_only=False, min_count=0):
             numeric_only=numeric_only,
         )
 
-    def std(self, ddof=1):
+    def std(self, ddof=1, engine=None, engine_kwargs=None, numeric_only=False):
         return self._wrap_aggregation(
             type(self._query_compiler).groupby_std,
-            agg_kwargs=dict(ddof=ddof),
-            numeric_only=True,
+            agg_kwargs=dict(ddof=ddof, engine=engine, engine_kwargs=engine_kwargs),
+            numeric_only=numeric_only,
         )
 
     def aggregate(self, func=None, *args, **kwargs):
@@ -714,11 +714,11 @@ def max(self, numeric_only=False, min_count=-1):
             agg_kwargs=dict(min_count=min_count),
         )
 
-    def var(self, ddof=1):
+    def var(self, ddof=1, engine=None, engine_kwargs=None, numeric_only=False):
         return self._wrap_aggregation(
             type(self._query_compiler).groupby_var,
-            agg_kwargs=dict(ddof=ddof),
-            numeric_only=True,
+            agg_kwargs=dict(ddof=ddof, engine=engine, engine_kwargs=engine_kwargs),
+            numeric_only=numeric_only,
         )
 
     def get_group(self, name, obj=None):
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index ba9f93fb4ed..f63e0255351 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -98,6 +98,9 @@ def wrapper(obj1, obj2, *args, **kwargs):
 
 @pytest.mark.parametrize("as_index", [True, False])
 def test_mixed_dtypes_groupby(as_index):
+    # The data for this test contains non-numeric types. In pandas version 1.5.3 and older,
+    # it automatically determined whether to filter non-numeric data if `numeric_only=None`.
+    # Now this needs to be done explicitly via `numeric_only=True`.
     frame_data = np.random.randint(97, 198, size=(2**6, 2**4))
     pandas_df = pandas.DataFrame(frame_data).add_prefix("col")
     # Convert every other column to string
@@ -142,19 +145,19 @@ def test_mixed_dtypes_groupby(as_index):
         )
         eval_general(modin_groupby, pandas_groupby, lambda df: df.ewm(com=0.5).std())
         eval_shift(modin_groupby, pandas_groupby)
-        eval_mean(modin_groupby, pandas_groupby)
+        eval_mean(modin_groupby, pandas_groupby, numeric_only=True)
         eval_any(modin_groupby, pandas_groupby)
         eval_min(modin_groupby, pandas_groupby)
         eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmax())
         eval_ndim(modin_groupby, pandas_groupby)
-        eval_cumsum(modin_groupby, pandas_groupby)
+        eval_cumsum(modin_groupby, pandas_groupby, numeric_only=True)
         eval_general(
             modin_groupby,
             pandas_groupby,
             lambda df: df.pct_change(),
             modin_df_almost_equals_pandas,
         )
-        eval_cummax(modin_groupby, pandas_groupby)
+        eval_cummax(modin_groupby, pandas_groupby, numeric_only=True)
 
         # TODO Add more apply functions
         apply_functions = [lambda df: df.sum(), min]
@@ -163,14 +166,14 @@ def test_mixed_dtypes_groupby(as_index):
 
         eval_dtypes(modin_groupby, pandas_groupby)
         eval_general(modin_groupby, pandas_groupby, lambda df: df.first())
-        eval_cummin(modin_groupby, pandas_groupby)
+        eval_cummin(modin_groupby, pandas_groupby, numeric_only=True)
         eval_general(modin_groupby, pandas_groupby, lambda df: df.bfill())
         eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())
-        eval_prod(modin_groupby, pandas_groupby)
+        eval_prod(modin_groupby, pandas_groupby, numeric_only=True)
         if as_index:
-            eval_std(modin_groupby, pandas_groupby)
-            eval_var(modin_groupby, pandas_groupby)
-            eval_skew(modin_groupby, pandas_groupby)
+            eval_std(modin_groupby, pandas_groupby, numeric_only=True)
+            eval_var(modin_groupby, pandas_groupby, numeric_only=True)
+            eval_skew(modin_groupby, pandas_groupby, numeric_only=True)
 
         agg_functions = [
             lambda df: df.sum(),
@@ -199,9 +202,9 @@ def test_mixed_dtypes_groupby(as_index):
         eval_ngroup(modin_groupby, pandas_groupby)
         eval_nunique(modin_groupby, pandas_groupby)
         eval_value_counts(modin_groupby, pandas_groupby)
-        eval_median(modin_groupby, pandas_groupby)
+        eval_median(modin_groupby, pandas_groupby, numeric_only=True)
         eval_general(modin_groupby, pandas_groupby, lambda df: df.head(n))
-        eval_cumprod(modin_groupby, pandas_groupby)
+        eval_cumprod(modin_groupby, pandas_groupby, numeric_only=True)
         eval_general(
             modin_groupby,
             pandas_groupby,
@@ -1027,12 +1030,18 @@ def eval_ngroups(modin_groupby, pandas_groupby):
     assert modin_groupby.ngroups == pandas_groupby.ngroups
 
 
-def eval_skew(modin_groupby, pandas_groupby):
-    modin_df_almost_equals_pandas(modin_groupby.skew(), pandas_groupby.skew())
+def eval_skew(modin_groupby, pandas_groupby, numeric_only=False):
+    modin_df_almost_equals_pandas(
+        modin_groupby.skew(numeric_only=numeric_only),
+        pandas_groupby.skew(numeric_only=numeric_only),
+    )
 
 
-def eval_mean(modin_groupby, pandas_groupby):
-    modin_df_almost_equals_pandas(modin_groupby.mean(), pandas_groupby.mean())
+def eval_mean(modin_groupby, pandas_groupby, numeric_only=False):
+    modin_df_almost_equals_pandas(
+        modin_groupby.mean(numeric_only=numeric_only),
+        pandas_groupby.mean(numeric_only=numeric_only),
+    )
 
 
 def eval_any(modin_groupby, pandas_groupby):
@@ -1047,12 +1056,25 @@ def eval_ndim(modin_groupby, pandas_groupby):
     assert modin_groupby.ndim == pandas_groupby.ndim
 
 
-def eval_cumsum(modin_groupby, pandas_groupby, axis=0):
-    df_equals(modin_groupby.cumsum(axis=axis), pandas_groupby.cumsum(axis=axis))
+def eval_cumsum(modin_groupby, pandas_groupby, axis=0, numeric_only=False):
+    df_equals(
+        modin_groupby.cumsum(axis=axis, numeric_only=numeric_only),
+        pandas_groupby.cumsum(axis=axis, numeric_only=numeric_only),
+    )
 
 
-def eval_cummax(modin_groupby, pandas_groupby, axis=0):
-    df_equals(modin_groupby.cummax(axis=axis), pandas_groupby.cummax(axis=axis))
+def eval_cummax(modin_groupby, pandas_groupby, axis=0, numeric_only=False):
+    df_equals(
+        modin_groupby.cummax(axis=axis, numeric_only=numeric_only),
+        pandas_groupby.cummax(axis=axis, numeric_only=numeric_only),
+    )
+
+
+def eval_cummin(modin_groupby, pandas_groupby, axis=0, numeric_only=False):
+    df_equals(
+        modin_groupby.cummin(axis=axis, numeric_only=numeric_only),
+        pandas_groupby.cummin(axis=axis, numeric_only=numeric_only),
+    )
 
 
 def eval_apply(modin_groupby, pandas_groupby, func):
@@ -1063,16 +1085,18 @@ def eval_dtypes(modin_groupby, pandas_groupby):
     df_equals(modin_groupby.dtypes, pandas_groupby.dtypes)
 
 
-def eval_cummin(modin_groupby, pandas_groupby, axis=0):
-    df_equals(modin_groupby.cummin(axis=axis), pandas_groupby.cummin(axis=axis))
-
-
-def eval_prod(modin_groupby, pandas_groupby):
-    df_equals(modin_groupby.prod(), pandas_groupby.prod())
+def eval_prod(modin_groupby, pandas_groupby, numeric_only=False):
+    df_equals(
+        modin_groupby.prod(numeric_only=numeric_only),
+        pandas_groupby.prod(numeric_only=numeric_only),
+    )
 
 
-def eval_std(modin_groupby, pandas_groupby):
-    modin_df_almost_equals_pandas(modin_groupby.std(), pandas_groupby.std())
+def eval_std(modin_groupby, pandas_groupby, numeric_only=False):
+    modin_df_almost_equals_pandas(
+        modin_groupby.std(numeric_only=numeric_only),
+        pandas_groupby.std(numeric_only=numeric_only),
+    )
 
 
 def eval_aggregate(modin_groupby, pandas_groupby, func):
@@ -1091,8 +1115,11 @@ def eval_max(modin_groupby, pandas_groupby):
     df_equals(modin_groupby.max(), pandas_groupby.max())
 
 
-def eval_var(modin_groupby, pandas_groupby):
-    modin_df_almost_equals_pandas(modin_groupby.var(), pandas_groupby.var())
+def eval_var(modin_groupby, pandas_groupby, numeric_only=False):
+    modin_df_almost_equals_pandas(
+        modin_groupby.var(numeric_only=numeric_only),
+        pandas_groupby.var(numeric_only=numeric_only),
+    )
 
 
 def eval_len(modin_groupby, pandas_groupby):
@@ -1115,13 +1142,22 @@ def eval_value_counts(modin_groupby, pandas_groupby):
     df_equals(modin_groupby.value_counts(), pandas_groupby.value_counts())
 
 
-def eval_median(modin_groupby, pandas_groupby):
-    modin_df_almost_equals_pandas(modin_groupby.median(), pandas_groupby.median())
+def eval_median(modin_groupby, pandas_groupby, numeric_only=False):
+    modin_df_almost_equals_pandas(
+        modin_groupby.median(numeric_only=numeric_only),
+        pandas_groupby.median(numeric_only=numeric_only),
+    )
 
 
-def eval_cumprod(modin_groupby, pandas_groupby, axis=0):
-    df_equals(modin_groupby.cumprod(), pandas_groupby.cumprod())
-    df_equals(modin_groupby.cumprod(axis=axis), pandas_groupby.cumprod(axis=axis))
+def eval_cumprod(modin_groupby, pandas_groupby, axis=0, numeric_only=False):
+    df_equals(
+        modin_groupby.cumprod(numeric_only=numeric_only),
+        pandas_groupby.cumprod(numeric_only=numeric_only),
+    )
+    df_equals(
+        modin_groupby.cumprod(axis=axis, numeric_only=numeric_only),
+        pandas_groupby.cumprod(axis=axis, numeric_only=numeric_only),
+    )
 
 
 def eval_transform(modin_groupby, pandas_groupby, func):
@@ -2023,6 +2059,13 @@ def test_handle_as_index(
             + "https://github.com/pandas-dev/pandas/issues/36698"
         )
 
+    if has_categorical_by and (
+        callable(agg_func) or ("apply_sum" in request.node.callspec.id.split("-"))
+    ):
+        pytest.skip(
+            "TypeError: 'Categorical' with dtype category does not support reduction 'sum'"
+        )
+
     df = pandas.DataFrame(test_groupby_data)
     external_by_cols = GroupBy.validate_by(df.add_prefix("external_"))
 

From aaa4c58610b4e6ee723aa6e96c2ef4d42dc9fb7f Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 18 Apr 2023 00:28:32 +0200
Subject: [PATCH 074/176] add 'numeric_only=False' for
 cumsum/cummax/cummin/cumprod

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/groupby.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 1246008667e..7c0adc719d6 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -370,13 +370,13 @@ def _shift(data, periods, freq, axis, fill_value, is_set_nan_rows=True):
     def nth(self, n, dropna=None):
         return self._default_to_pandas(lambda df: df.nth(n, dropna=dropna))
 
-    def cumsum(self, axis=0, *args, **kwargs):
+    def cumsum(self, axis=0, *args, numeric_only=False, **kwargs):
         return self._check_index_name(
             self._wrap_aggregation(
                 type(self._query_compiler).groupby_cumsum,
                 agg_args=args,
                 agg_kwargs=dict(axis=axis, **kwargs),
-                numeric_only=True,
+                numeric_only=numeric_only,
             )
         )
 
@@ -401,12 +401,12 @@ def filter(self, func, dropna=True, *args, **kwargs):
             lambda df: df.filter(func, dropna=dropna, *args, **kwargs)
         )
 
-    def cummax(self, axis=0, **kwargs):
+    def cummax(self, axis=0, numeric_only=False, **kwargs):
         return self._check_index_name(
             self._wrap_aggregation(
                 type(self._query_compiler).groupby_cummax,
                 agg_kwargs=dict(axis=axis, **kwargs),
-                numeric_only=False,
+                numeric_only=numeric_only,
             )
         )
 
@@ -547,12 +547,12 @@ def __getitem__(self, key):
             **kwargs,
         )
 
-    def cummin(self, axis=0, **kwargs):
+    def cummin(self, axis=0, numeric_only=False, **kwargs):
         return self._check_index_name(
             self._wrap_aggregation(
                 type(self._query_compiler).groupby_cummin,
                 agg_kwargs=dict(axis=axis, **kwargs),
-                numeric_only=False,
+                numeric_only=numeric_only,
             )
         )
 
@@ -834,13 +834,13 @@ def median(self, numeric_only=False):
     def head(self, n=5):
         return self._default_to_pandas(lambda df: df.head(n))
 
-    def cumprod(self, axis=0, *args, **kwargs):
+    def cumprod(self, axis=0, *args, numeric_only=False, **kwargs):
         return self._check_index_name(
             self._wrap_aggregation(
                 type(self._query_compiler).groupby_cumprod,
                 agg_args=args,
                 agg_kwargs=dict(axis=axis, **kwargs),
-                numeric_only=True,
+                numeric_only=numeric_only,
             )
         )
 

From 0ba1a5024c6f90382cd3e2e788eb33bcdeb61f56 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 18 Apr 2023 01:42:44 +0200
Subject: [PATCH 075/176] 'skew' fixes; add 'numeric_only=True' for
 cummax/cummin/cumprod/cumsum tests

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/groupby.py           |  6 +++++-
 modin/pandas/test/test_groupby.py | 21 ++++++++++++---------
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 7c0adc719d6..3b3b03d16b4 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -185,7 +185,11 @@ def default_handler(*args, **kwargs):
     def ngroups(self):
         return len(self)
 
-    def skew(self, axis=0, skipna=True, numeric_only=False, **kwargs):
+    def skew(self, axis=no_default, skipna=True, numeric_only=False, **kwargs):
+        # default behaviour for aggregations; for the reference see
+        # `_op_via_apply` func in pandas==2.0.0
+        if axis is None or axis is no_default:
+            axis = self._axis
         agg_kwargs = dict(
             axis=axis,
             skipna=skipna,
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index f63e0255351..478ff8db585 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -798,7 +798,9 @@ def test_simple_col_groupby():
     modin_groupby_equals_pandas(modin_groupby, pandas_groupby)
     eval_ngroups(modin_groupby, pandas_groupby)
     eval_shift(modin_groupby, pandas_groupby)
-    eval_skew(modin_groupby, pandas_groupby)
+    # TODO: default axis value in that case - `1` that inherited from groupby call
+    # however axis=1 parameter isn't support on BaseOnPython.
+    eval_skew(modin_groupby, pandas_groupby, axis=0)
     eval_general(modin_groupby, pandas_groupby, lambda df: df.ffill())
     eval_general(
         modin_groupby,
@@ -1030,10 +1032,10 @@ def eval_ngroups(modin_groupby, pandas_groupby):
     assert modin_groupby.ngroups == pandas_groupby.ngroups
 
 
-def eval_skew(modin_groupby, pandas_groupby, numeric_only=False):
+def eval_skew(modin_groupby, pandas_groupby, numeric_only=False, axis=0):
     modin_df_almost_equals_pandas(
-        modin_groupby.skew(numeric_only=numeric_only),
-        pandas_groupby.skew(numeric_only=numeric_only),
+        modin_groupby.skew(numeric_only=numeric_only, axis=axis),
+        pandas_groupby.skew(numeric_only=numeric_only, axis=axis),
     )
 
 
@@ -1960,7 +1962,8 @@ def test_multi_column_groupby_different_partitions(
         # using a custom comparator that allows slight numeric deviations.
         comparator=try_modin_df_almost_equals_compare,
     )
-    eval___getitem__(md_grp, pd_grp, md_df.columns[1])
+    # TODO: Potentially a bug in pandas
+    # eval___getitem__(md_grp, pd_grp, md_df.columns[1])
     eval___getitem__(md_grp, pd_grp, [md_df.columns[1], md_df.columns[2]])
 
 
@@ -2316,10 +2319,10 @@ def run_test(eval_function, *args, **kwargs):
     run_test(eval_any)
     run_test(eval_apply, func=lambda df: df.mean())
     run_test(eval_count)
-    run_test(eval_cummax)
-    run_test(eval_cummin)
-    run_test(eval_cumprod)
-    run_test(eval_cumsum)
+    run_test(eval_cummax, numeric_only=True)
+    run_test(eval_cummin, numeric_only=True)
+    run_test(eval_cumprod, numeric_only=True)
+    run_test(eval_cumsum, numeric_only=True)
     run_test(eval_dtypes)
     run_test(eval_fillna)
     run_test(eval_groups)

From fc4e1575a65c6e2f8acb1aa1992aa6897ca086c3 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 18 Apr 2023 01:45:16 +0200
Subject: [PATCH 076/176] add fix for 'test_general.py'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_general.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modin/pandas/test/test_general.py b/modin/pandas/test/test_general.py
index e95e41fd4c5..d8060428361 100644
--- a/modin/pandas/test/test_general.py
+++ b/modin/pandas/test/test_general.py
@@ -819,7 +819,7 @@ def test_create_categorical_dataframe_with_duplicate_column_name():
 @pytest.mark.parametrize(
     "func, regex",
     [
-        (lambda df: df.mean(level=0), r"DataFrame\.mean"),
+        (lambda df: df.mean(), r"DataFrame\.mean"),
         (lambda df: df + df, r"DataFrame\.add"),
         (lambda df: df.index, r"DataFrame\.get_axis\(0\)"),
         (

From c56acecd766277e2f24474d3e0f504e4df56cffd Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 18 Apr 2023 02:08:26 +0200
Subject: [PATCH 077/176] remove 'inplace' parameter for 'set_axis' (leftovers)

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../implementations/hdk_on_native/test/test_dataframe.py  | 8 +++-----
 modin/pandas/dataframe.py                                 | 4 ++--
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py b/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
index eb0deaf862f..49cd9c1a37e 100644
--- a/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
+++ b/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
@@ -2499,21 +2499,19 @@ def set_axis(df, **kwargs):
             labels = [
                 np.nan if i % 2 == 0 else sort_index[i] for i in range(len(sort_index))
             ]
-            inplace = kwargs["set_axis_inplace"]
-            res = df.set_axis(labels, axis=1, inplace=inplace)
-            return df if inplace else res
+            return df.set_axis(labels, axis=1, copy=kwargs["copy"])
 
         run_and_compare(
             fn=set_axis,
             data=test_data["float_nan_data"],
             force_lazy=False,
-            set_axis_inplace=True,
+            copy=True,
         )
         run_and_compare(
             fn=set_axis,
             data=test_data["float_nan_data"],
             force_lazy=False,
-            set_axis_inplace=False,
+            copy=False,
         )
 
 
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index fb150ccc1d2..5c5c89f6ecf 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -170,9 +170,9 @@ def __init__(
             if columns is not None and not isinstance(columns, pandas.Index):
                 columns = pandas.Index(columns)
             if columns is not None:
-                self.set_axis(columns, axis=1, inplace=True)
+                self = self.set_axis(columns, axis=1, copy=False)
             if index is not None:
-                self.set_axis(index, axis=0, inplace=True)
+                self = self.set_axis(index, axis=0, copy=False)
             if dtype is not None:
                 casted_obj = self.astype(dtype, copy=False)
                 self._query_compiler = casted_obj._query_compiler

From 2abbf1f7049797070333e4c6c694572cecc2e2ea Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 18 Apr 2023 02:13:57 +0200
Subject: [PATCH 078/176] add 'dtype_backend' parameter for 'read_sql_table'
 base imple

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/io/io.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modin/core/io/io.py b/modin/core/io/io.py
index 665656363ce..5b7895f5535 100644
--- a/modin/core/io/io.py
+++ b/modin/core/io/io.py
@@ -504,6 +504,7 @@ def read_sql_table(
         parse_dates=None,
         columns=None,
         chunksize=None,
+        dtype_backend=no_default,
     ):  # noqa: PR01
         ErrorMessage.default_to_pandas("`read_sql_table`")
         return cls.from_pandas(
@@ -516,6 +517,7 @@ def read_sql_table(
                 parse_dates=parse_dates,
                 columns=columns,
                 chunksize=chunksize,
+                dtype_backend=dtype_backend,
             )
         )
 

From 22eddf09594fbb028cf402855ff153ce229b5ac1 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Wed, 19 Apr 2023 18:15:13 +0200
Subject: [PATCH 079/176] fix 'test_skew_corner_cases' on Dask engine

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/groupby.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 3b3b03d16b4..03e70e1fd79 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -190,16 +190,21 @@ def skew(self, axis=no_default, skipna=True, numeric_only=False, **kwargs):
         # `_op_via_apply` func in pandas==2.0.0
         if axis is None or axis is no_default:
             axis = self._axis
-        agg_kwargs = dict(
-            axis=axis,
-            skipna=skipna,
-            numeric_only=numeric_only,
-        )
-        agg_kwargs.update(kwargs)
+
+        # `groupby_skew` can't handle `axis`, `skipna` parameters
+        # that should be added into `agg_kwargs`;
+        # if the values of these parameters are different from the default ones,
+        # then we need to default to pandas
+        if axis != 0 or skipna != True:
+            return self._default_to_pandas(
+                lambda df: df.skew(
+                    axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
+                )
+            )
 
         return self._wrap_aggregation(
             type(self._query_compiler).groupby_skew,
-            agg_kwargs=agg_kwargs,
+            agg_kwargs=kwargs,
             numeric_only=numeric_only,
         )
 

From c8e4190c686595d206f6d5c372f21be771e2ffbf Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Wed, 19 Apr 2023 20:25:47 +0200
Subject: [PATCH 080/176] fixes for 'test_groupby.py'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/pandas/query_compiler.py | 4 +---
 modin/pandas/groupby.py                             | 2 +-
 modin/pandas/test/test_groupby.py                   | 6 +++++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index d0678eb256f..2f9435877a6 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -3030,9 +3030,7 @@ def groupby_dtypes(
             by=by,
             axis=axis,
             agg_func=lambda df: df.dtypes,
-            # passing 'group_wise' will make the function be applied to the 'by' columns as well,
-            # this is exactly what we want when 'as_index=False'
-            how="axis_wise" if groupby_kwargs.get("as_index", True) else "group_wise",
+            how="group_wise",
             agg_args=agg_args,
             agg_kwargs=agg_kwargs,
             groupby_kwargs=groupby_kwargs,
diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 03e70e1fd79..a0d4805c2ff 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -195,7 +195,7 @@ def skew(self, axis=no_default, skipna=True, numeric_only=False, **kwargs):
         # that should be added into `agg_kwargs`;
         # if the values of these parameters are different from the default ones,
         # then we need to default to pandas
-        if axis != 0 or skipna != True:
+        if axis != 0 or not skipna:
             return self._default_to_pandas(
                 lambda df: df.skew(
                     axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index 478ff8db585..91b1677e12e 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -1652,7 +1652,11 @@ def test_agg_exceptions(operation):
 
     data1 = {
         "column_to_by": ["foo", "bar", "baz", "bar"] * (N // 4),
-        "nan_column": [None] * N,
+        # Earlier, the type of this column was `object`. In such a situation,
+        # when performing aggregation on different column partitions, different
+        # exceptions were thrown. The exception that engines return to the main
+        # process was non-deterministic, either `TypeError` or `NotImplementedError`.
+        "nan_column": [np.nan] * N,
     }
 
     data2 = {

From 7dcd7a26c9c4c6613be6df4961bbadf37eff26e0 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Wed, 19 Apr 2023 23:33:34 +0200
Subject: [PATCH 081/176] fixes for 'test_groupby.py'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/groupby.py           |  6 +++---
 modin/pandas/test/test_groupby.py | 19 +++++++++++--------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index a0d4805c2ff..46b497163d3 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -193,9 +193,9 @@ def skew(self, axis=no_default, skipna=True, numeric_only=False, **kwargs):
 
         # `groupby_skew` can't handle `axis`, `skipna` parameters
         # that should be added into `agg_kwargs`;
-        # if the values of these parameters are different from the default ones,
-        # then we need to default to pandas
-        if axis != 0 or not skipna:
+        # looks like an implicit supported combination of parameters in the
+        # previous implementation: axis == 1, skipna==True
+        if axis != 1 or not skipna:
             return self._default_to_pandas(
                 lambda df: df.skew(
                     axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index 91b1677e12e..b7f0c5a3638 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -516,8 +516,7 @@ def maybe_get_columns(df, by):
     ):
         # Not yet supported for non-original-column-from-dataframe Series in by:
         eval___getattr__(modin_groupby, pandas_groupby, "col3")
-        # TODO: Potentially a bug in pandas
-        # eval___getitem__(modin_groupby, pandas_groupby, "col3")
+        eval___getitem__(modin_groupby, pandas_groupby, "col3")
     eval_groups(modin_groupby, pandas_groupby)
     # Intersection of the selection and 'by' columns is not yet supported
     non_by_cols = (
@@ -1032,10 +1031,13 @@ def eval_ngroups(modin_groupby, pandas_groupby):
     assert modin_groupby.ngroups == pandas_groupby.ngroups
 
 
-def eval_skew(modin_groupby, pandas_groupby, numeric_only=False, axis=0):
+def eval_skew(modin_groupby, pandas_groupby, numeric_only=False, axis=None):
+    kwargs = dict(numeric_only=numeric_only)
+    if axis is not None:
+        kwargs["axis"] = axis
     modin_df_almost_equals_pandas(
-        modin_groupby.skew(numeric_only=numeric_only, axis=axis),
-        pandas_groupby.skew(numeric_only=numeric_only, axis=axis),
+        modin_groupby.skew(**kwargs),
+        pandas_groupby.skew(**kwargs),
     )
 
 
@@ -1234,7 +1236,8 @@ def test(grp):
 
         return test
 
-    # issue-#3252
+    # issue-#3252, https://github.com/pandas-dev/pandas/issues/52760
+    """
     eval_general(
         md_grp,
         pd_grp,
@@ -1247,6 +1250,7 @@ def test(grp):
         build_list_agg(["mean", "count"]),
         comparator=build_types_asserter(df_equals),
     )
+    """
     # Explicit default-to-pandas test
     eval_general(
         md_grp,
@@ -1966,8 +1970,7 @@ def test_multi_column_groupby_different_partitions(
         # using a custom comparator that allows slight numeric deviations.
         comparator=try_modin_df_almost_equals_compare,
     )
-    # TODO: Potentially a bug in pandas
-    # eval___getitem__(md_grp, pd_grp, md_df.columns[1])
+    eval___getitem__(md_grp, pd_grp, md_df.columns[1])
     eval___getitem__(md_grp, pd_grp, [md_df.columns[1], md_df.columns[2]])
 
 

From 024b626d1cb0d316eab34cb2ca46bced3d495826 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 24 Apr 2023 19:56:39 +0200
Subject: [PATCH 082/176] fix 'test_to_dense'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_api.py | 1 -
 modin/pandas/test/test_io.py  | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modin/pandas/test/test_api.py b/modin/pandas/test/test_api.py
index c0515bdb09b..d31135b90e0 100644
--- a/modin/pandas/test/test_api.py
+++ b/modin/pandas/test/test_api.py
@@ -43,7 +43,6 @@ def test_top_level_api_equality():
         "Panel",  # This is deprecated and throws a warning every time.
         "SparseSeries",  # depreceted since pandas 1.0, not present in 1.4+
         "SparseDataFrame",  # depreceted since pandas 1.0, not present in 1.4+
-        "SparseArray",  # usually not available in top-level namespace
     ]
 
     ignore_modin = [
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index db16bd8ab51..2ea60f4bc31 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -2630,7 +2630,8 @@ def test_from_spmatrix():
     reason="The reason of tests fail in `cloud` mode is unknown for now - issue #3264",
 )
 def test_to_dense():
-    modin_df, pandas_df = create_test_dfs({"col1": pandas.SparseArray([0, 1, 0])})
+    data = {"col1": pandas.arrays.SparseArray([0, 1, 0])}
+    modin_df, pandas_df = create_test_dfs(data)
     df_equals(modin_df.sparse.to_dense(), pandas_df.sparse.to_dense())
 
 

From 6d88c9245bd60557a2cd6179590d0c5a78b7e8e9 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 24 Apr 2023 20:03:00 +0200
Subject: [PATCH 083/176] fix 'test_read_spss'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/io/io.py          | 8 ++++++--
 modin/pandas/test/test_io.py | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/modin/core/io/io.py b/modin/core/io/io.py
index 5b7895f5535..97466afe798 100644
--- a/modin/core/io/io.py
+++ b/modin/core/io/io.py
@@ -550,9 +550,13 @@ def read_sql_query(
         summary="Load an SPSS file from the file path, returning a query compiler",
         returns=_doc_returns_qc,
     )
-    def read_spss(cls, path, usecols, convert_categoricals):  # noqa: PR01
+    def read_spss(
+        cls, path, usecols, convert_categoricals, dtype_backend
+    ):  # noqa: PR01
         ErrorMessage.default_to_pandas("`read_spss`")
-        return cls.from_pandas(pandas.read_spss(path, usecols, convert_categoricals))
+        return cls.from_pandas(
+            pandas.read_spss(path, usecols, convert_categoricals, dtype_backend)
+        )
 
     @classmethod
     @_inherit_docstrings(pandas.DataFrame.to_sql, apilink="pandas.DataFrame.to_sql")
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 2ea60f4bc31..12ab5f52df7 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -2585,7 +2585,7 @@ class TestSpss:
     # In case of defaulting to pandas, it's enough
     # to check that the parameters are passed to pandas.
     def test_read_spss(self):
-        test_args = ("fake_path", ["A"], False)
+        test_args = ("fake_path", ["A"], False, lib.no_default)
         with mock.patch(
             "pandas.read_spss", return_value=pandas.DataFrame([])
         ) as read_spss:

From eeea136a5c3da7ec01af643ec8504afa2c8f5023 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 24 Apr 2023 21:56:13 +0200
Subject: [PATCH 084/176] fix 'test_read_orc'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_io.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 12ab5f52df7..c427f30a574 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -2572,7 +2572,11 @@ class TestOrc:
     # to check that the parameters are passed to pandas.
     def test_read_orc(self):
         test_args = ("fake_path",)
-        test_kwargs = {"columns": ["A"], "fake_kwarg": "some_pyarrow_parameter"}
+        test_kwargs = dict(
+            columns=["A"],
+            dtype_backend=lib.no_default,
+            fake_kwarg="some_pyarrow_parameter",
+        )
         with mock.patch(
             "pandas.read_orc", return_value=pandas.DataFrame([])
         ) as read_orc:

From 1ac3c8619090a71740832ce8567a744909d30524 Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <fam1ly.n4me@yandex.ru>
Date: Fri, 28 Apr 2023 10:37:44 +0300
Subject: [PATCH 085/176] Fix df.mean(numeric_only=True)

Signed-off-by: Vasily Litvinov <fam1ly.n4me@yandex.ru>
---
 modin/core/storage_formats/pandas/query_compiler.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index 81ce35a84e8..b4115d25fe1 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -883,7 +883,7 @@ def mean(self, axis, **kwargs):
         # TODO-FIX: this function may work incorrectly with user-defined "numeric" values.
         # Since `count(numeric_only=True)` discards all unknown "numeric" types, we can get incorrect
         # divisor inside the reduce function.
-        def map_fn(df, **kwargs):
+        def map_fn(df, numeric_only=False, **kwargs):
             """
             Perform Map phase of the `mean`.
 
@@ -891,8 +891,8 @@ def map_fn(df, **kwargs):
             """
             result = pandas.DataFrame(
                 {
-                    "sum": df.sum(axis=axis, skipna=skipna),
-                    "count": df.count(axis=axis, numeric_only=True),
+                    "sum": df.sum(axis=axis, skipna=skipna, numeric_only=numeric_only),
+                    "count": df.count(axis=axis, numeric_only=numeric_only),
                 }
             )
             return result if axis else result.T

From d5a67c4338e5d32942eeb4c75bb11df6c96b3f84 Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <fam1ly.n4me@yandex.ru>
Date: Fri, 28 Apr 2023 10:38:21 +0300
Subject: [PATCH 086/176] Properly validate kwargs for stat functions

Signed-off-by: Vasily Litvinov <fam1ly.n4me@yandex.ru>
---
 modin/pandas/base.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 220ef51c5a9..d78c0711516 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1868,6 +1868,10 @@ def _stat_operation(
         """
         axis = self._get_axis_number(axis)
         validate_bool_kwarg(skipna, "skipna", none_allowed=False)
+        if op_name == "median":
+            numpy_compat.function.validate_median((), kwargs)
+        else:
+            numpy_compat.function.validate_stat_func((), kwargs, fname=op_name)
 
         if not numeric_only:
             # fix for 'test_reduce_specific'

From 58dd2d7582e4d16760eb5cd63160f6a26d2e7c41 Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <fam1ly.n4me@yandex.ru>
Date: Fri, 28 Apr 2023 10:39:07 +0300
Subject: [PATCH 087/176] Improve tests for udf, stop checking df stat funcs
 defaulting to pandas

Signed-off-by: Vasily Litvinov <fam1ly.n4me@yandex.ru>
---
 modin/pandas/test/dataframe/test_udf.py    | 5 +++--
 modin/pandas/test/dataframe/test_window.py | 5 +----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/modin/pandas/test/dataframe/test_udf.py b/modin/pandas/test/dataframe/test_udf.py
index e641bf1ceac..f7046495714 100644
--- a/modin/pandas/test/dataframe/test_udf.py
+++ b/modin/pandas/test/dataframe/test_udf.py
@@ -19,6 +19,7 @@
 import modin.pandas as pd
 
 from pandas.core.dtypes.common import is_list_like
+from pandas._libs.lib import no_default
 from modin.pandas.test.utils import (
     random_state,
     df_equals,
@@ -136,11 +137,11 @@ def test_apply_key_error(func):
 
 
 @pytest.mark.parametrize("axis", [0, 1])
-@pytest.mark.parametrize("level", [None, -1, 0, 1])
+@pytest.mark.parametrize("level", [no_default, None, -1, 0, 1])
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 @pytest.mark.parametrize("func", ["kurt", "count", "sum", "mean", "all", "any"])
 def test_apply_text_func_with_level(level, data, func, axis):
-    func_kwargs = {"level": level, "axis": axis}
+    func_kwargs = dict(axis=axis, **({"level": level} if level is not no_default else {}))
     rows_number = len(next(iter(data.values())))  # length of the first data column
     level_0 = np.random.choice([0, 1, 2], rows_number)
     level_1 = np.random.choice([3, 4, 5], rows_number)
diff --git a/modin/pandas/test/dataframe/test_window.py b/modin/pandas/test/dataframe/test_window.py
index 3831e1355a7..96083a9e32a 100644
--- a/modin/pandas/test/dataframe/test_window.py
+++ b/modin/pandas/test/dataframe/test_window.py
@@ -502,10 +502,7 @@ def test_median_skew_std_var_sem_1953(method):
     modin_df = pd.DataFrame(data, index=arrays)
     pandas_df = pandas.DataFrame(data, index=arrays)
 
-    # These shouldn't default to pandas: follow up on
-    # https://github.com/modin-project/modin/issues/1953
-    with warns_that_defaulting_to_pandas():
-        eval_general(modin_df, pandas_df, lambda df: getattr(df, method)())
+    eval_general(modin_df, pandas_df, lambda df: getattr(df, method)())
 
 
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)

From 6184f47f5eab468884668ae9d9965ff28393bcb2 Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <fam1ly.n4me@yandex.ru>
Date: Fri, 28 Apr 2023 10:58:38 +0300
Subject: [PATCH 088/176] fixup! Properly validate kwargs for stat functions

---
 modin/pandas/base.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index d78c0711516..0e78abe336f 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1870,6 +1870,9 @@ def _stat_operation(
         validate_bool_kwarg(skipna, "skipna", none_allowed=False)
         if op_name == "median":
             numpy_compat.function.validate_median((), kwargs)
+        elif op_name in ("sem", "var", "std"):
+            val_kwargs = {k: v for k, v in kwargs.items() if k != "ddof"}
+            numpy_compat.function.validate_stat_ddof_func((), val_kwargs, fname=op_name)
         else:
             numpy_compat.function.validate_stat_func((), kwargs, fname=op_name)
 

From 0c1cd44a3e89a9f3d44988a2dd4cde968188f024 Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <fam1ly.n4me@yandex.ru>
Date: Fri, 28 Apr 2023 11:00:39 +0300
Subject: [PATCH 089/176] Fix black formatting

Signed-off-by: Vasily Litvinov <fam1ly.n4me@yandex.ru>
---
 modin/pandas/test/dataframe/test_udf.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/modin/pandas/test/dataframe/test_udf.py b/modin/pandas/test/dataframe/test_udf.py
index f7046495714..98ea9bc0e61 100644
--- a/modin/pandas/test/dataframe/test_udf.py
+++ b/modin/pandas/test/dataframe/test_udf.py
@@ -141,7 +141,9 @@ def test_apply_key_error(func):
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 @pytest.mark.parametrize("func", ["kurt", "count", "sum", "mean", "all", "any"])
 def test_apply_text_func_with_level(level, data, func, axis):
-    func_kwargs = dict(axis=axis, **({"level": level} if level is not no_default else {}))
+    func_kwargs = dict(
+        axis=axis, **({"level": level} if level is not no_default else {})
+    )
     rows_number = len(next(iter(data.values())))  # length of the first data column
     level_0 = np.random.choice([0, 1, 2], rows_number)
     level_1 = np.random.choice([3, 4, 5], rows_number)

From 09d7d5fca5a7456be9b8a32348e6d967423a6429 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 28 Apr 2023 17:26:50 +0200
Subject: [PATCH 090/176] fix flake8

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/dataframe/test_window.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modin/pandas/test/dataframe/test_window.py b/modin/pandas/test/dataframe/test_window.py
index 96083a9e32a..bc8f1467b45 100644
--- a/modin/pandas/test/dataframe/test_window.py
+++ b/modin/pandas/test/dataframe/test_window.py
@@ -42,7 +42,6 @@
     default_to_pandas_ignore_string,
 )
 from modin.config import NPartitions, StorageFormat
-from modin.test.test_utils import warns_that_defaulting_to_pandas
 
 NPartitions.put(4)
 

From 0ddb2c16905130d85a72cdaf08832d664f0d21ba Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <fam1ly.n4me@yandex.ru>
Date: Wed, 3 May 2023 12:21:39 +0300
Subject: [PATCH 091/176] Uncomment all tests to see the status

Signed-off-by: Vasily Litvinov <fam1ly.n4me@yandex.ru>
---
 modin/pandas/test/dataframe/test_default.py | 2 +-
 modin/pandas/test/test_groupby.py           | 3 +--
 modin/pandas/test/test_series.py            | 1 -
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py
index c1206e41654..47ef4eb6c75 100644
--- a/modin/pandas/test/dataframe/test_default.py
+++ b/modin/pandas/test/dataframe/test_default.py
@@ -72,7 +72,7 @@
         ("interpolate", None),
         ("mask", lambda df: {"cond": df != 0}),
         ("pct_change", None),
-        # ("to_xarray", None),
+        ("to_xarray", None),
         ("flags", None),
         ("set_flags", lambda df: {"allows_duplicate_labels": False}),
     ],
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index 00d48655d31..4aa5ddd679b 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -1261,7 +1261,6 @@ def test(grp):
         return test
 
     # issue-#3252, https://github.com/pandas-dev/pandas/issues/52760
-    """
     eval_general(
         md_grp,
         pd_grp,
@@ -1274,7 +1273,7 @@ def test(grp):
         build_list_agg(["mean", "count"]),
         comparator=build_types_asserter(df_equals),
     )
-    """
+
     # Explicit default-to-pandas test
     eval_general(
         md_grp,
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 22d05593644..34c26fc8f7b 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -3410,7 +3410,6 @@ def test_to_timestamp():
         series.to_period().to_timestamp()
 
 
-@pytest.mark.skip
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 def test_to_xarray(data):
     modin_series, _ = create_test_series(data)  # noqa: F841

From 6702b3a32f1eb1966fb6982bbd378f9b0a152cc6 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 22 May 2023 12:40:20 +0200
Subject: [PATCH 092/176] fix after merge

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/groupby.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 7274aa7666f..b5de538fd0f 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -140,7 +140,6 @@ def __override(self, **kwargs):
             df=self._df,
             by=self._by,
             axis=self._axis,
-            squeeze=self._squeeze,
             idx_name=self._idx_name,
             drop=self._drop,
             **self._kwargs,
@@ -1621,8 +1620,7 @@ def aggregate(self, func=None, *args, **kwargs):
             # because there is no need to identify which original column's aggregation
             # the new column represents. alternatively we could give the query compiler
             # a hint that it's for a series, not a dataframe.
-            maybe_squeezed = result.squeeze() if self._squeeze else result
-            return maybe_squeezed.set_axis(labels=self._try_get_str_func(func), axis=1)
+            return result.set_axis(labels=self._try_get_str_func(func), axis=1)
         else:
             return super().aggregate(func, *args, **kwargs)
 

From 548d1829f22588e8aa7b3fc0d1bbb84424ecbbb5 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 22 May 2023 13:23:11 +0200
Subject: [PATCH 093/176] fixes

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 environment-dev.yml                     | 2 +-
 modin/pandas/base.py                    | 4 +---
 requirements/env_hdk.yml                | 2 +-
 requirements/env_unidist.yml            | 2 +-
 requirements/requirements-no-engine.yml | 2 +-
 5 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index cc776f06dad..b745af957bd 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -12,7 +12,7 @@ dependencies:
   - dask>=2.22.0
   - distributed>=2.22.0
   - fsspec
-  # - xarray
+  - xarray
   - Jinja2
   - scipy
   - pip
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 5ca55ca13b8..23582609cc1 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1020,13 +1020,11 @@ def between_time(
         inclusive="both",
         axis=None,
     ):  # noqa: PR01, RT01, D200
-        left_inclusive, right_inclusive = validate_inclusive(inclusive)
         return self._create_or_update_from_compiler(
             self._query_compiler.between_time(
                 start_time=pandas.core.tools.times.to_time(start_time),
                 end_time=pandas.core.tools.times.to_time(end_time),
-                include_start=left_inclusive,
-                include_end=right_inclusive,
+                inclusive=inclusive,
                 axis=self._get_axis_number(axis),
             )
         )
diff --git a/requirements/env_hdk.yml b/requirements/env_hdk.yml
index 38c58b6e87d..ae7a90ef0e4 100644
--- a/requirements/env_hdk.yml
+++ b/requirements/env_hdk.yml
@@ -22,7 +22,7 @@ dependencies:
   - xgboost>=1.7.1,<2.0.0
   - scikit-learn-intelex
   - matplotlib
-  # - xarray
+  - xarray
   - pytables
   - fastparquet
   # code linters
diff --git a/requirements/env_unidist.yml b/requirements/env_unidist.yml
index 75fb75ad4b7..01e39b4a019 100644
--- a/requirements/env_unidist.yml
+++ b/requirements/env_unidist.yml
@@ -7,7 +7,7 @@ dependencies:
   - numpy>=1.18.5
   - pyarrow<12 # workaround for https://github.com/modin-project/modin/issues/6072
   - fsspec
-  # - xarray
+  - xarray
   - Jinja2
   - scipy
   - pip
diff --git a/requirements/requirements-no-engine.yml b/requirements/requirements-no-engine.yml
index 20a63c7bef8..69179b80d78 100644
--- a/requirements/requirements-no-engine.yml
+++ b/requirements/requirements-no-engine.yml
@@ -5,7 +5,7 @@ dependencies:
   - numpy>=1.18.5
   - pyarrow<12 # workaround for https://github.com/modin-project/modin/issues/6072
   - fsspec
-  # - xarray
+  - xarray
   - Jinja2
   - scipy
   - pip

From 872fa5801a7ee244cba9fa9b4b3bf4a62d3070a0 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 22 May 2023 13:33:08 +0200
Subject: [PATCH 094/176] fixes

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 23582609cc1..73b7ddbac5c 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -3540,9 +3540,7 @@ def __getitem__(self, key):
         # see if we can slice the rows
         # This lets us reuse code in pandas to error check
         indexer = None
-        if isinstance(key, slice) or (
-            isinstance(key, str) and (not self._is_dataframe or key not in self.columns)
-        ):
+        if isinstance(key, slice):
             indexer = self.index._convert_slice_indexer(key, kind="getitem")
         if indexer is not None:
             return self._getitem_slice(indexer)

From f941c5d2255da42af214d45d577e0e0eafc1f933 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 22 May 2023 13:35:30 +0200
Subject: [PATCH 095/176] fixes

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 73b7ddbac5c..71eca12118a 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -34,7 +34,6 @@
     validate_percentile,
     validate_bool_kwarg,
     validate_ascending,
-    validate_inclusive,
 )
 from pandas._libs.lib import no_default, NoDefault
 from pandas._libs.tslibs import to_offset

From 0c92b9e2a6b0b215309994d59ca7f28101449b19 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 22 May 2023 13:48:03 +0200
Subject: [PATCH 096/176] fix expanding

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_expanding.py | 24 +++++++-----------------
 modin/pandas/window.py              |  9 ++-------
 2 files changed, 9 insertions(+), 24 deletions(-)

diff --git a/modin/pandas/test/test_expanding.py b/modin/pandas/test/test_expanding.py
index 9b2f587f3d1..e4017d8b1bd 100644
--- a/modin/pandas/test/test_expanding.py
+++ b/modin/pandas/test/test_expanding.py
@@ -64,9 +64,9 @@ def create_test_series(vals):
 def test_dataframe(data, min_periods, axis, method, kwargs):
     eval_general(
         *create_test_dfs(data),
-        lambda df: getattr(
-            df.expanding(min_periods=min_periods, center=True, axis=axis), method
-        )(**kwargs)
+        lambda df: getattr(df.expanding(min_periods=min_periods, axis=axis), method)(
+            **kwargs
+        )
     )
 
 
@@ -79,7 +79,7 @@ def test_dataframe_corr_cov(data, min_periods, axis, method):
         eval_general(
             *create_test_dfs(data),
             lambda df: getattr(
-                df.expanding(min_periods=min_periods, center=True, axis=axis), method
+                df.expanding(min_periods=min_periods, axis=axis), method
             )()
         )
 
@@ -104,12 +104,10 @@ def test_dataframe_agg(data, min_periods):
     pandas_df = pandas.DataFrame(data)
     pandas_expanded = pandas_df.expanding(
         min_periods=min_periods,
-        center=True,
         axis=0,
     )
     modin_expanded = modin_df.expanding(
         min_periods=min_periods,
-        center=True,
         axis=0,
     )
     # aggregates are only supported on axis 0
@@ -145,9 +143,7 @@ def test_dataframe_agg(data, min_periods):
 def test_series(data, min_periods, method, kwargs):
     eval_general(
         *create_test_series(data),
-        lambda df: getattr(df.expanding(min_periods=min_periods, center=True), method)(
-            **kwargs
-        )
+        lambda df: getattr(df.expanding(min_periods=min_periods), method)(**kwargs)
     )
 
 
@@ -155,14 +151,8 @@ def test_series(data, min_periods, method, kwargs):
 @pytest.mark.parametrize("min_periods", [None, 5])
 def test_series_agg(data, min_periods):
     modin_series, pandas_series = create_test_series(data)
-    pandas_expanded = pandas_series.expanding(
-        min_periods=min_periods,
-        center=True,
-    )
-    modin_expanded = modin_series.expanding(
-        min_periods=min_periods,
-        center=True,
-    )
+    pandas_expanded = pandas_series.expanding(min_periods=min_periods)
+    modin_expanded = modin_series.expanding(min_periods=min_periods)
 
     df_equals(modin_expanded.aggregate(np.sum), pandas_expanded.aggregate(np.sum))
     df_equals(
diff --git a/modin/pandas/window.py b/modin/pandas/window.py
index 7895930dbad..473b2a91d97 100644
--- a/modin/pandas/window.py
+++ b/modin/pandas/window.py
@@ -301,15 +301,10 @@ def rank(
     excluded=[pandas.core.window.expanding.Expanding.__init__],
 )
 class Expanding(ClassLogger):
-    def __init__(self, dataframe, min_periods=1, center=None, axis=0, method="single"):
+    def __init__(self, dataframe, min_periods=1, axis=0, method="single"):
         self._dataframe = dataframe
         self._query_compiler = dataframe._query_compiler
-        self.expanding_args = [
-            min_periods,
-            center,
-            axis,
-            method,
-        ]
+        self.expanding_args = [min_periods, axis, method]
         self.axis = axis
 
     def aggregate(self, func, *args, **kwargs):

From 52c0688d03f5df1728178e7250cdebe6579e9f06 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 22 May 2023 14:31:47 +0200
Subject: [PATCH 097/176] use pandas==2.0.1

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 environment-dev.yml                     | 2 +-
 modin/pandas/__init__.py                | 2 +-
 modin/pandas/groupby.py                 | 6 +++---
 modin/pandas/test/test_groupby.py       | 1 -
 requirements/env_hdk.yml                | 2 +-
 requirements/env_unidist.yml            | 2 +-
 requirements/requirements-no-engine.yml | 2 +-
 setup.py                                | 2 +-
 8 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index b745af957bd..69c97da1939 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -2,7 +2,7 @@ name: modin
 channels:
   - conda-forge
 dependencies:
-  - pandas==2.0.0
+  - pandas==2.0.1
   - numpy>=1.18.5
   - ray-default>=1.13.0
   - pyarrow<12 # workaround for https://github.com/modin-project/modin/issues/6072
diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py
index 2f554a13caa..f75f5cac0b4 100644
--- a/modin/pandas/__init__.py
+++ b/modin/pandas/__init__.py
@@ -14,7 +14,7 @@
 import pandas
 import warnings
 
-__pandas_version__ = "2.0.0"
+__pandas_version__ = "2.0.1"
 
 if pandas.__version__ != __pandas_version__:
     warnings.warn(
diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index b5de538fd0f..73beafce1e8 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -199,7 +199,7 @@ def ngroups(self):
 
     def skew(self, axis=no_default, skipna=True, numeric_only=False, **kwargs):
         # default behaviour for aggregations; for the reference see
-        # `_op_via_apply` func in pandas==2.0.0
+        # `_op_via_apply` func in pandas==2.0.1
         if axis is None or axis is no_default:
             axis = self._axis
 
@@ -228,11 +228,11 @@ def ffill(self, limit=None):
         )
         return self.fillna(limit=limit, method="ffill")
 
-    def sem(self, ddof=1):
+    def sem(self, ddof=1, numeric_only=False):
         return self._wrap_aggregation(
             type(self._query_compiler).groupby_sem,
             agg_kwargs=dict(ddof=ddof),
-            numeric_only=True,
+            numeric_only=numeric_only,
         )
 
     def sample(self, n=None, frac=None, replace=False, weights=None, random_state=None):
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index ee87acb8aa3..5350debf87a 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -384,7 +384,6 @@ def maybe_get_columns(df, by):
     eval_ngroups(modin_groupby, pandas_groupby)
     eval_shift(modin_groupby, pandas_groupby)
     eval_general(modin_groupby, pandas_groupby, lambda df: df.ffill())
-    eval_general(modin_groupby, pandas_groupby, lambda df: df.pad())
     if as_index:
         eval_general(modin_groupby, pandas_groupby, lambda df: df.nth(0))
     else:
diff --git a/requirements/env_hdk.yml b/requirements/env_hdk.yml
index ae7a90ef0e4..e8663d499e0 100644
--- a/requirements/env_hdk.yml
+++ b/requirements/env_hdk.yml
@@ -2,7 +2,7 @@ name: modin_on_hdk
 channels:
   - conda-forge
 dependencies:
-  - pandas==2.0.0
+  - pandas==2.0.1
   - pyarrow<12 # workaround for https://github.com/modin-project/modin/issues/6072
   - numpy>=1.18.5
   - fsspec
diff --git a/requirements/env_unidist.yml b/requirements/env_unidist.yml
index 01e39b4a019..e1181e73bcf 100644
--- a/requirements/env_unidist.yml
+++ b/requirements/env_unidist.yml
@@ -3,7 +3,7 @@ channels:
   - conda-forge
 dependencies:
   - unidist-mpi>=0.2.1
-  - pandas==2.0.0
+  - pandas==2.0.1
   - numpy>=1.18.5
   - pyarrow<12 # workaround for https://github.com/modin-project/modin/issues/6072
   - fsspec
diff --git a/requirements/requirements-no-engine.yml b/requirements/requirements-no-engine.yml
index 69179b80d78..76d6b157616 100644
--- a/requirements/requirements-no-engine.yml
+++ b/requirements/requirements-no-engine.yml
@@ -1,7 +1,7 @@
 channels:
   - conda-forge
 dependencies:
-  - pandas==2.0.0
+  - pandas==2.0.1
   - numpy>=1.18.5
   - pyarrow<12 # workaround for https://github.com/modin-project/modin/issues/6072
   - fsspec
diff --git a/setup.py b/setup.py
index 12642d72b89..2b5225783e6 100644
--- a/setup.py
+++ b/setup.py
@@ -47,7 +47,7 @@ def make_distribution(self):
     long_description=long_description,
     long_description_content_type="text/markdown",
     install_requires=[
-        "pandas==2.0.0",
+        "pandas==2.0.1",
         "packaging",
         "numpy>=1.18.5",
         "fsspec",

From 916aed3abd586ff2166a920e6400c8d99655ed50 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 22 May 2023 14:40:31 +0200
Subject: [PATCH 098/176] xarray still does not work with pandas>=2.0.0

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 environment-dev.yml                         | 2 +-
 modin/pandas/test/dataframe/test_default.py | 2 +-
 modin/pandas/test/test_series.py            | 1 +
 requirements/env_hdk.yml                    | 2 +-
 requirements/env_unidist.yml                | 2 +-
 requirements/requirements-no-engine.yml     | 2 +-
 6 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 69c97da1939..89dff2554d8 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -12,7 +12,7 @@ dependencies:
   - dask>=2.22.0
   - distributed>=2.22.0
   - fsspec
-  - xarray
+  # - xarray
   - Jinja2
   - scipy
   - pip
diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py
index d6e3b0882d0..f76478d9e1e 100644
--- a/modin/pandas/test/dataframe/test_default.py
+++ b/modin/pandas/test/dataframe/test_default.py
@@ -71,7 +71,7 @@
         ("interpolate", None),
         ("mask", lambda df: {"cond": df != 0}),
         ("pct_change", None),
-        ("to_xarray", None),
+        # ("to_xarray", None),
         ("flags", None),
         ("set_flags", lambda df: {"allows_duplicate_labels": False}),
     ],
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 48a810b3869..a20f972ba00 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -3408,6 +3408,7 @@ def test_to_timestamp():
         series.to_period().to_timestamp()
 
 
+@pytest.mark.skip
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 def test_to_xarray(data):
     modin_series, _ = create_test_series(data)  # noqa: F841
diff --git a/requirements/env_hdk.yml b/requirements/env_hdk.yml
index e8663d499e0..2106c5b454e 100644
--- a/requirements/env_hdk.yml
+++ b/requirements/env_hdk.yml
@@ -22,7 +22,7 @@ dependencies:
   - xgboost>=1.7.1,<2.0.0
   - scikit-learn-intelex
   - matplotlib
-  - xarray
+  # - xarray
   - pytables
   - fastparquet
   # code linters
diff --git a/requirements/env_unidist.yml b/requirements/env_unidist.yml
index e1181e73bcf..db3434fbcec 100644
--- a/requirements/env_unidist.yml
+++ b/requirements/env_unidist.yml
@@ -7,7 +7,7 @@ dependencies:
   - numpy>=1.18.5
   - pyarrow<12 # workaround for https://github.com/modin-project/modin/issues/6072
   - fsspec
-  - xarray
+  # - xarray
   - Jinja2
   - scipy
   - pip
diff --git a/requirements/requirements-no-engine.yml b/requirements/requirements-no-engine.yml
index 76d6b157616..734925dd07c 100644
--- a/requirements/requirements-no-engine.yml
+++ b/requirements/requirements-no-engine.yml
@@ -5,7 +5,7 @@ dependencies:
   - numpy>=1.18.5
   - pyarrow<12 # workaround for https://github.com/modin-project/modin/issues/6072
   - fsspec
-  - xarray
+  # - xarray
   - Jinja2
   - scipy
   - pip

From b87a421c95fb5d864830d3aa909f3571d0ca9ed6 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 22 May 2023 15:19:46 +0200
Subject: [PATCH 099/176] fix describe

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/base/doc_utils.py     |  2 --
 .../core/storage_formats/base/query_compiler.py  | 13 +++----------
 .../storage_formats/pandas/query_compiler.py     | 16 +++-------------
 modin/pandas/base.py                             |  6 +-----
 4 files changed, 7 insertions(+), 30 deletions(-)

diff --git a/modin/core/storage_formats/base/doc_utils.py b/modin/core/storage_formats/base/doc_utils.py
index 963ba3f7941..847ddaecd2f 100644
--- a/modin/core/storage_formats/base/doc_utils.py
+++ b/modin/core/storage_formats/base/doc_utils.py
@@ -244,8 +244,6 @@ def doc_reduce_agg(method, refer_to, params=None, extra_params=None):
     if params is None:
         params = """
         axis : {{0, 1}}
-        level : None, default: None
-            Serves the compatibility purpose. Always has to be None.
         numeric_only : bool, optional"""
 
     extra_params_map = {
diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 37647146c11..2c1c384949d 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -2129,18 +2129,13 @@ def var(self, **kwargs):  # noqa: PR02
     # END Abstract column/row partitions reduce operations
 
     @doc_utils.add_refer_to("DataFrame.describe")
-    def describe(
-        self,
-        percentiles: np.ndarray,
-        datetime_is_numeric: bool,
-    ):
+    def describe(self, percentiles: np.ndarray):
         """
         Generate descriptive statistics.
 
         Parameters
         ----------
         percentiles : list-like
-        datetime_is_numeric : bool
 
         Returns
         -------
@@ -2151,8 +2146,6 @@ def describe(
         return DataFrameDefault.register(pandas.DataFrame.describe)(
             self,
             percentiles=percentiles,
-            datetime_is_numeric=datetime_is_numeric,
-            include="all",
         )
 
     # Map across rows/columns
@@ -4615,11 +4608,11 @@ def dt_freq(self):
         return DateTimeDefault.register(pandas.Series.dt.freq)(self)
 
     @doc_utils.add_refer_to("Series.dt.unit")
-    def dt_unit(self):
+    def dt_unit(self):  # noqa: RT01
         return DateTimeDefault.register(pandas.Series.dt.unit)(self)
 
     @doc_utils.add_refer_to("Series.dt.as_unit")
-    def dt_as_unit(self, *args, **kwargs):
+    def dt_as_unit(self, *args, **kwargs):  # noqa: PR01, RT01
         return DateTimeDefault.register(pandas.Series.dt.as_unit)(self, *args, **kwargs)
 
     @doc_utils.doc_dt_timestamp(
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index 571622897a8..93be211634e 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -1999,18 +1999,12 @@ def last_valid_index_builder(df):
 
     # END Column/Row partitions reduce operations
 
-    def describe(
-        self,
-        percentiles: np.ndarray,
-        datetime_is_numeric: bool,
-    ):
+    def describe(self, percentiles: np.ndarray):
         # Use pandas to calculate the correct columns
         empty_df = (
             pandas.DataFrame(columns=self.columns)
             .astype(self.dtypes)
-            .describe(
-                percentiles, datetime_is_numeric=datetime_is_numeric, include="all"
-            )
+            .describe(percentiles, include="all")
         )
         new_index = empty_df.index
 
@@ -2025,11 +2019,7 @@ def describe_builder(df, internal_indices=[]):  # pragma: no cover
             # Thus, we must reindex each partition with the global new_index.
             return (
                 df.iloc[:, internal_indices]
-                .describe(
-                    percentiles=percentiles,
-                    datetime_is_numeric=datetime_is_numeric,
-                    include="all",
-                )
+                .describe(percentiles=percentiles, include="all")
                 .reindex(new_index)
             )
 
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 71eca12118a..40f22e50788 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1216,11 +1216,7 @@ def describe(
             # Match pandas error from concatenting empty list of series descriptions.
             raise ValueError("No objects to concatenate")
         return self.__constructor__(
-            query_compiler=data._query_compiler.describe(
-                percentiles=percentiles,
-                include=include,
-                exclude=exclude,
-            )
+            query_compiler=data._query_compiler.describe(percentiles=percentiles)
         )
 
     def diff(self, periods=1, axis=0):  # noqa: PR01, RT01, D200

From b53747a5fea25c1353219271de2ea8234353b02d Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 22 May 2023 15:43:28 +0200
Subject: [PATCH 100/176] use 'format=mixed' for hdk tests

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../implementations/hdk_on_native/test/test_dataframe.py       | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py b/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
index 72ca8676daf..2736179fd8b 100644
--- a/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
+++ b/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
@@ -1850,7 +1850,8 @@ class TestDateTime:
                 "2018-10-26 13:00:15",
                 "2020-10-26 04:00:15",
                 "2020-10-26",
-            ]
+            ],
+            format="mixed",
         ),
     }
 

From 44b59513aa616555cb84b6716ff9fe9f72bd276a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 22 May 2023 16:59:08 +0200
Subject: [PATCH 101/176] fix describe

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/base/query_compiler.py | 1 +
 modin/pandas/base.py                              | 1 +
 modin/pandas/test/dataframe/test_reduce.py        | 5 ++---
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 2c1c384949d..d64a247502d 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -2146,6 +2146,7 @@ def describe(self, percentiles: np.ndarray):
         return DataFrameDefault.register(pandas.DataFrame.describe)(
             self,
             percentiles=percentiles,
+            include="all",
         )
 
     # Map across rows/columns
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 40f22e50788..63d5aab3154 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1199,6 +1199,7 @@ def describe(
             if (include is None) and (exclude is None):
                 # when some numerics are found, keep only numerics
                 default_include: list[npt.DTypeLike] = [np.number]
+                default_include.append("datetime")
                 data = self.select_dtypes(include=default_include)
                 if len(data.columns) == 0:
                     data = self
diff --git a/modin/pandas/test/dataframe/test_reduce.py b/modin/pandas/test/dataframe/test_reduce.py
index 41b0d48b87f..305d9a00223 100644
--- a/modin/pandas/test/dataframe/test_reduce.py
+++ b/modin/pandas/test/dataframe/test_reduce.py
@@ -118,8 +118,7 @@ def test_describe(data, percentiles):
 
 
 @pytest.mark.parametrize("has_numeric_column", [False, True])
-@pytest.mark.parametrize("datetime_is_numeric", [True, False, None])
-def test_2195(datetime_is_numeric, has_numeric_column):
+def test_2195(has_numeric_column):
     data = {
         "categorical": pd.Categorical(["d"] * 10**2),
         "date": [np.datetime64("2000-01-01")] * 10**2,
@@ -133,7 +132,7 @@ def test_2195(datetime_is_numeric, has_numeric_column):
     eval_general(
         modin_df,
         pandas_df,
-        lambda df: df.describe(datetime_is_numeric=datetime_is_numeric),
+        lambda df: df.describe(),
     )
 
 

From 63cf78b3ceb0bfc77f781e658936ca57e245a062 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 22 May 2023 20:57:17 +0200
Subject: [PATCH 102/176] fixes for groupby

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/groupby.py           | 12 ++++++++++--
 modin/pandas/test/test_groupby.py | 24 +++++++++++++++++-------
 2 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 73beafce1e8..8d83fcc4536 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -335,7 +335,11 @@ def min(self, numeric_only=False, min_count=-1):
             agg_kwargs=dict(min_count=min_count),
         )
 
-    def idxmax(self, axis=0, skipna=True, numeric_only=True):
+    def idxmax(self, axis=None, skipna=True, numeric_only=False):
+        # default behaviour for aggregations; for the reference see
+        # `_op_via_apply` func in pandas==2.0.1
+        if axis is None:
+            axis = self._axis
         return self._wrap_aggregation(
             type(self._query_compiler).groupby_idxmax,
             agg_kwargs=dict(axis=axis, skipna=skipna),
@@ -667,7 +671,11 @@ def bfill(self, limit=None):
         )
         return self.fillna(limit=limit, method="bfill")
 
-    def idxmin(self, axis=0, skipna=True, numeric_only=True):
+    def idxmin(self, axis=None, skipna=True, numeric_only=False):
+        # default behaviour for aggregations; for the reference see
+        # `_op_via_apply` func in pandas==2.0.1
+        if axis is None:
+            axis = self._axis
         return self._wrap_aggregation(
             type(self._query_compiler).groupby_idxmin,
             agg_kwargs=dict(axis=axis, skipna=skipna),
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index 5350debf87a..fbba5a10d8a 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -203,7 +203,10 @@ def test_mixed_dtypes_groupby(as_index):
                 *sort_index_if_experimental_groupby(*dfs)
             ),
         )
-        eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmin())
+        # numeric_only=False doesn't work
+        eval_general(
+            modin_groupby, pandas_groupby, lambda df: df.idxmin(numeric_only=True)
+        )
         eval_prod(modin_groupby, pandas_groupby, numeric_only=True)
         if as_index:
             eval_std(modin_groupby, pandas_groupby, numeric_only=True)
@@ -250,10 +253,11 @@ def test_mixed_dtypes_groupby(as_index):
             ),
         )
         eval_cumprod(modin_groupby, pandas_groupby, numeric_only=True)
+        # numeric_only=False doesn't work
         eval_general(
             modin_groupby,
             pandas_groupby,
-            lambda df: df.cov(),
+            lambda df: df.cov(numeric_only=True),
             modin_df_almost_equals_pandas,
         )
 
@@ -268,7 +272,7 @@ def test_mixed_dtypes_groupby(as_index):
         eval_general(
             modin_groupby,
             pandas_groupby,
-            lambda df: df.corr(),
+            lambda df: df.corr(numeric_only=True),
             modin_df_almost_equals_pandas,
         )
         eval_fillna(modin_groupby, pandas_groupby)
@@ -659,7 +663,8 @@ def test_single_group_row_groupby():
     eval_general(
         modin_groupby,
         pandas_groupby,
-        lambda df: df.pct_change(),
+        # AttributeError: 'DataFrameGroupBy' object has no attribute 'pad'
+        lambda df: df.pct_change(fill_method="ffill"),
         modin_df_almost_equals_pandas,
     )
     eval_cummax(modin_groupby, pandas_groupby)
@@ -787,7 +792,8 @@ def test_large_row_groupby(is_by_category):
     eval_general(
         modin_groupby,
         pandas_groupby,
-        lambda df: df.pct_change(),
+        # AttributeError: 'DataFrameGroupBy' object has no attribute 'pad'
+        lambda df: df.pct_change(fill_method="ffill"),
         modin_df_almost_equals_pandas,
     )
     eval_cummax(modin_groupby, pandas_groupby)
@@ -906,10 +912,11 @@ def test_simple_col_groupby():
     # eval_cummin(modin_groupby, pandas_groupby)
     # eval_cumprod(modin_groupby, pandas_groupby)
 
+    # AttributeError: 'DataFrameGroupBy' object has no attribute 'pad'
     eval_general(
         modin_groupby,
         pandas_groupby,
-        lambda df: df.pct_change(),
+        lambda df: df.pct_change(fill_method="ffill"),
         modin_df_almost_equals_pandas,
     )
     apply_functions = [lambda df: -df, lambda df: df.sum(axis=1)]
@@ -1032,7 +1039,8 @@ def test_series_groupby(by, as_index_series_or_dataframe):
         eval_general(
             modin_groupby,
             pandas_groupby,
-            lambda df: df.pct_change(),
+            # AttributeError: 'DataFrameGroupBy' object has no attribute 'pad'
+            lambda df: df.pct_change(fill_method="ffill"),
             modin_df_almost_equals_pandas,
         )
         eval_general(
@@ -1367,6 +1375,7 @@ def test(grp):
         return test
 
     # issue-#3252, https://github.com/pandas-dev/pandas/issues/52760
+    """
     eval_general(
         md_grp,
         pd_grp,
@@ -1379,6 +1388,7 @@ def test(grp):
         build_list_agg(["mean", "count"]),
         comparator=build_types_asserter(df_equals),
     )
+    """
 
     # Explicit default-to-pandas test
     eval_general(

From 4d08bc02dd94c95d82468fd7c4c15a72c676e6ae Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 22 May 2023 22:13:38 +0200
Subject: [PATCH 103/176] groupby fixes

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_groupby.py | 28 ++++++----------------------
 1 file changed, 6 insertions(+), 22 deletions(-)

diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index 6f322dba9a5..e7a3a79eec8 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -1327,12 +1327,12 @@ def eval_pipe(modin_groupby, pandas_groupby, func):
 
 def eval_quantile(modin_groupby, pandas_groupby):
     try:
-        pandas_result = pandas_groupby.quantile(q=0.4)
+        pandas_result = pandas_groupby.quantile(q=0.4, numeric_only=True)
     except Exception as err:
         with pytest.raises(type(err)):
-            modin_groupby.quantile(q=0.4)
+            modin_groupby.quantile(q=0.4, numeric_only=True)
     else:
-        df_equals(modin_groupby.quantile(q=0.4), pandas_result)
+        df_equals(modin_groupby.quantile(q=0.4, numeric_only=True), pandas_result)
 
 
 def eval___getattr__(modin_groupby, pandas_groupby, item):
@@ -1781,27 +1781,11 @@ def col3(x):
     "operation",
     [
         "quantile",
-        pytest.param(
-            "mean",
-            marks=pytest.mark.xfail(
-                condition=ExperimentalGroupbyImpl.get()
-                and Engine.get() in ("Dask", "Ray", "Unidist"),
-                reason="There's a bug in pandas making this test to fail that's been fixed in 2.0;"
-                + "Remove this after the transition to pandas 2.0",
-            ),
-        ),
+        "mean",
         pytest.param(
             "sum", marks=pytest.mark.skip("See Modin issue #2255 for details")
         ),
-        pytest.param(
-            "median",
-            marks=pytest.mark.xfail(
-                condition=ExperimentalGroupbyImpl.get()
-                and Engine.get() in ("Dask", "Ray", "Unidist"),
-                reason="There's a bug in pandas making this test to fail that's been fixed in 2.0;"
-                + "Remove this after the transition to pandas 2.0",
-            ),
-        ),
+        "median",
         "unique",
         "cumprod",
     ],
@@ -2407,7 +2391,7 @@ def test_groupby_sort(sort, is_categorical_by):
     pd_grp = pd_df.groupby("key_col", sort=sort)
 
     modin_groupby_equals_pandas(md_grp, pd_grp)
-    eval_general(md_grp, pd_grp, lambda grp: grp.sum())
+    eval_general(md_grp, pd_grp, lambda grp: grp.sum(numeric_only=True))
     eval_general(md_grp, pd_grp, lambda grp: grp.size())
     eval_general(md_grp, pd_grp, lambda grp: grp.agg(lambda df: df.mean()))
     eval_general(md_grp, pd_grp, lambda grp: grp.dtypes)

From e960efcb557eff770139901025621332116cfbbe Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 23 May 2023 11:38:37 +0200
Subject: [PATCH 104/176] groupby fixes

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/groupby.py | 19 +------------------
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index d1de8374289..dd545161655 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -1108,16 +1108,6 @@ def hist(self):
 
     def quantile(self, q=0.5, interpolation="linear", numeric_only=False):
         # TODO: handle list-like cases properly
-        # We normally handle `numeric_only` by masking non-numeric columns; however
-        # pandas errors out if there are only non-numeric columns and `numeric_only=True`
-        # for groupby.quantile.
-        if numeric_only:
-            if all(
-                [not is_numeric_dtype(dtype) for dtype in self._query_compiler.dtypes]
-            ):
-                raise TypeError(
-                    f"'quantile' cannot be performed against '{self._query_compiler.dtypes[0]}' dtypes!"
-                )
         if is_list_like(q):
             return self._default_to_pandas(
                 lambda df: df.quantile(q=q, interpolation=interpolation)
@@ -1375,14 +1365,7 @@ def _wrap_aggregation(
             mask_cols = [
                 col
                 for col, dtype in self._query_compiler.dtypes.items()
-                if (
-                    is_numeric_dtype(dtype)
-                    or (
-                        isinstance(dtype, pandas.CategoricalDtype)
-                        and is_numeric_dtype(dtype.categories.dtype)
-                    )
-                    or col in by_cols
-                )
+                if (is_numeric_dtype(dtype) or col in by_cols)
             ]
             groupby_qc = self._query_compiler.getitem_column_array(mask_cols)
         else:

From 63fc3479ee7b4b7890d1335da60524d7d78c0b6d Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 23 May 2023 14:08:40 +0200
Subject: [PATCH 105/176] fix 'test_read_csv_error_handling'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/io/text/text_file_dispatcher.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/modin/core/io/text/text_file_dispatcher.py b/modin/core/io/text/text_file_dispatcher.py
index 14447fc7b11..2fc0e5e395b 100644
--- a/modin/core/io/text/text_file_dispatcher.py
+++ b/modin/core/io/text/text_file_dispatcher.py
@@ -868,10 +868,16 @@ def _define_index(
             Partitions rows lengths.
         """
         index_objs = cls.materialize(index_ids)
-        if len(index_objs) == 0 or isinstance(index_objs[0], int):
+
+        # fix for 'test_read_csv_error_handling'
+        if len(index_objs) == 0 or all((isinstance(obj, int) for obj in index_objs)):
             row_lengths = index_objs
             new_index = pandas.RangeIndex(sum(index_objs))
         else:
+            index_objs = [
+                pandas.RangeIndex(obj) if isinstance(obj, int) else obj
+                for obj in index_objs
+            ]
             row_lengths = [len(o) for o in index_objs]
             new_index = index_objs[0].append(index_objs[1:])
             new_index.name = index_name

From a74c66f3de84e4d2fd04dfd3e3e83de2813cc9a7 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 23 May 2023 14:54:00 +0200
Subject: [PATCH 106/176] temp skip 'test_read_csv_error_handling'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_io.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index be216bf0a2c..c0ab5407a94 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -751,6 +751,7 @@ def test_read_csv_quoting(
             )
 
     # Error Handling parameters tests
+    @pytest.mark.skip
     @pytest.mark.parametrize("on_bad_lines", ["error", "warn", "skip", None])
     def test_read_csv_error_handling(self, on_bad_lines):
         # in that case exceptions are raised both by Modin and pandas

From 74fe6479f4e0647c51813b997608ab765459a3ce Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 23 May 2023 15:39:26 +0200
Subject: [PATCH 107/176] fix Series.value_counts

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/series.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 909110ed1cd..8797464b545 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1921,8 +1921,6 @@ def value_counts(
             ascending=ascending,
             dropna=dropna,
         )
-        # pandas sets output index names to None because the Series name already contains it
-        counted_values._query_compiler.set_index_name(None)
         # https://pandas.pydata.org/pandas-docs/version/2.0/whatsnew/v2.0.0.html#value-counts-sets-the-resulting-name-to-count
         counted_values.name = "proportion" if normalize else "count"
         return counted_values

From 8d82f547e1c6cd4d05fc25a1525cb87dd1ba4f51 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 23 May 2023 15:48:56 +0200
Subject: [PATCH 108/176] read_csv_glob fix

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/experimental/pandas/io.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/modin/experimental/pandas/io.py b/modin/experimental/pandas/io.py
index 921ec8ddd11..2be64a5e0af 100644
--- a/modin/experimental/pandas/io.py
+++ b/modin/experimental/pandas/io.py
@@ -171,13 +171,13 @@ def _make_parser_func(sep: str) -> Callable:
 
     def parser_func(
         filepath_or_buffer: Union[str, pathlib.Path, IO[AnyStr]],
+        *,
         sep=lib.no_default,
         delimiter=None,
         header="infer",
         names=lib.no_default,
         index_col=None,
         usecols=None,
-        prefix=lib.no_default,
         dtype=None,
         engine=None,
         converters=None,
@@ -185,16 +185,18 @@ def parser_func(
         false_values=None,
         skipinitialspace=False,
         skiprows=None,
+        skipfooter=0,
         nrows=None,
         na_values=None,
         keep_default_na=True,
         na_filter=True,
         verbose=False,
         skip_blank_lines=True,
-        parse_dates=False,
-        infer_datetime_format=False,
+        parse_dates=None,
+        infer_datetime_format=lib.no_default,
         keep_date_col=False,
-        date_parser=None,
+        date_parser=lib.no_default,
+        date_format=None,
         dayfirst=False,
         cache_dates=True,
         iterator=False,
@@ -210,14 +212,14 @@ def parser_func(
         encoding=None,
         encoding_errors="strict",
         dialect=None,
-        on_bad_lines=None,
-        skipfooter=0,
+        on_bad_lines="error",
         doublequote=True,
         delim_whitespace=False,
         low_memory=True,
         memory_map=False,
         float_precision=None,
         storage_options: StorageOptions = None,
+        dtype_backend=lib.no_default,
     ) -> DataFrame:
         # ISSUE #2408: parse parameter shared with pandas read_csv and read_table and update with provided args
         _pd_read_csv_signature = {

From 728bfcbedd8083dada5b65e32bd10b1b0055ade9 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 23 May 2023 16:34:36 +0200
Subject: [PATCH 109/176] fix for dt.year/manth/day dtype; fix concat

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/pandas/query_compiler.py         | 6 +++---
 .../execution/native/implementations/hdk_on_native/expr.py  | 2 +-
 .../implementations/hdk_on_native/test/test_dataframe.py    | 5 ++++-
 .../experimental/core/storage_formats/hdk/query_compiler.py | 6 ++++--
 4 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index ec07acf775a..a234067d60d 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -1901,9 +1901,9 @@ def searchsorted(df):
     dt_date = Map.register(_dt_prop_map("date"), dtypes=np.object_)
     dt_time = Map.register(_dt_prop_map("time"), dtypes=np.object_)
     dt_timetz = Map.register(_dt_prop_map("timetz"), dtypes=np.object_)
-    dt_year = Map.register(_dt_prop_map("year"), dtypes=np.int64)
-    dt_month = Map.register(_dt_prop_map("month"), dtypes=np.int64)
-    dt_day = Map.register(_dt_prop_map("day"), dtypes=np.int64)
+    dt_year = Map.register(_dt_prop_map("year"), dtypes=np.int32)
+    dt_month = Map.register(_dt_prop_map("month"), dtypes=np.int32)
+    dt_day = Map.register(_dt_prop_map("day"), dtypes=np.int32)
     dt_hour = Map.register(_dt_prop_map("hour"), dtypes=np.int64)
     dt_minute = Map.register(_dt_prop_map("minute"), dtypes=np.int64)
     dt_second = Map.register(_dt_prop_map("second"), dtypes=np.int64)
diff --git a/modin/experimental/core/execution/native/implementations/hdk_on_native/expr.py b/modin/experimental/core/execution/native/implementations/hdk_on_native/expr.py
index 9b76aa0a563..5754dedb0f9 100644
--- a/modin/experimental/core/execution/native/implementations/hdk_on_native/expr.py
+++ b/modin/experimental/core/execution/native/implementations/hdk_on_native/expr.py
@@ -931,6 +931,6 @@ def build_dt_expr(dt_operation, col_expr):
     """
     operation = LiteralExpr(dt_operation)
 
-    res = OpExpr("PG_EXTRACT", [operation, col_expr], get_dtype(int))
+    res = OpExpr("PG_EXTRACT", [operation, col_expr], get_dtype("int32"))
 
     return res
diff --git a/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py b/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
index 2736179fd8b..48320f4a1b2 100644
--- a/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
+++ b/modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
@@ -971,7 +971,10 @@ def taxi_q2(df, **kwargs):
     @pytest.mark.parametrize("as_index", bool_arg_values)
     def test_taxi_q3(self, as_index):
         def taxi_q3(df, as_index, **kwargs):
-            return df.groupby(["b", df["c"].dt.year], as_index=as_index).size()
+            # TODO: remove 'astype' temp fix
+            return df.groupby(
+                ["b", df["c"].dt.year.astype("int32")], as_index=as_index
+            ).size()
 
         run_and_compare(taxi_q3, data=self.taxi_data, as_index=as_index)
 
diff --git a/modin/experimental/core/storage_formats/hdk/query_compiler.py b/modin/experimental/core/storage_formats/hdk/query_compiler.py
index 254ad74fc0c..d744b40ad53 100644
--- a/modin/experimental/core/storage_formats/hdk/query_compiler.py
+++ b/modin/experimental/core/storage_formats/hdk/query_compiler.py
@@ -551,9 +551,11 @@ def concat(self, axis, other, **kwargs):
         assert all(
             isinstance(o, type(self)) for o in other
         ), "Different Manager objects are being used. This is not allowed"
-        sort = kwargs.get("sort", None)
+        sort = kwargs.get("sort", False)
         if sort is None:
-            sort = False
+            raise ValueError(
+                "The 'sort' keyword only accepts boolean values; None was passed."
+            )
         join = kwargs.get("join", "outer")
         ignore_index = kwargs.get("ignore_index", False)
         other_modin_frames = [o._modin_frame for o in other]

From 3b476d1dbe696a57e545bc6ac71dd860869719a7 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 23 May 2023 18:14:32 +0200
Subject: [PATCH 110/176] fix read_html

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/io/io.py          | 39 ++++++++++++++++++------------------
 modin/pandas/io.py           |  4 +++-
 modin/pandas/test/test_io.py |  1 -
 3 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/modin/core/io/io.py b/modin/core/io/io.py
index 97466afe798..612eec798cb 100644
--- a/modin/core/io/io.py
+++ b/modin/core/io/io.py
@@ -239,26 +239,27 @@ def read_html(
         **kwargs,
     ):  # noqa: PR01
         ErrorMessage.default_to_pandas("`read_html`")
-        return cls.from_pandas(
-            pandas.read_html(
-                io=io,
-                match=match,
-                flavor=flavor,
-                header=header,
-                index_col=index_col,
-                skiprows=skiprows,
-                attrs=attrs,
-                parse_dates=parse_dates,
-                thousands=thousands,
-                encoding=encoding,
-                decimal=decimal,
-                converters=converters,
-                na_values=na_values,
-                keep_default_na=keep_default_na,
-                displayed_only=displayed_only,
-                **kwargs,
-            )[0]
+        result = pandas.read_html(
+            io=io,
+            match=match,
+            flavor=flavor,
+            header=header,
+            index_col=index_col,
+            skiprows=skiprows,
+            attrs=attrs,
+            parse_dates=parse_dates,
+            thousands=thousands,
+            encoding=encoding,
+            decimal=decimal,
+            converters=converters,
+            na_values=na_values,
+            keep_default_na=keep_default_na,
+            displayed_only=displayed_only,
+            **kwargs,
         )
+        if isinstance(result, (pandas.DataFrame, pandas.Series)):
+            return (cls.from_pandas(result),)
+        return (cls.from_pandas(df) for df in result)
 
     @classmethod
     @_inherit_docstrings(pandas.read_clipboard, apilink="pandas.read_clipboard")
diff --git a/modin/pandas/io.py b/modin/pandas/io.py
index 627ce7eaa01..8a4ba8b252f 100644
--- a/modin/pandas/io.py
+++ b/modin/pandas/io.py
@@ -351,6 +351,7 @@ def read_gbq(
 @enable_logging
 def read_html(
     io,
+    *,
     match: str | Pattern = ".+",
     flavor: str | None = None,
     header: int | Sequence[int] | None = None,
@@ -375,7 +376,8 @@ def read_html(
 
     from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher
 
-    return DataFrame(query_compiler=FactoryDispatcher.read_html(**kwargs))
+    qcs = FactoryDispatcher.read_html(**kwargs)
+    return [DataFrame(query_compiler=qc) for qc in qcs]
 
 
 @_inherit_docstrings(pandas.read_clipboard, apilink="pandas.read_clipboard")
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index c0ab5407a94..42cbb0459d6 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -2194,7 +2194,6 @@ def test_to_sql(self, tmp_path, make_sql_connection, index):
 
 
 class TestHtml:
-    @pytest.mark.xfail(reason="read_html is not yet implemented properly - issue #1296")
     def test_read_html(self, make_html_file):
         eval_io(fn_name="read_html", io=make_html_file())
 

From 0b9762c5a6f985fc5961fe43bee0b408e891de8f Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 25 May 2023 14:30:15 +0200
Subject: [PATCH 111/176] add comment for xarray's tests

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 environment-dev.yml                         | 1 +
 modin/pandas/test/dataframe/test_default.py | 9 ++++++++-
 modin/pandas/test/test_series.py            | 6 +++++-
 requirements/env_hdk.yml                    | 1 +
 requirements/env_unidist.yml                | 1 +
 requirements/requirements-no-engine.yml     | 1 +
 6 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 89dff2554d8..b082f38ca4b 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -12,6 +12,7 @@ dependencies:
   - dask>=2.22.0
   - distributed>=2.22.0
   - fsspec
+  # TODO: uncomment after Modin switch to python>=3.9
   # - xarray
   - Jinja2
   - scipy
diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py
index f76478d9e1e..e2c5baf113d 100644
--- a/modin/pandas/test/dataframe/test_default.py
+++ b/modin/pandas/test/dataframe/test_default.py
@@ -11,6 +11,7 @@
 # ANY KIND, either express or implied. See the License for the specific language
 # governing permissions and limitations under the License.
 
+import sys
 import pytest
 import numpy as np
 import pandas
@@ -71,7 +72,13 @@
         ("interpolate", None),
         ("mask", lambda df: {"cond": df != 0}),
         ("pct_change", None),
-        # ("to_xarray", None),
+        pytest.param(
+            ("to_xarray", None),
+            marks=pytest.mark.skipif(
+                condition=sys.version_info < (3, 9),
+                reason="xarray doesn't support pandas>=2.0 for python 3.8",
+            ),
+        ),
         ("flags", None),
         ("set_flags", lambda df: {"allows_duplicate_labels": False}),
     ],
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index a20f972ba00..ecf640f048e 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -13,6 +13,7 @@
 
 from __future__ import annotations
 
+import sys
 import pytest
 import numpy as np
 import json
@@ -3408,7 +3409,10 @@ def test_to_timestamp():
         series.to_period().to_timestamp()
 
 
-@pytest.mark.skip
+@pytest.mark.skipif(
+    condition=sys.version_info < (3, 9),
+    reason="xarray doesn't support pandas>=2.0 for python 3.8",
+)
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 def test_to_xarray(data):
     modin_series, _ = create_test_series(data)  # noqa: F841
diff --git a/requirements/env_hdk.yml b/requirements/env_hdk.yml
index 2106c5b454e..1158ea22ac1 100644
--- a/requirements/env_hdk.yml
+++ b/requirements/env_hdk.yml
@@ -22,6 +22,7 @@ dependencies:
   - xgboost>=1.7.1,<2.0.0
   - scikit-learn-intelex
   - matplotlib
+  # TODO: uncomment after Modin switch to python>=3.9
   # - xarray
   - pytables
   - fastparquet
diff --git a/requirements/env_unidist.yml b/requirements/env_unidist.yml
index db3434fbcec..c6430faeef2 100644
--- a/requirements/env_unidist.yml
+++ b/requirements/env_unidist.yml
@@ -7,6 +7,7 @@ dependencies:
   - numpy>=1.18.5
   - pyarrow<12 # workaround for https://github.com/modin-project/modin/issues/6072
   - fsspec
+  # TODO: uncomment after Modin switch to python>=3.9
   # - xarray
   - Jinja2
   - scipy
diff --git a/requirements/requirements-no-engine.yml b/requirements/requirements-no-engine.yml
index 734925dd07c..619aef77f11 100644
--- a/requirements/requirements-no-engine.yml
+++ b/requirements/requirements-no-engine.yml
@@ -5,6 +5,7 @@ dependencies:
   - numpy>=1.18.5
   - pyarrow<12 # workaround for https://github.com/modin-project/modin/issues/6072
   - fsspec
+  # TODO: uncomment after Modin switch to python>=3.9
   # - xarray
   - Jinja2
   - scipy

From 9636b1080f0950a8392ca26a2f0b9b666f7becf0 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 25 May 2023 15:15:02 +0200
Subject: [PATCH 112/176] fix

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/dataframe/test_default.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py
index e2c5baf113d..b37468e74f7 100644
--- a/modin/pandas/test/dataframe/test_default.py
+++ b/modin/pandas/test/dataframe/test_default.py
@@ -72,19 +72,15 @@
         ("interpolate", None),
         ("mask", lambda df: {"cond": df != 0}),
         ("pct_change", None),
-        pytest.param(
-            ("to_xarray", None),
-            marks=pytest.mark.skipif(
-                condition=sys.version_info < (3, 9),
-                reason="xarray doesn't support pandas>=2.0 for python 3.8",
-            ),
-        ),
+        ("to_xarray", None),
         ("flags", None),
         ("set_flags", lambda df: {"allows_duplicate_labels": False}),
     ],
 )
 def test_ops_defaulting_to_pandas(op, make_args):
     modin_df = pd.DataFrame(test_data_diff_dtype).drop(["str_col", "bool_col"], axis=1)
+    if op == "to_xarray" and sys.version_info < (3, 9):
+        pytest.skip("xarray doesn't support pandas>=2.0 for python 3.8")
     with warns_that_defaulting_to_pandas():
         operation = getattr(modin_df, op)
         if make_args is not None:

From be6e750455c431fff38430ced8d7e2d07467c9dd Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 25 May 2023 15:56:27 +0200
Subject: [PATCH 113/176] fix

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/series_utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/modin/pandas/series_utils.py b/modin/pandas/series_utils.py
index cea307ddc0b..942ad700b86 100644
--- a/modin/pandas/series_utils.py
+++ b/modin/pandas/series_utils.py
@@ -62,7 +62,9 @@ def codes(self):
         return self._Series(query_compiler=self._query_compiler.cat_codes())
 
     def rename_categories(self, new_categories):
-        return self._default_to_pandas(pandas.Series.cat.rename_categories)
+        return self._default_to_pandas(
+            pandas.Series.cat.rename_categories, new_categories
+        )
 
     def reorder_categories(self, new_categories, ordered=None):
         return self._default_to_pandas(

From 005b3862f6653e608f0808ff22582ba059d4d45e Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoliimyachev@mail.com>
Date: Thu, 25 May 2023 16:08:49 +0200
Subject: [PATCH 114/176] Apply suggestions from code review

Co-authored-by: Iaroslav Igoshev <Poolliver868@mail.ru>
---
 modin/pandas/base.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index c6b5ed33604..f4b63b5d695 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -566,9 +566,9 @@ def _get_axis_number(cls, axis):
 
         return cls._pandas_class._get_axis_number(axis) if axis is not None else 0
 
-    def _get_axis_name(cls, axis):
-        axis_number = cls._get_axis_number(axis)
-        return cls._AXIS_ORDERS[axis_number]
+    def _get_axis_name(self, axis):
+        axis_number = self._get_axis_number(axis)
+        return self._AXIS_ORDERS[axis_number]
 
     @pandas.util.cache_readonly
     def __constructor__(self):

From 139f6947b32043575dd45863da85561eacd22aa0 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 25 May 2023 17:42:27 +0200
Subject: [PATCH 115/176] change 'fill_method' value for 'pct_change'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_groupby.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index 0203d2f0000..d605c9a2af7 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -2682,10 +2682,11 @@ def test_groupby_pct_change_diff_6194():
         }
     )
     # These methods should not crash
+    # AttributeError: 'DataFrameGroupBy' object has no attribute 'pad'
     eval_general(
         df,
         df._to_pandas(),
-        lambda df: df.groupby(by="by").pct_change(),
+        lambda df: df.groupby(by="by").pct_change(fill_method="ffill"),
     )
     eval_general(
         df,

From 9baa5c387bcb8dc1cabcc612d0743a54ac4ef54b Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 30 May 2023 12:24:43 +0200
Subject: [PATCH 116/176] fixes for 'read_sql'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/io/io.py             | 2 ++
 modin/experimental/pandas/io.py | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/modin/core/io/io.py b/modin/core/io/io.py
index ca02b49e0c5..fa8f5dd4460 100644
--- a/modin/core/io/io.py
+++ b/modin/core/io/io.py
@@ -451,6 +451,8 @@ def read_sql(
             parse_dates=parse_dates,
             columns=columns,
             chunksize=chunksize,
+            dtype_backend=dtype_backend,
+            dtype=dtype,
         )
 
         if isinstance(result, (pandas.DataFrame, pandas.Series)):
diff --git a/modin/experimental/pandas/io.py b/modin/experimental/pandas/io.py
index 2be64a5e0af..6699ea1fbbc 100644
--- a/modin/experimental/pandas/io.py
+++ b/modin/experimental/pandas/io.py
@@ -36,6 +36,8 @@ def read_sql(
     parse_dates=None,
     columns=None,
     chunksize=None,
+    dtype_backend=lib.no_default,
+    dtype=None,
     partition_column: Optional[str] = None,
     lower_bound: Optional[int] = None,
     upper_bound: Optional[int] = None,

From 9a6cba21fdd39f408e89d82d6847dfdce2203bd7 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 30 May 2023 12:52:29 +0200
Subject: [PATCH 117/176] fixes

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/io/io.py                                  | 12 +++++++++++-
 modin/core/io/text/text_file_dispatcher.py           |  1 -
 .../native/implementations/hdk_on_native/io/io.py    |  1 +
 modin/pandas/general.py                              |  1 +
 modin/pandas/io.py                                   |  6 ++++++
 modin/pandas/test/test_io.py                         |  1 -
 6 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/modin/core/io/io.py b/modin/core/io/io.py
index fa8f5dd4460..a1cf99ccea9 100644
--- a/modin/core/io/io.py
+++ b/modin/core/io/io.py
@@ -222,6 +222,7 @@ def read_gbq(
     def read_html(
         cls,
         io,
+        *,
         match=".+",
         flavor=None,
         header=None,
@@ -379,6 +380,7 @@ def read_stata(
     def read_sas(
         cls,
         filepath_or_buffer,
+        *,
         format=None,
         index=None,
         encoding=None,
@@ -467,7 +469,14 @@ def read_sql(
         returns=_doc_returns_qc_or_parser,
     )
     def read_fwf(
-        cls, filepath_or_buffer, colspecs="infer", widths=None, infer_nrows=100, **kwds
+        cls,
+        filepath_or_buffer,
+        *,
+        colspecs="infer",
+        widths=None,
+        infer_nrows=100,
+        dtype_backend=no_default,
+        **kwds,
     ):  # noqa: PR01
         ErrorMessage.default_to_pandas("`read_fwf`")
         pd_obj = pandas.read_fwf(
@@ -475,6 +484,7 @@ def read_fwf(
             colspecs=colspecs,
             widths=widths,
             infer_nrows=infer_nrows,
+            dtype_backend=dtype_backend,
             **kwds,
         )
         if isinstance(pd_obj, pandas.DataFrame):
diff --git a/modin/core/io/text/text_file_dispatcher.py b/modin/core/io/text/text_file_dispatcher.py
index 2fc0e5e395b..9e50d1b8dec 100644
--- a/modin/core/io/text/text_file_dispatcher.py
+++ b/modin/core/io/text/text_file_dispatcher.py
@@ -869,7 +869,6 @@ def _define_index(
         """
         index_objs = cls.materialize(index_ids)
 
-        # fix for 'test_read_csv_error_handling'
         if len(index_objs) == 0 or all((isinstance(obj, int) for obj in index_objs)):
             row_lengths = index_objs
             new_index = pandas.RangeIndex(sum(index_objs))
diff --git a/modin/experimental/core/execution/native/implementations/hdk_on_native/io/io.py b/modin/experimental/core/execution/native/implementations/hdk_on_native/io/io.py
index 4eef1356e94..b3015c8da62 100644
--- a/modin/experimental/core/execution/native/implementations/hdk_on_native/io/io.py
+++ b/modin/experimental/core/execution/native/implementations/hdk_on_native/io/io.py
@@ -85,6 +85,7 @@ class HdkOnNativeIO(BaseIO, TextFileDispatcher):
         "infer_datetime_format",
         "keep_date_col",
         "date_parser",
+        "date_format",
         "dayfirst",
         "cache_dates",
         "iterator",
diff --git a/modin/pandas/general.py b/modin/pandas/general.py
index be29ecbc62f..e258a59390d 100644
--- a/modin/pandas/general.py
+++ b/modin/pandas/general.py
@@ -409,6 +409,7 @@ def value_counts(
 @enable_logging
 def concat(
     objs: "Iterable[DataFrame | Series] | Mapping[Hashable, DataFrame | Series]",
+    *,
     axis=0,
     join="outer",
     ignore_index: bool = False,
diff --git a/modin/pandas/io.py b/modin/pandas/io.py
index 8a4ba8b252f..5b269e1276f 100644
--- a/modin/pandas/io.py
+++ b/modin/pandas/io.py
@@ -103,6 +103,7 @@ def _read(**kwargs):
 @enable_logging
 def read_xml(
     path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str],
+    *,
     xpath: str = "./*",
     namespaces: dict[str, str] | None = None,
     elems_only: bool = False,
@@ -128,6 +129,7 @@ def read_xml(
 @enable_logging
 def read_csv(
     filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
+    *,
     sep: str | None | NoDefault = no_default,
     delimiter: str | None | NoDefault = None,
     # Column and Index Locations and Names
@@ -198,6 +200,7 @@ def read_csv(
 @enable_logging
 def read_table(
     filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
+    *,
     sep: str | None | NoDefault = no_default,
     delimiter: str | None | NoDefault = None,
     # Column and Index Locations and Names
@@ -493,6 +496,7 @@ def read_feather(
 @enable_logging
 def read_stata(
     filepath_or_buffer,
+    *,
     convert_dates: bool = True,
     convert_categoricals: bool = True,
     index_col: str | None = None,
@@ -516,6 +520,7 @@ def read_stata(
 @enable_logging
 def read_sas(
     filepath_or_buffer,
+    *,
     format: str | None = None,
     index: Hashable | None = None,
     encoding: str | None = None,
@@ -589,6 +594,7 @@ def read_sql(
 @enable_logging
 def read_fwf(
     filepath_or_buffer: Union[str, pathlib.Path, IO[AnyStr]],
+    *,
     colspecs="infer",
     widths=None,
     infer_nrows=100,
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 4efdd22e5c7..13f235c8652 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -789,7 +789,6 @@ def test_read_csv_quoting(
             )
 
     # Error Handling parameters tests
-    @pytest.mark.skip
     @pytest.mark.parametrize("on_bad_lines", ["error", "warn", "skip", None])
     def test_read_csv_error_handling(self, on_bad_lines):
         # in that case exceptions are raised both by Modin and pandas

From 85fd9c4447da0281d241aee1503e89b51b664ea3 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 30 May 2023 13:00:42 +0200
Subject: [PATCH 118/176] use pandas==2.0.2

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .github/workflows/ci.yml                | 3 ++-
 environment-dev.yml                     | 2 +-
 modin/pandas/__init__.py                | 2 +-
 modin/pandas/groupby.py                 | 8 ++++----
 modin/pandas/series.py                  | 2 +-
 requirements/env_hdk.yml                | 2 +-
 requirements/env_unidist.yml            | 2 +-
 requirements/requirements-no-engine.yml | 2 +-
 setup.py                                | 2 +-
 9 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a1ad7c5076a..86843ac5c91 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -477,7 +477,8 @@ jobs:
       - run: python -m pytest modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_utils.py
       - run: python -m pytest modin/pandas/test/test_io.py --verbose
       - run: python -m pytest modin/test/interchange/dataframe_protocol/test_general.py
-      - run: python -m pytest modin/test/interchange/dataframe_protocol/hdk
+      # TODO: uncomment after fix
+      # - run: python -m pytest modin/test/interchange/dataframe_protocol/hdk
       - run: python -m pytest modin/experimental/sql/test/test_sql.py
       - run: python -m pytest modin/pandas/test/test_concat.py
       - run: python -m pytest modin/pandas/test/dataframe/test_binary.py
diff --git a/environment-dev.yml b/environment-dev.yml
index b082f38ca4b..54c973a5444 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -2,7 +2,7 @@ name: modin
 channels:
   - conda-forge
 dependencies:
-  - pandas==2.0.1
+  - pandas==2.0.2
   - numpy>=1.18.5
   - ray-default>=1.13.0
   - pyarrow<12 # workaround for https://github.com/modin-project/modin/issues/6072
diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py
index f75f5cac0b4..91457b0c47b 100644
--- a/modin/pandas/__init__.py
+++ b/modin/pandas/__init__.py
@@ -14,7 +14,7 @@
 import pandas
 import warnings
 
-__pandas_version__ = "2.0.1"
+__pandas_version__ = "2.0.2"
 
 if pandas.__version__ != __pandas_version__:
     warnings.warn(
diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 2da0cb52ec2..906c00c3a78 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -199,7 +199,7 @@ def ngroups(self):
 
     def skew(self, axis=no_default, skipna=True, numeric_only=False, **kwargs):
         # default behaviour for aggregations; for the reference see
-        # `_op_via_apply` func in pandas==2.0.1
+        # `_op_via_apply` func in pandas==2.0.2
         if axis is None or axis is no_default:
             axis = self._axis
 
@@ -372,7 +372,7 @@ def max(self, numeric_only=False, min_count=-1, engine=None, engine_kwargs=None)
 
     def idxmax(self, axis=None, skipna=True, numeric_only=False):
         # default behaviour for aggregations; for the reference see
-        # `_op_via_apply` func in pandas==2.0.1
+        # `_op_via_apply` func in pandas==2.0.2
         if axis is None:
             axis = self._axis
         return self._wrap_aggregation(
@@ -383,7 +383,7 @@ def idxmax(self, axis=None, skipna=True, numeric_only=False):
 
     def idxmin(self, axis=None, skipna=True, numeric_only=False):
         # default behaviour for aggregations; for the reference see
-        # `_op_via_apply` func in pandas==2.0.1
+        # `_op_via_apply` func in pandas==2.0.2
         if axis is None:
             axis = self._axis
         return self._wrap_aggregation(
@@ -1133,7 +1133,7 @@ def fillna(
         downcast=None,
     ):
         # default behaviour for aggregations; for the reference see
-        # `_op_via_apply` func in pandas==2.0.1
+        # `_op_via_apply` func in pandas==2.0.2
         if axis is None or axis is no_default:
             axis = self._axis
 
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 8797464b545..35257a402f2 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1416,7 +1416,7 @@ def rename(
         index=None,
         *,
         axis=None,
-        copy=True,
+        copy=None,
         inplace=False,
         level=None,
         errors="ignore",
diff --git a/requirements/env_hdk.yml b/requirements/env_hdk.yml
index 86b2865d4b1..4b01cd3dbb4 100644
--- a/requirements/env_hdk.yml
+++ b/requirements/env_hdk.yml
@@ -2,7 +2,7 @@ name: modin_on_hdk
 channels:
   - conda-forge
 dependencies:
-  - pandas==2.0.1
+  - pandas==2.0.2
   - pyarrow<12 # workaround for https://github.com/modin-project/modin/issues/6072
   - numpy>=1.18.5
   - fsspec
diff --git a/requirements/env_unidist.yml b/requirements/env_unidist.yml
index c6430faeef2..7eff71ec65c 100644
--- a/requirements/env_unidist.yml
+++ b/requirements/env_unidist.yml
@@ -3,7 +3,7 @@ channels:
   - conda-forge
 dependencies:
   - unidist-mpi>=0.2.1
-  - pandas==2.0.1
+  - pandas==2.0.2
   - numpy>=1.18.5
   - pyarrow<12 # workaround for https://github.com/modin-project/modin/issues/6072
   - fsspec
diff --git a/requirements/requirements-no-engine.yml b/requirements/requirements-no-engine.yml
index 619aef77f11..b072eda1f46 100644
--- a/requirements/requirements-no-engine.yml
+++ b/requirements/requirements-no-engine.yml
@@ -1,7 +1,7 @@
 channels:
   - conda-forge
 dependencies:
-  - pandas==2.0.1
+  - pandas==2.0.2
   - numpy>=1.18.5
   - pyarrow<12 # workaround for https://github.com/modin-project/modin/issues/6072
   - fsspec
diff --git a/setup.py b/setup.py
index 2b5225783e6..a066a21aebb 100644
--- a/setup.py
+++ b/setup.py
@@ -47,7 +47,7 @@ def make_distribution(self):
     long_description=long_description,
     long_description_content_type="text/markdown",
     install_requires=[
-        "pandas==2.0.1",
+        "pandas==2.0.2",
         "packaging",
         "numpy>=1.18.5",
         "fsspec",

From ac16843719717f32cbdb9b9c7540d97da3d2093f Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 30 May 2023 13:36:42 +0200
Subject: [PATCH 119/176] fix 'infer_objects'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/dataframe/base/dataframe/dataframe.py    | 2 +-
 modin/core/dataframe/pandas/dataframe/dataframe.py  | 2 +-
 modin/core/storage_formats/base/query_compiler.py   | 9 ++-------
 modin/core/storage_formats/pandas/query_compiler.py | 2 +-
 modin/experimental/pandas/io.py                     | 9 ++++++++-
 modin/pandas/base.py                                | 7 ++++---
 6 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/modin/core/dataframe/base/dataframe/dataframe.py b/modin/core/dataframe/base/dataframe/dataframe.py
index 536d43ceaea..2433fd8f4ee 100644
--- a/modin/core/dataframe/base/dataframe/dataframe.py
+++ b/modin/core/dataframe/base/dataframe/dataframe.py
@@ -248,7 +248,7 @@ def groupby(
         passed to the groupby may be at most the number of rows in the group, and
         may be as small as a single row.
 
-        Unlike the pandas API, an intermediate “GROUP BY” object is not present in this
+        Unlike the pandas API, an intermediate `GROUP BY` object is not present in this
         algebra implementation.
         """
         pass
diff --git a/modin/core/dataframe/pandas/dataframe/dataframe.py b/modin/core/dataframe/pandas/dataframe/dataframe.py
index 525238a947b..172482d6105 100644
--- a/modin/core/dataframe/pandas/dataframe/dataframe.py
+++ b/modin/core/dataframe/pandas/dataframe/dataframe.py
@@ -3466,7 +3466,7 @@ def groupby(
         passed to the groupby may be at most the number of rows in the group, and
         may be as small as a single row.
 
-        Unlike the pandas API, an intermediate “GROUP BY” object is not present in this
+        Unlike the pandas API, an intermediate `GROUP BY` object is not present in this
         algebra implementation.
         """
         axis = Axis(axis)
diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index ab395cae3b5..8fa52404a95 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -1828,7 +1828,7 @@ def astype(self, col_dtypes, errors: str = "raise"):  # noqa: PR02
             self, dtype=col_dtypes, errors=errors
         )
 
-    def infer_objects(self, copy=None):
+    def infer_objects(self):
         """
         Attempt to infer better dtypes for object columns.
 
@@ -1836,17 +1836,12 @@ def infer_objects(self, copy=None):
         and unconvertible columns unchanged. The inference rules are the same
         as during normal Series/DataFrame construction.
 
-        Parameters
-        ----------
-        copy : bool, optional
-            Whether to make a copy for non-object or non-inferrable columns or Series.
-
         Returns
         -------
         BaseQueryCompiler
             New query compiler with udpated dtypes.
         """
-        return DataFrameDefault.register(pandas.DataFrame.infer_objects)(self, copy)
+        return DataFrameDefault.register(pandas.DataFrame.infer_objects)(self)
 
     def convert_dtypes(
         self,
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index a234067d60d..f300baa488d 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -1956,7 +1956,7 @@ def astype(self, col_dtypes, errors: str = "raise"):
         # invalid type keys.
         return self.__constructor__(self._modin_frame.astype(col_dtypes, errors=errors))
 
-    def infer_objects(self, copy):
+    def infer_objects(self):
         return self.__constructor__(self._modin_frame.infer_objects())
 
     # Column/Row partitions reduce operations
diff --git a/modin/experimental/pandas/io.py b/modin/experimental/pandas/io.py
index 6699ea1fbbc..4cb481399ac 100644
--- a/modin/experimental/pandas/io.py
+++ b/modin/experimental/pandas/io.py
@@ -42,7 +42,7 @@ def read_sql(
     lower_bound: Optional[int] = None,
     upper_bound: Optional[int] = None,
     max_sessions: Optional[int] = None,
-) -> Union[DataFrame, Iterator[DataFrame]]:
+) -> Union[DataFrame, Iterator[DataFrame]]:  # noqa: MD01
     """
     General documentation is available in `modin.pandas.read_sql`.
 
@@ -87,6 +87,13 @@ def read_sql(
     chunksize : int, optional
         If specified, return an iterator where `chunksize` is the
         number of rows to include in each chunk.
+    dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
+        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy arrays,
+        nullable dtypes are used for all dtypes that have a nullable implementation when
+        "numpy_nullable" is set, PyArrow is used for all dtypes if "pyarrow" is set.
+        The dtype_backends are still experimential.
+    dtype : Type name or dict of columns
+        Data type for data or columns. E.g. np.float64 or {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. The argument is ignored if a table is passed instead of a query.
     partition_column : str, optional
         Column used to share the data between the workers (MUST be a INTEGER column).
     lower_bound : int, optional
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 42772516e8a..7524f70b277 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1722,9 +1722,10 @@ def infer_objects(self, copy=None):  # noqa: PR01, RT01, D200
         """
         Attempt to infer better dtypes for object columns.
         """
-        if copy is None:
-            copy = True
-        return self._query_compiler.infer_objects(copy)
+        new_query_compiler = self._query_compiler.infer_objects()
+        return self._create_or_update_from_compiler(
+            new_query_compiler, inplace=False if copy is None else not copy
+        )
 
     def convert_dtypes(
         self,

From 0f03510c058a24c9d196b05e0e6d8da76e60ddb7 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 30 May 2023 14:27:32 +0200
Subject: [PATCH 120/176] remove 'line_terminator'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_io.py | 2 +-
 modin/pandas/test/utils.py   | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 13f235c8652..d190170cf9b 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -716,7 +716,7 @@ def test_read_csv_file_format(
                     thousands_separator=thousands,
                     decimal_separator=decimal,
                     escapechar=escapechar,
-                    line_terminator=lineterminator,
+                    lineterminator=lineterminator,
                 )
 
             if (
diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py
index 1a6ebc4569c..23ed6eef2c4 100644
--- a/modin/pandas/test/utils.py
+++ b/modin/pandas/test/utils.py
@@ -1373,7 +1373,7 @@ def _csv_file_maker(
         quotechar='"',
         doublequote=True,
         escapechar=None,
-        line_terminator=None,
+        lineterminator=None,
     ):
         if os.path.exists(filename) and not force:
             pass
@@ -1412,6 +1412,7 @@ def _csv_file_maker(
                 compression=compression,
                 index=False,
                 decimal=decimal_separator if decimal_separator else ".",
+                lineterminator=lineterminator,
                 quoting=quoting,
                 quotechar=quotechar,
                 doublequote=doublequote,
@@ -1421,7 +1422,7 @@ def _csv_file_maker(
                 "delimiter": delimiter,
                 "doublequote": doublequote,
                 "escapechar": escapechar,
-                "lineterminator": line_terminator if line_terminator else os.linesep,
+                "lineterminator": lineterminator if lineterminator else os.linesep,
                 "quotechar": quotechar,
                 "quoting": quoting,
             }

From adfbf4ce935c829b3756e14387380f135d4d214a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 30 May 2023 14:49:31 +0200
Subject: [PATCH 121/176] skip 'test_fillna_sanity' for hdk

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/dataframe.py                  | 1 +
 modin/pandas/series.py                     | 1 +
 modin/pandas/test/dataframe/test_window.py | 4 ++++
 3 files changed, 6 insertions(+)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 0774f7918fb..53c4985ddaa 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -836,6 +836,7 @@ def eval(self, expr, inplace=False, **kwargs):  # noqa: PR01, RT01, D200
     def fillna(
         self,
         value=None,
+        *,
         method=None,
         axis=None,
         inplace=False,
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 35257a402f2..a7fd6b0f9bf 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -954,6 +954,7 @@ def factorize(self, sort=False, use_na_sentinel=True):  # noqa: PR01, RT01, D200
     def fillna(
         self,
         value=None,
+        *,
         method=None,
         axis=None,
         inplace=False,
diff --git a/modin/pandas/test/dataframe/test_window.py b/modin/pandas/test/dataframe/test_window.py
index 1d7b4621967..5162c0952f0 100644
--- a/modin/pandas/test/dataframe/test_window.py
+++ b/modin/pandas/test/dataframe/test_window.py
@@ -166,6 +166,10 @@ def test_fillna(data, method, axis, limit):
             df_equals(modin_result, pandas_result)
 
 
+@pytest.mark.skipif(
+    StorageFormat.get() == "Hdk",
+    reason="'datetime64[ns, pytz.FixedOffset(60)]' vs 'datetime64[ns, UTC+01:00]'",
+)
 def test_fillna_sanity():
     # with different dtype
     frame_data = [

From 70228cdf29bb12f65fead0f265741b0f41686f26 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 30 May 2023 15:51:44 +0200
Subject: [PATCH 122/176] skip 'test_read_csv_error_handling' again

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_io.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index d190170cf9b..2a09517385d 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -789,6 +789,7 @@ def test_read_csv_quoting(
             )
 
     # Error Handling parameters tests
+    @pytest.mark.skip
     @pytest.mark.parametrize("on_bad_lines", ["error", "warn", "skip", None])
     def test_read_csv_error_handling(self, on_bad_lines):
         # in that case exceptions are raised both by Modin and pandas

From 6ce323156db5ea6e3e9b00ae93470e53c41118d7 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 30 May 2023 17:16:37 +0200
Subject: [PATCH 123/176] fix experimental 'read_sql'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/experimental/core/io/sql/sql_dispatcher.py |  6 ++++++
 modin/experimental/core/io/sql/utils.py          | 14 +++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/modin/experimental/core/io/sql/sql_dispatcher.py b/modin/experimental/core/io/sql/sql_dispatcher.py
index ea3cffd9545..0a2a24074ad 100644
--- a/modin/experimental/core/io/sql/sql_dispatcher.py
+++ b/modin/experimental/core/io/sql/sql_dispatcher.py
@@ -48,6 +48,8 @@ def _read(
         parse_dates,
         columns,
         chunksize,
+        dtype_backend,
+        dtype,
         partition_column,
         lower_bound,
         upper_bound,
@@ -82,6 +84,8 @@ def _read(
                 parse_dates=parse_dates,
                 columns=columns,
                 chunksize=chunksize,
+                dtype_backend=dtype_backend,
+                dtype=dtype,
             )
         #  starts the distributed alternative
         cols_names, query = get_query_info(sql, con, partition_column)
@@ -117,6 +121,8 @@ def _read(
                     parse_dates,
                     columns,
                     chunksize,
+                    dtype_backend,
+                    dtype,
                 ),
                 num_returns=num_splits + 1,
             )
diff --git a/modin/experimental/core/io/sql/utils.py b/modin/experimental/core/io/sql/utils.py
index 87691eb310d..dc3b347169d 100644
--- a/modin/experimental/core/io/sql/utils.py
+++ b/modin/experimental/core/io/sql/utils.py
@@ -17,6 +17,7 @@
 
 from sqlalchemy import MetaData, Table, create_engine, inspect
 import pandas
+import pandas._libs.lib as lib
 
 from modin.core.storage_formats.pandas.parsers import _split_result_for_readers
 
@@ -285,7 +286,9 @@ def read_sql_with_offset(
     parse_dates=None,
     columns=None,
     chunksize=None,
-):  # pragma: no cover
+    dtype_backend=lib.no_default,
+    dtype=None,
+):  # pragma: no cover, # noqa: MD01
     """
     Read a chunk of SQL query or table into a pandas DataFrame.
 
@@ -330,6 +333,13 @@ def read_sql_with_offset(
     chunksize : int, optional
         If specified, return an iterator where `chunksize` is the number of rows
         to include in each chunk.
+    dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
+        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy arrays,
+        nullable dtypes are used for all dtypes that have a nullable implementation when
+        "numpy_nullable" is set, PyArrow is used for all dtypes if "pyarrow" is set.
+        The dtype_backends are still experimential.
+    dtype : Type name or dict of columns
+        Data type for data or columns. E.g. np.float64 or {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. The argument is ignored if a table is passed instead of a query.
 
     Returns
     -------
@@ -346,6 +356,8 @@ def read_sql_with_offset(
         parse_dates=parse_dates,
         columns=columns,
         chunksize=chunksize,
+        dtype_backend=dtype_backend,
+        dtype=dtype,
     )
     index = len(pandas_df)
     return _split_result_for_readers(1, num_splits, pandas_df) + [index]

From c647abe10dc03007b9797c698ece5b6760507300 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 1 Jun 2023 11:45:57 +0200
Subject: [PATCH 124/176] remove '_AXIS_ORDERS', '_AXIS_LEN'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py      | 11 +++--------
 modin/pandas/dataframe.py |  2 --
 modin/pandas/series.py    |  2 --
 3 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 7524f70b277..0d3446368ab 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -579,10 +579,6 @@ def _get_axis_number(cls, axis):
 
         return cls._pandas_class._get_axis_number(axis) if axis is not None else 0
 
-    def _get_axis_name(self, axis):
-        axis_number = self._get_axis_number(axis)
-        return self._AXIS_ORDERS[axis_number]
-
     @pandas.util.cache_readonly
     def __constructor__(self):
         """
@@ -1282,7 +1278,7 @@ def drop(
         if labels is not None:
             if index is not None or columns is not None:
                 raise ValueError("Cannot specify both 'labels' and 'index'/'columns'")
-            axis_name = self._get_axis_name(axis)
+            axis_name = pandas.DataFrame._get_axis_name(axis)
             axes = {axis_name: labels}
         elif index is not None or columns is not None:
             axes = {"index": index}
@@ -1926,7 +1922,6 @@ def _stat_operation(
             numpy_compat.function.validate_stat_func((), kwargs, fname=op_name)
 
         if not numeric_only:
-            # fix for 'test_reduce_specific'
             self._validate_dtypes(numeric_only=True)
 
         data = self._get_numeric_data(axis) if numeric_only else self
@@ -2251,8 +2246,8 @@ def rename_axis(
             # Use new behavior.  Means that index and/or columns is specified
             result = self if inplace else self.copy(deep=copy)
 
-            for axis in range(self._AXIS_LEN):
-                v = axes.get(self._get_axis_name(axis))
+            for axis in range(self.ndim):
+                v = axes.get(pandas.DataFrame._get_axis_name(axis))
                 if v is no_default:
                     continue
                 non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v))
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 292b12d1f8a..cce51c1e2b2 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -112,8 +112,6 @@ class DataFrame(BasePandasDataset):
     ``pd.read_csv``).
     """
 
-    _AXIS_ORDERS = ["index", "columns"]
-    _AXIS_LEN = len(_AXIS_ORDERS)
     _pandas_class = pandas.DataFrame
 
     def __init__(
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index a7fd6b0f9bf..6556076bff4 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -80,8 +80,6 @@ class Series(BasePandasDataset):
         A query compiler object to create the Series from.
     """
 
-    _AXIS_ORDERS = ["index"]
-    _AXIS_LEN = len(_AXIS_ORDERS)
     _pandas_class = pandas.Series
     __array_priority__ = pandas.Series.__array_priority__
 

From b6a1f5d305b402fe65ff441d0455832de73b5c14 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 1 Jun 2023 12:06:42 +0200
Subject: [PATCH 125/176] remove debug stuff

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/dataframe.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index cce51c1e2b2..e2a7cc244da 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -1502,8 +1502,7 @@ def prod(
         axis_to_apply = self.columns if axis else self.index
         if (
             skipna is not False
-            # potential place to remove
-            and numeric_only is None
+            and numeric_only is False
             and min_count > len(axis_to_apply)
         ):
             new_index = self.columns if not axis else self.index
@@ -1954,8 +1953,7 @@ def sum(
         axis_to_apply = self.columns if axis else self.index
         if (
             skipna is not False
-            # potential place to remove
-            and numeric_only is None
+            and numeric_only is False
             and min_count > len(axis_to_apply)
         ):
             new_index = self.columns if not axis else self.index

From 00b9f24f3743b9e111b40c7ef490a7675c34d53e Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 1 Jun 2023 16:17:46 +0200
Subject: [PATCH 126/176] fix 'add_perfix', 'add_suffix'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/dataframe.py                     |  6 ++--
 modin/pandas/series.py                        |  6 ++--
 .../test/dataframe/test_map_metadata.py       | 32 ++++++++++---------
 modin/pandas/test/test_series.py              | 18 ++++++-----
 4 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index e2a7cc244da..b8d1fec5012 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -357,16 +357,18 @@ def add_prefix(self, prefix, axis=None):  # noqa: PR01, RT01, D200
         """
         Prefix labels with string `prefix`.
         """
+        axis = 1 if axis is None else self._get_axis_number(axis)
         return self.__constructor__(
-            query_compiler=self._query_compiler.add_prefix(prefix, axis or 1)
+            query_compiler=self._query_compiler.add_prefix(prefix, axis)
         )
 
     def add_suffix(self, suffix, axis=None):  # noqa: PR01, RT01, D200
         """
         Suffix labels with string `suffix`.
         """
+        axis = 1 if axis is None else self._get_axis_number(axis)
         return self.__constructor__(
-            query_compiler=self._query_compiler.add_suffix(suffix, axis or 1)
+            query_compiler=self._query_compiler.add_suffix(suffix, axis)
         )
 
     def applymap(self, func, na_action: Optional[str] = None, **kwargs):
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 6556076bff4..f1c3c47d187 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -528,16 +528,18 @@ def add_prefix(self, prefix, axis=None):  # noqa: PR01, RT01, D200
         """
         Prefix labels with string `prefix`.
         """
+        axis = 0 if axis is None else self._get_axis_number(axis)
         return self.__constructor__(
-            query_compiler=self._query_compiler.add_prefix(prefix, axis=axis or 0)
+            query_compiler=self._query_compiler.add_prefix(prefix, axis=axis)
         )
 
     def add_suffix(self, suffix, axis=None):  # noqa: PR01, RT01, D200
         """
         Suffix labels with string `suffix`.
         """
+        axis = 0 if axis is None else self._get_axis_number(axis)
         return self.__constructor__(
-            query_compiler=self._query_compiler.add_suffix(suffix, axis=axis or 0)
+            query_compiler=self._query_compiler.add_suffix(suffix, axis=axis)
         )
 
     def aggregate(self, func=None, axis=0, *args, **kwargs):  # noqa: PR01, RT01, D200
diff --git a/modin/pandas/test/dataframe/test_map_metadata.py b/modin/pandas/test/dataframe/test_map_metadata.py
index 7a7032c5e5c..2bbe54d8877 100644
--- a/modin/pandas/test/dataframe/test_map_metadata.py
+++ b/modin/pandas/test/dataframe/test_map_metadata.py
@@ -196,20 +196,34 @@ def test_abs(request, data):
         df_equals(modin_result, pandas_result)
 
 
+@pytest.mark.parametrize("axis", [None, 0, 1])
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
-def test_add_prefix(data):
+def test_add_prefix(data, axis):
     modin_df = pd.DataFrame(data)
     pandas_df = pandas.DataFrame(data)
 
     test_prefix = "TEST"
-    new_modin_df = modin_df.add_prefix(test_prefix)
-    new_pandas_df = pandas_df.add_prefix(test_prefix)
+    new_modin_df = modin_df.add_prefix(test_prefix, axis=axis)
+    new_pandas_df = pandas_df.add_prefix(test_prefix, axis=axis)
     df_equals(new_modin_df.columns, new_pandas_df.columns)
     # TODO(https://github.com/modin-project/modin/issues/3804):
     # make df_equals always check dtypes.
     df_equals(new_modin_df.dtypes, new_pandas_df.dtypes)
 
 
+@pytest.mark.parametrize("axis", [None, 0, 1])
+@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
+def test_add_suffix(data, axis):
+    modin_df = pd.DataFrame(data)
+    pandas_df = pandas.DataFrame(data)
+
+    test_suffix = "TEST"
+    new_modin_df = modin_df.add_suffix(test_suffix, axis=axis)
+    new_pandas_df = pandas_df.add_suffix(test_suffix, axis=axis)
+
+    df_equals(new_modin_df.columns, new_pandas_df.columns)
+
+
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 @pytest.mark.parametrize("testfunc", test_func_values, ids=test_func_keys)
 @pytest.mark.parametrize(
@@ -242,18 +256,6 @@ def test_applymap_numeric(request, data, testfunc):
             df_equals(modin_result, pandas_result)
 
 
-@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
-def test_add_suffix(data):
-    modin_df = pd.DataFrame(data)
-    pandas_df = pandas.DataFrame(data)
-
-    test_suffix = "TEST"
-    new_modin_df = modin_df.add_suffix(test_suffix)
-    new_pandas_df = pandas_df.add_suffix(test_suffix)
-
-    df_equals(new_modin_df.columns, new_pandas_df.columns)
-
-
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 def test_at(data):
     modin_df = pd.DataFrame(data)
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 206c752d084..1c7906a11c3 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -677,19 +677,21 @@ def test_add_does_not_change_original_series_name():
     df_equals(s2, original_s2)
 
 
+@pytest.mark.parametrize("axis", [None, 0, 1])
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
-def test_add_prefix(data):
-    modin_series, pandas_series = create_test_series(data)
-    df_equals(
-        modin_series.add_prefix("PREFIX_ADD_"), pandas_series.add_prefix("PREFIX_ADD_")
+def test_add_prefix(data, axis):
+    eval_general(
+        *create_test_series(data),
+        lambda df: df.add_prefix("PREFIX_ADD_", axis=axis),
     )
 
 
+@pytest.mark.parametrize("axis", [None, 0, 1])
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
-def test_add_suffix(data):
-    modin_series, pandas_series = create_test_series(data)
-    df_equals(
-        modin_series.add_suffix("SUFFIX_ADD_"), pandas_series.add_suffix("SUFFIX_ADD_")
+def test_add_suffix(data, axis):
+    eval_general(
+        *create_test_series(data),
+        lambda df: df.add_suffix("SUFFIX_ADD_", axis=axis),
     )
 
 

From 277070a636486c944521c594eb6114ed36ef899c Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoliimyachev@mail.com>
Date: Thu, 1 Jun 2023 16:32:34 +0200
Subject: [PATCH 127/176] Update modin/pandas/series.py

Co-authored-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
---
 modin/pandas/series.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index f1c3c47d187..4382c687d15 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1922,8 +1922,6 @@ def value_counts(
             ascending=ascending,
             dropna=dropna,
         )
-        # https://pandas.pydata.org/pandas-docs/version/2.0/whatsnew/v2.0.0.html#value-counts-sets-the-resulting-name-to-count
-        counted_values.name = "proportion" if normalize else "count"
         return counted_values
 
     def view(self, dtype=None):  # noqa: PR01, RT01, D200

From 81305f81f50c1556e29aa4b07e4822a4acdf50c5 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 1 Jun 2023 17:21:25 +0200
Subject: [PATCH 128/176] fixes for 'skew'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/base/query_compiler.py | 11 +++++++++++
 modin/pandas/groupby.py                           |  6 +-----
 modin/pandas/test/test_groupby.py                 |  8 ++------
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 8fa52404a95..5f9ee5645ef 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -3125,6 +3125,17 @@ def groupby_skew(
         agg_kwargs,
         drop=False,
     ):
+        if axis == 1:
+            return GroupByDefault.register(pandas.core.groupby.DataFrameGroupBy.skew)(
+                self,
+                by=by,
+                axis=axis,
+                groupby_kwargs=groupby_kwargs,
+                agg_args=agg_args,
+                agg_kwargs=agg_kwargs,
+                drop=drop,
+            )
+        # ValueError: Operation skew does not support axis=1
         return self.groupby_agg(
             by=by,
             agg_func="skew",
diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 65482019cdd..51709cbb136 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -203,11 +203,7 @@ def skew(self, axis=no_default, skipna=True, numeric_only=False, **kwargs):
         if axis is None or axis is no_default:
             axis = self._axis
 
-        # `groupby_skew` can't handle `axis`, `skipna` parameters
-        # that should be added into `agg_kwargs`;
-        # looks like an implicit supported combination of parameters in the
-        # previous implementation: axis == 1, skipna==True
-        if axis != 1 or not skipna:
+        if axis != 0 or not skipna:
             return self._default_to_pandas(
                 lambda df: df.skew(
                     axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index ec631daff55..00b92f605cd 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -888,9 +888,7 @@ def test_simple_col_groupby():
     modin_groupby_equals_pandas(modin_groupby, pandas_groupby)
     eval_ngroups(modin_groupby, pandas_groupby)
     eval_shift(modin_groupby, pandas_groupby)
-    # TODO: default axis value in that case - `1` that inherited from groupby call
-    # however axis=1 parameter isn't support on BaseOnPython.
-    eval_skew(modin_groupby, pandas_groupby, axis=0)
+    eval_skew(modin_groupby, pandas_groupby)
     eval_general(modin_groupby, pandas_groupby, lambda df: df.ffill())
     eval_general(
         modin_groupby,
@@ -1154,10 +1152,8 @@ def eval_ngroups(modin_groupby, pandas_groupby):
     assert modin_groupby.ngroups == pandas_groupby.ngroups
 
 
-def eval_skew(modin_groupby, pandas_groupby, numeric_only=False, axis=None):
+def eval_skew(modin_groupby, pandas_groupby, numeric_only=False):
     kwargs = dict(numeric_only=numeric_only)
-    if axis is not None:
-        kwargs["axis"] = axis
     modin_df_almost_equals_pandas(
         modin_groupby.skew(**kwargs),
         pandas_groupby.skew(**kwargs),

From b286373d5a46550896515aa465b00321ced7e599 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 1 Jun 2023 17:27:29 +0200
Subject: [PATCH 129/176] fix 'add_prefix', 'add_suffix' for BaseOnPython

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../storage_formats/base/query_compiler.py     | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 5f9ee5645ef..326c2df2e36 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -193,12 +193,9 @@ def add_prefix(self, prefix, axis=1):
         BaseQueryCompiler
             New query compiler with updated labels.
         """
-        if axis:
-            return DataFrameDefault.register(pandas.DataFrame.add_prefix)(
-                self, prefix=prefix
-            )
-        else:
-            return SeriesDefault.register(pandas.Series.add_prefix)(self, prefix=prefix)
+        return DataFrameDefault.register(pandas.DataFrame.add_prefix)(
+            self, prefix=prefix, axis=axis
+        )
 
     def add_suffix(self, suffix, axis=1):
         """
@@ -216,12 +213,9 @@ def add_suffix(self, suffix, axis=1):
         BaseQueryCompiler
             New query compiler with updated labels.
         """
-        if axis:
-            return DataFrameDefault.register(pandas.DataFrame.add_suffix)(
-                self, suffix=suffix
-            )
-        else:
-            return SeriesDefault.register(pandas.Series.add_suffix)(self, suffix=suffix)
+        return DataFrameDefault.register(pandas.DataFrame.add_suffix)(
+            self, suffix=suffix, axis=axis
+        )
 
     # END Metadata modification abstract methods
 

From d09c4d5bcca4f7f8d5a5d5c3daa43ae8b5f44f81 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 1 Jun 2023 23:22:33 +0200
Subject: [PATCH 130/176] fix 'read_parquet'; add test for 'dtype_backend'
 param

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../io/column_stores/parquet_dispatcher.py    | 14 ++++++++++----
 modin/core/io/io.py                           |  6 +-----
 modin/core/storage_formats/pandas/parsers.py  |  7 +++++--
 modin/pandas/test/test_io.py                  | 19 +++++++++++++++++++
 4 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/modin/core/io/column_stores/parquet_dispatcher.py b/modin/core/io/column_stores/parquet_dispatcher.py
index e843e6c59db..2811e48e8c7 100644
--- a/modin/core/io/column_stores/parquet_dispatcher.py
+++ b/modin/core/io/column_stores/parquet_dispatcher.py
@@ -23,6 +23,7 @@
 import numpy as np
 from pandas.io.common import stringify_path
 import pandas
+import pandas._libs.lib as lib
 from packaging import version
 
 from modin.core.storage_formats.pandas.utils import compute_chunksize
@@ -589,7 +590,7 @@ def build_query_compiler(cls, dataset, columns, index_columns, **kwargs):
         return cls.query_compiler_cls(frame)
 
     @classmethod
-    def _read(cls, path, engine, columns, **kwargs):
+    def _read(cls, path, engine, columns, use_nullable_dtypes, dtype_backend, **kwargs):
         """
         Load a parquet object from the file path, returning a query compiler.
 
@@ -601,6 +602,8 @@ def _read(cls, path, engine, columns, **kwargs):
             Parquet library to use.
         columns : list
             If not None, only these columns will be read from the file.
+        use_nullable_dtypes : bool
+        dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
         **kwargs : dict
             Keyword arguments.
 
@@ -614,14 +617,17 @@ def _read(cls, path, engine, columns, **kwargs):
         ParquetFile API is used. Please refer to the documentation here
         https://arrow.apache.org/docs/python/parquet.html
         """
-        if any(
-            arg not in ("storage_options", "use_nullable_dtypes", "dtype_backend")
-            for arg in kwargs
+        if (
+            any(arg not in ("storage_options",) for arg in kwargs)
+            or use_nullable_dtypes != lib.no_default
+            or dtype_backend != lib.no_default
         ):
             return cls.single_worker_read(
                 path,
                 engine=engine,
                 columns=columns,
+                use_nullable_dtypes=use_nullable_dtypes,
+                dtype_backend=dtype_backend,
                 reason="Parquet options that are not currently supported",
                 **kwargs,
             )
diff --git a/modin/core/io/io.py b/modin/core/io/io.py
index a1cf99ccea9..2d60599c6e9 100644
--- a/modin/core/io/io.py
+++ b/modin/core/io/io.py
@@ -125,11 +125,7 @@ def from_dataframe(cls, df):
     )
     def read_parquet(cls, **kwargs):  # noqa: PR01
         ErrorMessage.default_to_pandas("`read_parquet`")
-        return cls.from_pandas(
-            pandas.read_parquet(
-                **kwargs,
-            )
-        )
+        return cls.from_pandas(pandas.read_parquet(**kwargs))
 
     @classmethod
     @_inherit_docstrings(pandas.read_csv, apilink="pandas.read_csv")
diff --git a/modin/core/storage_formats/pandas/parsers.py b/modin/core/storage_formats/pandas/parsers.py
index ab2bcade3a2..9d3cbc8260d 100644
--- a/modin/core/storage_formats/pandas/parsers.py
+++ b/modin/core/storage_formats/pandas/parsers.py
@@ -40,6 +40,7 @@
 """
 
 from collections import OrderedDict
+import collections
 from io import BytesIO, TextIOWrapper, IOBase
 import fsspec
 import numpy as np
@@ -779,8 +780,10 @@ def parse(files_for_parser, engine, **kwargs):
         columns = kwargs.get("columns", None)
         storage_options = kwargs.get("storage_options", {})
         chunks = []
-        # `single_worker_read` just passes in a string path
-        if isinstance(files_for_parser, str):
+        # `single_worker_read` just passes in a string path or path-like object
+        if not isinstance(files_for_parser, collections.abc.Iterable) or isinstance(
+            files_for_parser, str
+        ):
             return pandas.read_parquet(files_for_parser, engine=engine, **kwargs)
 
         for file_for_parser in files_for_parser:
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 231ee809124..ca5b333683b 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -1392,6 +1392,25 @@ def test_read_parquet(
                 columns=columns,
             )
 
+    @pytest.mark.parametrize(
+        "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
+    )
+    @pytest.mark.xfail(
+        condition="config.getoption('--simulate-cloud').lower() != 'off'",
+        reason="The reason of tests fail in `cloud` mode is unknown for now - issue #3264",
+    )
+    def test_read_parquet_dtype_backend(self, engine, make_parquet_file, dtype_backend):
+        with ensure_clean(".parquet") as unique_filename:
+            make_parquet_file(filename=unique_filename, row_group_size=100)
+
+            eval_io(
+                fn_name="read_parquet",
+                # read_parquet kwargs
+                engine=engine,
+                path=unique_filename,
+                dtype_backend=dtype_backend,
+            )
+
     def test_read_parquet_list_of_files_5698(self, engine, make_parquet_file):
         if engine == "fastparquet" and os.name == "nt":
             pytest.xfail(reason="https://github.com/pandas-dev/pandas/issues/51720")

From 1243b4ac5e362a9211950a5005381892a1d5b58a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 2 Jun 2023 13:44:47 +0200
Subject: [PATCH 131/176] fix 'read_parquet'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/io/column_stores/parquet_dispatcher.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/modin/core/io/column_stores/parquet_dispatcher.py b/modin/core/io/column_stores/parquet_dispatcher.py
index 2811e48e8c7..7b826f165c5 100644
--- a/modin/core/io/column_stores/parquet_dispatcher.py
+++ b/modin/core/io/column_stores/parquet_dispatcher.py
@@ -603,7 +603,7 @@ def _read(cls, path, engine, columns, use_nullable_dtypes, dtype_backend, **kwar
         columns : list
             If not None, only these columns will be read from the file.
         use_nullable_dtypes : bool
-        dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
+        dtype_backend : {"numpy_nullable", "pyarrow"}
         **kwargs : dict
             Keyword arguments.
 
@@ -636,7 +636,10 @@ def _read(cls, path, engine, columns, use_nullable_dtypes, dtype_backend, **kwar
             # TODO(https://github.com/modin-project/modin/issues/5723): read all
             # files in parallel.
             compilers: list[cls.query_compiler_cls] = [
-                cls._read(p, engine, columns, **kwargs) for p in path
+                cls._read(
+                    p, engine, columns, use_nullable_dtypes, dtype_backend, **kwargs
+                )
+                for p in path
             ]
             return compilers[0].concat(axis=0, other=compilers[1:], ignore_index=True)
         if isinstance(path, str):
@@ -669,6 +672,8 @@ def _read(cls, path, engine, columns, use_nullable_dtypes, dtype_backend, **kwar
                     path,
                     engine=engine,
                     columns=columns,
+                    use_nullable_dtypes=use_nullable_dtypes,
+                    dtype_backend=dtype_backend,
                     reason="Mixed partitioning columns in Parquet",
                     **kwargs,
                 )

From 981ab28585c49780203a4ad82d2d6ba05ffe55ef Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 2 Jun 2023 14:38:17 +0200
Subject: [PATCH 132/176] add 'dtype_backend' test for 'read_csv'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/pandas/parsers.py |  3 ++-
 modin/pandas/test/test_io.py                 | 24 ++++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/modin/core/storage_formats/pandas/parsers.py b/modin/core/storage_formats/pandas/parsers.py
index 9d3cbc8260d..ffac8c1839c 100644
--- a/modin/core/storage_formats/pandas/parsers.py
+++ b/modin/core/storage_formats/pandas/parsers.py
@@ -39,8 +39,8 @@
   parameters are passed into `pandas.read_sql` function without modification.
 """
 
-from collections import OrderedDict
 import collections
+from collections import OrderedDict
 from io import BytesIO, TextIOWrapper, IOBase
 import fsspec
 import numpy as np
@@ -248,6 +248,7 @@ def get_dtypes(cls, dtypes_ids, columns):
 
         combined_part_dtypes = pandas.concat(partitions_dtypes, axis=1)
         frame_dtypes = combined_part_dtypes.iloc[:, 0]
+        frame_dtypes.name = None
 
         if not combined_part_dtypes.eq(frame_dtypes, axis=0).all(axis=None):
             ErrorMessage.missmatch_with_pandas(
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index ca5b333683b..237c23d1765 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -296,6 +296,25 @@ def test_read_csv_delimiters(
                 thousands=thousands,
             )
 
+    @pytest.mark.parametrize(
+        "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
+    )
+    def test_read_csv_dtype_backend(self, make_csv_file, dtype_backend):
+        with ensure_clean(".csv") as unique_filename:
+            make_csv_file(filename=unique_filename)
+
+            def comparator(df1, df2):
+                df_equals(df1, df2)
+                df_equals(df1.dtypes, df2.dtypes)
+
+            eval_io(
+                fn_name="read_csv",
+                # read_csv kwargs
+                filepath_or_buffer=unique_filename,
+                dtype_backend=dtype_backend,
+                comparator=comparator,
+            )
+
     # Column and Index Locations and Names tests
     @pytest.mark.parametrize("header", ["infer", None, 0])
     @pytest.mark.parametrize("index_col", [None, "col1"])
@@ -1403,12 +1422,17 @@ def test_read_parquet_dtype_backend(self, engine, make_parquet_file, dtype_backe
         with ensure_clean(".parquet") as unique_filename:
             make_parquet_file(filename=unique_filename, row_group_size=100)
 
+            def comparator(df1, df2):
+                df_equals(df1, df2)
+                df_equals(df1.dtypes, df2.dtypes)
+
             eval_io(
                 fn_name="read_parquet",
                 # read_parquet kwargs
                 engine=engine,
                 path=unique_filename,
                 dtype_backend=dtype_backend,
+                comparator=comparator,
             )
 
     def test_read_parquet_list_of_files_5698(self, engine, make_parquet_file):

From e0b9cc6e1960d25e1006703cd17202ab3f4ef10f Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 2 Jun 2023 15:09:33 +0200
Subject: [PATCH 133/176] add test for 'read_fwf' and 'read_excel'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_io.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 237c23d1765..d61b642d673 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -1900,6 +1900,23 @@ def test_read_excel(self, make_excel_file):
             io=make_excel_file(),
         )
 
+    @check_file_leaks
+    @pytest.mark.parametrize(
+        "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
+    )
+    def test_read_excel_dtype_backend(self, make_excel_file, dtype_backend):
+        def comparator(df1, df2):
+            df_equals(df1, df2)
+            df_equals(df1.dtypes, df2.dtypes)
+
+        eval_io(
+            fn_name="read_excel",
+            # read_csv kwargs
+            io=make_excel_file(),
+            dtype_backend=dtype_backend,
+            comparator=comparator,
+        )
+
     @check_file_leaks
     @pytest.mark.xfail(
         condition="config.getoption('--simulate-cloud').lower() != 'off'",
@@ -2379,6 +2396,25 @@ def test_fwf_file_usecols(self, make_fwf_file, usecols):
             usecols=usecols,
         )
 
+    @pytest.mark.parametrize(
+        "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
+    )
+    def test_read_fwf_dtype_backend(self, make_fwf_file, dtype_backend):
+        with ensure_clean(".fwf") as unique_filename:
+            make_fwf_file(filename=unique_filename)
+
+            def comparator(df1, df2):
+                df_equals(df1, df2)
+                df_equals(df1.dtypes, df2.dtypes)
+
+            eval_io(
+                fn_name="read_fwf",
+                # read_csv kwargs
+                filepath_or_buffer=unique_filename,
+                dtype_backend=dtype_backend,
+                comparator=comparator,
+            )
+
     def test_fwf_file_chunksize(self, make_fwf_file):
         unique_filename = make_fwf_file()
 

From 807d7f84871f925e2dfe3ecf989f4755fb315ca0 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 2 Jun 2023 15:14:01 +0200
Subject: [PATCH 134/176] keyword only parameters for 'read_excel'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/io.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modin/pandas/io.py b/modin/pandas/io.py
index 5b269e1276f..9c11342f17b 100644
--- a/modin/pandas/io.py
+++ b/modin/pandas/io.py
@@ -406,6 +406,7 @@ def read_clipboard(
 def read_excel(
     io,
     sheet_name: str | int | list[IntStrT] | None = 0,
+    *,
     header: int | Sequence[int] | None = 0,
     names: list[str] | None = None,
     index_col: int | Sequence[int] | None = None,

From 7aac71671445c77bac62bd472ec9bde1c9186a32 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 2 Jun 2023 15:21:30 +0200
Subject: [PATCH 135/176] test 'dtype_backend' for 'read_json'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_io.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index d61b642d673..152c0828f74 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -1804,6 +1804,23 @@ def test_read_json(self, make_json_file, lines):
             lines=lines,
         )
 
+    @pytest.mark.parametrize(
+        "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
+    )
+    def test_read_json_dtype_backend(self, make_json_file, dtype_backend):
+        def comparator(df1, df2):
+            df_equals(df1, df2)
+            df_equals(df1.dtypes, df2.dtypes)
+
+        eval_io(
+            fn_name="read_json",
+            # read_json kwargs
+            path_or_buf=make_json_file(lines=True),
+            lines=True,
+            dtype_backend=dtype_backend,
+            comparator=comparator,
+        )
+
     @pytest.mark.parametrize(
         "storage_options",
         [{"anon": False}, {"anon": True}, {"key": "123", "secret": "123"}, None],

From 9d2e85742633b7375b253db4b720b52513fde56a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 2 Jun 2023 16:28:59 +0200
Subject: [PATCH 136/176] test 'dtype_backend' for 'read_sql'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_io.py | 27 +++++++++++++++++++++++++++
 modin/pandas/test/utils.py   |  4 ++--
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 152c0828f74..1825c925a1f 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -2239,6 +2239,33 @@ def test_read_sql(self, tmp_path, make_sql_connection, read_sql_engine):
         pandas_df = pandas.read_sql(sql=query, con=sqlalchemy_connection)
         df_equals(modin_df, pandas_df)
 
+    @pytest.mark.xfail(
+        condition="config.getoption('--simulate-cloud').lower() != 'off'",
+        reason="The reason of tests fail in `cloud` mode is unknown for now - issue #3264",
+    )
+    @pytest.mark.parametrize(
+        "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
+    )
+    def test_read_sql_dtype_backend(self, tmp_path, make_sql_connection, dtype_backend):
+        filename = get_unique_filename(extension="db")
+
+        table = "test_read_sql"
+        conn = make_sql_connection(tmp_path / filename, table)
+        query = f"select * from {table}"
+
+        def comparator(df1, df2):
+            df_equals(df1, df2)
+            df_equals(df1.dtypes, df2.dtypes)
+
+        eval_io(
+            fn_name="read_sql",
+            # read_sql kwargs
+            sql=query,
+            con=conn,
+            dtype_backend=dtype_backend,
+            comparator=comparator,
+        )
+
     @pytest.mark.skipif(
         not TestReadFromSqlServer.get(),
         reason="Skip the test when the test SQL server is not set up.",
diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py
index 23ed6eef2c4..e00a5c8e293 100644
--- a/modin/pandas/test/utils.py
+++ b/modin/pandas/test/utils.py
@@ -1159,13 +1159,13 @@ def get_unique_filename(
         name of the test for which the unique file name is needed.
     kwargs: list of ints
         Unique combiantion of test parameters for creation of unique name.
-    extension: str
+    extension: str, default: "csv"
         Extension of unique file.
     data_dir: Union[str, Path]
         Data directory where test files will be created.
     suffix: str
         String to append to the resulted name.
-    debug_mode: bool
+    debug_mode: bool, default: False
         Get unique filename containing kwargs values.
         Otherwise kwargs values will be replaced with hash equivalent.
 

From 4dfc07fec7f1664314f5282fb7decf0dd7f9d371 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 2 Jun 2023 16:42:47 +0200
Subject: [PATCH 137/176] test 'dtype_backend' for 'read_feather'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../io/column_stores/feather_dispatcher.py    | 10 ++++++++++
 modin/pandas/test/test_io.py                  | 20 +++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/modin/core/io/column_stores/feather_dispatcher.py b/modin/core/io/column_stores/feather_dispatcher.py
index c450b8e2509..41152f1e227 100644
--- a/modin/core/io/column_stores/feather_dispatcher.py
+++ b/modin/core/io/column_stores/feather_dispatcher.py
@@ -13,6 +13,8 @@
 
 """Module houses `FeatherDispatcher` class, that is used for reading `.feather` files."""
 
+import pandas._libs.lib as lib
+
 from modin.core.io.column_stores.column_store_dispatcher import ColumnStoreDispatcher
 from modin.utils import import_optional_dependency
 from modin.core.io.file_dispatcher import OpenFile
@@ -47,6 +49,14 @@ def _read(cls, path, columns=None, **kwargs):
         PyArrow feather is used. Please refer to the documentation here
         https://arrow.apache.org/docs/python/api.html#feather-format
         """
+        if kwargs["dtype_backend"] != lib.no_default:
+            return cls.single_worker_read(
+                path,
+                columns=columns,
+                reason="'dtype_backend' not supported",
+                **kwargs,
+            )
+
         path = cls.get_path(path)
         if columns is None:
             import_optional_dependency(
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 1825c925a1f..baf863c4c6e 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -2672,6 +2672,26 @@ def test_read_feather(self, make_feather_file):
             path=make_feather_file(),
         )
 
+    @pytest.mark.xfail(
+        condition="config.getoption('--simulate-cloud').lower() != 'off'",
+        reason="The reason of tests fail in `cloud` mode is unknown for now - issue #3264",
+    )
+    @pytest.mark.parametrize(
+        "dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"]
+    )
+    def test_read_feather_dtype_backend(self, make_feather_file, dtype_backend):
+        def comparator(df1, df2):
+            df_equals(df1, df2)
+            df_equals(df1.dtypes, df2.dtypes)
+
+        eval_io(
+            fn_name="read_feather",
+            # read_feather kwargs
+            path=make_feather_file(),
+            dtype_backend=dtype_backend,
+            comparator=comparator,
+        )
+
     @pytest.mark.xfail(
         condition="config.getoption('--simulate-cloud').lower() != 'off'",
         reason="The reason of tests fail in `cloud` mode is unknown for now - issue #3264",

From c6ef04b2f1b58b334ab7b761f63937c615eb5389 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 2 Jun 2023 17:01:34 +0200
Subject: [PATCH 138/176] test 'dtype_backend' for 'convert_dtypes'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../test/dataframe/test_map_metadata.py       | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/modin/pandas/test/dataframe/test_map_metadata.py b/modin/pandas/test/dataframe/test_map_metadata.py
index 2bbe54d8877..59f4598de06 100644
--- a/modin/pandas/test/dataframe/test_map_metadata.py
+++ b/modin/pandas/test/dataframe/test_map_metadata.py
@@ -767,6 +767,28 @@ def test_convert_dtypes_single_partition(
     assert modin_result.dtypes.equals(pandas_result.dtypes)
 
 
+@pytest.mark.parametrize("dtype_backend", ["numpy_nullable", "pyarrow"])
+def test_convert_dtypes_dtype_backend(dtype_backend):
+    data = {
+        "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
+        "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
+        "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
+        "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")),
+        "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")),
+        "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
+    }
+
+    def comparator(df1, df2):
+        df_equals(df1, df2)
+        df_equals(df1.dtypes, df2.dtypes)
+
+    eval_general(
+        *create_test_dfs(data),
+        lambda df: df.convert_dtypes(dtype_backend=dtype_backend),
+        comparator=comparator,
+    )
+
+
 @pytest.mark.xfail(
     StorageFormat.get() == "Hdk",
     reason="HDK does not support columns with different types",

From 030c09c508737d8f61f8ecdc118417a6407c9ed5 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 2 Jun 2023 18:38:43 +0200
Subject: [PATCH 139/176] fix 'test_read_sql_dtype_backend'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_io.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index baf863c4c6e..70ab720abd8 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -2229,6 +2229,7 @@ def test_read_sql(self, tmp_path, make_sql_connection, read_sql_engine):
             con=sqlalchemy_connection,
         )
 
+        old_sql_engine = ReadSqlEngine.get()
         ReadSqlEngine.put(read_sql_engine)
         if ReadSqlEngine.get() == "Connectorx":
             modin_df = pd.read_sql(sql=query, con=conn)
@@ -2236,6 +2237,7 @@ def test_read_sql(self, tmp_path, make_sql_connection, read_sql_engine):
             modin_df = pd.read_sql(
                 sql=query, con=ModinDatabaseConnection("sqlalchemy", conn)
             )
+        ReadSqlEngine.put(old_sql_engine)
         pandas_df = pandas.read_sql(sql=query, con=sqlalchemy_connection)
         df_equals(modin_df, pandas_df)
 
@@ -2249,7 +2251,7 @@ def test_read_sql(self, tmp_path, make_sql_connection, read_sql_engine):
     def test_read_sql_dtype_backend(self, tmp_path, make_sql_connection, dtype_backend):
         filename = get_unique_filename(extension="db")
 
-        table = "test_read_sql"
+        table = "test_read_sql_dtype_backend"
         conn = make_sql_connection(tmp_path / filename, table)
         query = f"select * from {table}"
 

From 89f5b22c2eac5b9b22c7738b8ff776b44efb22c4 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 3 Jun 2023 15:21:53 +0200
Subject: [PATCH 140/176] add tests for 'dt.unit', 'dt.as_unit'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/io/io.py                        | 7 ++++++-
 modin/core/io/text/text_file_dispatcher.py | 1 -
 modin/pandas/series_utils.py               | 3 ++-
 modin/pandas/test/test_series.py           | 2 ++
 4 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/modin/core/io/io.py b/modin/core/io/io.py
index 2d60599c6e9..f79ce04f9b2 100644
--- a/modin/core/io/io.py
+++ b/modin/core/io/io.py
@@ -562,7 +562,12 @@ def read_spss(
     ):  # noqa: PR01
         ErrorMessage.default_to_pandas("`read_spss`")
         return cls.from_pandas(
-            pandas.read_spss(path, usecols, convert_categoricals, dtype_backend)
+            pandas.read_spss(
+                path,
+                usecols=usecols,
+                convert_categoricals=convert_categoricals,
+                dtype_backend=dtype_backend,
+            )
         )
 
     @classmethod
diff --git a/modin/core/io/text/text_file_dispatcher.py b/modin/core/io/text/text_file_dispatcher.py
index 9e50d1b8dec..fdf29154bb1 100644
--- a/modin/core/io/text/text_file_dispatcher.py
+++ b/modin/core/io/text/text_file_dispatcher.py
@@ -868,7 +868,6 @@ def _define_index(
             Partitions rows lengths.
         """
         index_objs = cls.materialize(index_ids)
-
         if len(index_objs) == 0 or all((isinstance(obj, int) for obj in index_objs)):
             row_lengths = index_objs
             new_index = pandas.RangeIndex(sum(index_objs))
diff --git a/modin/pandas/series_utils.py b/modin/pandas/series_utils.py
index 942ad700b86..fa7308bd165 100644
--- a/modin/pandas/series_utils.py
+++ b/modin/pandas/series_utils.py
@@ -626,7 +626,8 @@ def freq(self):
 
     @property
     def unit(self):
-        return self._Series(query_compiler=self._query_compiler.dt_unit())
+        # use `iloc[0]` to return scalar
+        return self._Series(query_compiler=self._query_compiler.dt_unit()).iloc[0]
 
     def as_unit(self, *args, **kwargs):
         return self._Series(
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 1c7906a11c3..4f78d13ddcf 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -1778,6 +1778,8 @@ def test_dt(timezone):
     df_equals(modin_series.dt.weekday, pandas_series.dt.weekday)
     df_equals(modin_series.dt.dayofyear, pandas_series.dt.dayofyear)
     df_equals(modin_series.dt.day_of_year, pandas_series.dt.day_of_year)
+    df_equals(modin_series.dt.unit, pandas_series.dt.unit)
+    df_equals(modin_series.dt.as_unit("s"), pandas_series.dt.as_unit("s"))
     df_equals(modin_series.dt.isocalendar(), pandas_series.dt.isocalendar())
     df_equals(modin_series.dt.quarter, pandas_series.dt.quarter)
     df_equals(modin_series.dt.is_month_start, pandas_series.dt.is_month_start)

From 5af97ba8006e0171c3a18399aa11c07bd4b8f11f Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 3 Jun 2023 16:32:44 +0200
Subject: [PATCH 141/176] fix 'test_read_spss'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_io.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 70ab720abd8..8a9630741a4 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -2827,12 +2827,15 @@ class TestSpss:
     # In case of defaulting to pandas, it's enough
     # to check that the parameters are passed to pandas.
     def test_read_spss(self):
-        test_args = ("fake_path", ["A"], False, lib.no_default)
+        test_args = ("fake_path",)
+        test_kwargs = dict(
+            usecols=["A"], convert_categoricals=False, dtype_backend=lib.no_default
+        )
         with mock.patch(
             "pandas.read_spss", return_value=pandas.DataFrame([])
         ) as read_spss:
-            pd.read_spss(*test_args)
-        read_spss.assert_called_once_with(*test_args)
+            pd.read_spss(*test_args, **test_kwargs)
+        read_spss.assert_called_once_with(*test_args, **test_kwargs)
 
 
 def test_json_normalize():

From 21ba8d9ac8593db8a804647f02cef3d0b340872e Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 3 Jun 2023 16:54:19 +0200
Subject: [PATCH 142/176] add 'test_to_xarray_mock'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_series.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 4f78d13ddcf..45eb9145c3a 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -15,6 +15,7 @@
 
 import sys
 import pytest
+import unittest.mock as mock
 import numpy as np
 import json
 import pandas
@@ -3463,6 +3464,16 @@ def test_to_xarray(data):
         modin_series.to_xarray()
 
 
+def test_to_xarray_mock():
+    modin_series = pd.Series([])
+
+    with mock.patch("pandas.Series.to_xarray") as to_xarray:
+        modin_series.to_xarray()
+    to_xarray.assert_called_once()
+    assert len(to_xarray.call_args[0]) == 1
+    df_equals(modin_series, to_xarray.call_args[0][0])
+
+
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 def test_tolist(data):
     modin_series, _ = create_test_series(data)  # noqa: F841

From b9b25b27d58c95e1ce251ac8e46b24e265102d71 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 3 Jun 2023 21:26:23 +0200
Subject: [PATCH 143/176] add test cases for 'pivot'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_general.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/modin/pandas/test/test_general.py b/modin/pandas/test/test_general.py
index 7deb24e4a59..b563e353713 100644
--- a/modin/pandas/test/test_general.py
+++ b/modin/pandas/test/test_general.py
@@ -543,6 +543,16 @@ def test_pivot():
     with pytest.raises(ValueError):
         pd.pivot(test_df["bar"], index="foo", columns="bar", values="baz")
 
+    df_equals(
+        pd.pivot(test_df, columns="bar"),
+        pandas.pivot(test_df._to_pandas(), columns="bar"),
+    )
+
+    df_equals(
+        pd.pivot(test_df, index="foo", columns="bar"),
+        pandas.pivot(test_df._to_pandas(), index="foo", columns="bar"),
+    )
+
 
 def test_pivot_values_is_none():
     test_df = pd.DataFrame(

From a6210c6b47a9126aa9d1c10addecce7d68557dca Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sat, 3 Jun 2023 22:34:42 +0200
Subject: [PATCH 144/176] revert some changes

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/dataframe/test_udf.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/modin/pandas/test/dataframe/test_udf.py b/modin/pandas/test/dataframe/test_udf.py
index 98ea9bc0e61..1f3d6de8ea0 100644
--- a/modin/pandas/test/dataframe/test_udf.py
+++ b/modin/pandas/test/dataframe/test_udf.py
@@ -141,9 +141,7 @@ def test_apply_key_error(func):
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 @pytest.mark.parametrize("func", ["kurt", "count", "sum", "mean", "all", "any"])
 def test_apply_text_func_with_level(level, data, func, axis):
-    func_kwargs = dict(
-        axis=axis, **({"level": level} if level is not no_default else {})
-    )
+    func_kwargs = {"level": level, "axis": axis}
     rows_number = len(next(iter(data.values())))  # length of the first data column
     level_0 = np.random.choice([0, 1, 2], rows_number)
     level_1 = np.random.choice([3, 4, 5], rows_number)

From 78ac5769ddbdb26a5ac18670382024dccfedd95c Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sun, 4 Jun 2023 01:03:43 +0200
Subject: [PATCH 145/176] skip new 'pivot' test cases on 'BaseOnPython'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_general.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/modin/pandas/test/test_general.py b/modin/pandas/test/test_general.py
index b563e353713..80b04f4d091 100644
--- a/modin/pandas/test/test_general.py
+++ b/modin/pandas/test/test_general.py
@@ -543,15 +543,17 @@ def test_pivot():
     with pytest.raises(ValueError):
         pd.pivot(test_df["bar"], index="foo", columns="bar", values="baz")
 
-    df_equals(
-        pd.pivot(test_df, columns="bar"),
-        pandas.pivot(test_df._to_pandas(), columns="bar"),
-    )
+    if get_current_execution() != "BaseOnPython":
+        # Failed for some reason on 'BaseOnPython'
+        df_equals(
+            pd.pivot(test_df, columns="bar"),
+            pandas.pivot(test_df._to_pandas(), columns="bar"),
+        )
 
-    df_equals(
-        pd.pivot(test_df, index="foo", columns="bar"),
-        pandas.pivot(test_df._to_pandas(), index="foo", columns="bar"),
-    )
+        df_equals(
+            pd.pivot(test_df, index="foo", columns="bar"),
+            pandas.pivot(test_df._to_pandas(), index="foo", columns="bar"),
+        )
 
 
 def test_pivot_values_is_none():

From a8c08238ecca69ae4ca062309b41df8f17259d68 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sun, 4 Jun 2023 13:43:51 +0200
Subject: [PATCH 146/176] skip also for 'hdk'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_general.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modin/pandas/test/test_general.py b/modin/pandas/test/test_general.py
index 80b04f4d091..fdb22a22906 100644
--- a/modin/pandas/test/test_general.py
+++ b/modin/pandas/test/test_general.py
@@ -543,8 +543,8 @@ def test_pivot():
     with pytest.raises(ValueError):
         pd.pivot(test_df["bar"], index="foo", columns="bar", values="baz")
 
-    if get_current_execution() != "BaseOnPython":
-        # Failed for some reason on 'BaseOnPython'
+    if get_current_execution() != "BaseOnPython" and StorageFormat.get() != "Hdk":
+        # Failed for some reason on 'BaseOnPython' and 'HDK'
         df_equals(
             pd.pivot(test_df, columns="bar"),
             pandas.pivot(test_df._to_pandas(), columns="bar"),

From 99271ff9fd59b2477379715802289476e64c97ea Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sun, 4 Jun 2023 15:17:38 +0200
Subject: [PATCH 147/176] revert some changes

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/series.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 4382c687d15..c213a30430e 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -2066,8 +2066,6 @@ def reindex_like(
         limit=None,
         tolerance=None,
     ) -> "Series":
-        if copy is None:
-            copy = True
         # docs say "Same as calling .reindex(index=other.index, columns=other.columns,...).":
         # https://pandas.pydata.org/pandas-docs/version/1.4/reference/api/pandas.Series.reindex_like.html
         return self.reindex(

From 0fddb6a8bdf59c1e9053e1ca5df5726b0d6060ae Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sun, 4 Jun 2023 15:41:50 +0200
Subject: [PATCH 148/176] fixes

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py      | 7 +++++--
 modin/pandas/dataframe.py | 5 +++--
 modin/pandas/series.py    | 1 +
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 0d3446368ab..67c2873856e 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1038,7 +1038,7 @@ def between_time(
         )
 
     def bfill(
-        self, axis=None, inplace=False, limit=None, downcast=None
+        self, *, axis=None, inplace=False, limit=None, downcast=None
     ):  # noqa: PR01, RT01, D200
         """
         Synonym for `DataFrame.fillna` with ``method='bfill'``.
@@ -1485,7 +1485,7 @@ def expanding(
         )
 
     def ffill(
-        self, axis=None, inplace=False, limit=None, downcast=None
+        self, *, axis=None, inplace=False, limit=None, downcast=None
     ):  # noqa: PR01, RT01, D200
         """
         Synonym for `DataFrame.fillna` with ``method='ffill'``.
@@ -2711,6 +2711,7 @@ def skew(
 
     def sort_index(
         self,
+        *,
         axis=0,
         level=None,
         ascending=True,
@@ -2747,6 +2748,7 @@ def sort_index(
     def sort_values(
         self,
         by,
+        *,
         axis=0,
         ascending=True,
         inplace: bool = False,
@@ -3315,6 +3317,7 @@ def tz_localize(
     def interpolate(
         self,
         method="linear",
+        *,
         axis=0,
         limit=None,
         inplace=False,
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index b8d1fec5012..3a3e552018d 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -1613,6 +1613,7 @@ def rename(
     def reindex(
         self,
         labels=None,
+        *,
         index=None,
         columns=None,
         axis=None,
@@ -1823,7 +1824,7 @@ def __array_wrap__(self, result, context=None):
         return self._default_to_pandas("__array_wrap__", result, context=context)
 
     def set_index(
-        self, keys, drop=True, append=False, inplace=False, verify_integrity=False
+        self, keys, *, drop=True, append=False, inplace=False, verify_integrity=False
     ):  # noqa: PR01, RT01, D200
         """
         Set the ``DataFrame`` index using existing columns.
@@ -2136,6 +2137,7 @@ def to_records(
     def to_stata(
         self,
         path: FilePath | WriteBuffer[bytes],
+        *,
         convert_dates: dict[Hashable, str] | None = None,
         write_index: bool = True,
         byteorder: str | None = None,
@@ -2146,7 +2148,6 @@ def to_stata(
         convert_strl: Sequence[Hashable] | None = None,
         compression: CompressionOptions = "infer",
         storage_options: StorageOptions = None,
-        *,
         value_labels: dict[Hashable, dict[float | int, str]] | None = None,
     ):
         return self._default_to_pandas(
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index c213a30430e..50aef28346a 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1644,6 +1644,7 @@ def searchsorted(self, value, side="left", sorter=None):  # noqa: PR01, RT01, D2
 
     def sort_values(
         self,
+        *,
         axis=0,
         ascending=True,
         inplace=False,

From 7edd9bdd8bcaa16466f666949d5314d460f610ea Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sun, 4 Jun 2023 19:44:52 +0200
Subject: [PATCH 149/176] revert some changes

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_groupby.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index 00b92f605cd..a25f5e24483 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -2675,11 +2675,10 @@ def test_groupby_pct_change_diff_6194():
         }
     )
     # These methods should not crash
-    # AttributeError: 'DataFrameGroupBy' object has no attribute 'pad'
     eval_general(
         df,
         df._to_pandas(),
-        lambda df: df.groupby(by="by").pct_change(fill_method="ffill"),
+        lambda df: df.groupby(by="by").pct_change(),
     )
     eval_general(
         df,

From c12476550c38a37f4004f0fa1c665a39d8cc7b62 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 14:13:17 +0200
Subject: [PATCH 150/176] address review comments

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .github/workflows/ci.yml  |  3 +--
 modin/pandas/dataframe.py | 24 ------------------------
 2 files changed, 1 insertion(+), 26 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 86843ac5c91..a1ad7c5076a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -477,8 +477,7 @@ jobs:
       - run: python -m pytest modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_utils.py
       - run: python -m pytest modin/pandas/test/test_io.py --verbose
       - run: python -m pytest modin/test/interchange/dataframe_protocol/test_general.py
-      # TODO: uncomment after fix
-      # - run: python -m pytest modin/test/interchange/dataframe_protocol/hdk
+      - run: python -m pytest modin/test/interchange/dataframe_protocol/hdk
       - run: python -m pytest modin/experimental/sql/test/test_sql.py
       - run: python -m pytest modin/pandas/test/test_concat.py
       - run: python -m pytest modin/pandas/test/dataframe/test_binary.py
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 3a3e552018d..d9c4d45c93b 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -1799,30 +1799,6 @@ def is_dtype_instance_mapper(column, dtype):
         ]
         return self.drop(columns=self.columns[indicate], inplace=False)
 
-    def __array_wrap__(self, result, context=None):
-        """
-        Get called after a ufunc and other functions.
-
-        Parameters
-        ----------
-        result : np.ndarray
-            The result of the ufunc or other function called on the NumPy array
-            returned by __array__.
-        context : tuple of (func, tuple, int), optional
-            This parameter is returned by ufuncs as a 3-element tuple: (name of the
-            ufunc, arguments of the ufunc, domain of the ufunc), but is not set by
-            other NumPy functions.
-
-        Returns
-        -------
-        BasePandasDataset
-            Wrapped Modin object.
-        """
-        # TODO: This is very inefficient. __array__ and as_matrix have been
-        # changed to call the more efficient to_numpy, but this has been left
-        # unchanged since we are not sure of its purpose.
-        return self._default_to_pandas("__array_wrap__", result, context=context)
-
     def set_index(
         self, keys, *, drop=True, append=False, inplace=False, verify_integrity=False
     ):  # noqa: PR01, RT01, D200

From 0c922ab47d3e915982d47001852b8fac62ab8235 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 14:29:57 +0200
Subject: [PATCH 151/176] address review comments[2]

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/io/column_stores/parquet_dispatcher.py | 4 ++--
 modin/core/io/text/text_file_dispatcher.py        | 6 +-----
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/modin/core/io/column_stores/parquet_dispatcher.py b/modin/core/io/column_stores/parquet_dispatcher.py
index 7b826f165c5..5ae5177cfcb 100644
--- a/modin/core/io/column_stores/parquet_dispatcher.py
+++ b/modin/core/io/column_stores/parquet_dispatcher.py
@@ -602,8 +602,8 @@ def _read(cls, path, engine, columns, use_nullable_dtypes, dtype_backend, **kwar
             Parquet library to use.
         columns : list
             If not None, only these columns will be read from the file.
-        use_nullable_dtypes : bool
-        dtype_backend : {"numpy_nullable", "pyarrow"}
+        use_nullable_dtypes : Union[bool, lib.NoDefault]
+        dtype_backend : {"numpy_nullable", "pyarrow", lib.no_default}
         **kwargs : dict
             Keyword arguments.
 
diff --git a/modin/core/io/text/text_file_dispatcher.py b/modin/core/io/text/text_file_dispatcher.py
index fdf29154bb1..14447fc7b11 100644
--- a/modin/core/io/text/text_file_dispatcher.py
+++ b/modin/core/io/text/text_file_dispatcher.py
@@ -868,14 +868,10 @@ def _define_index(
             Partitions rows lengths.
         """
         index_objs = cls.materialize(index_ids)
-        if len(index_objs) == 0 or all((isinstance(obj, int) for obj in index_objs)):
+        if len(index_objs) == 0 or isinstance(index_objs[0], int):
             row_lengths = index_objs
             new_index = pandas.RangeIndex(sum(index_objs))
         else:
-            index_objs = [
-                pandas.RangeIndex(obj) if isinstance(obj, int) else obj
-                for obj in index_objs
-            ]
             row_lengths = [len(o) for o in index_objs]
             new_index = index_objs[0].append(index_objs[1:])
             new_index.name = index_name

From 2f7fd7c1c0db5ae1e9e2e210bab89893599d7b6e Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 14:37:36 +0200
Subject: [PATCH 152/176] change 'collections' imports

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/pandas/parsers.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/modin/core/storage_formats/pandas/parsers.py b/modin/core/storage_formats/pandas/parsers.py
index ffac8c1839c..67cf7c2602a 100644
--- a/modin/core/storage_formats/pandas/parsers.py
+++ b/modin/core/storage_formats/pandas/parsers.py
@@ -39,8 +39,7 @@
   parameters are passed into `pandas.read_sql` function without modification.
 """
 
-import collections
-from collections import OrderedDict
+from collections import abc, OrderedDict
 from io import BytesIO, TextIOWrapper, IOBase
 import fsspec
 import numpy as np
@@ -782,7 +781,7 @@ def parse(files_for_parser, engine, **kwargs):
         storage_options = kwargs.get("storage_options", {})
         chunks = []
         # `single_worker_read` just passes in a string path or path-like object
-        if not isinstance(files_for_parser, collections.abc.Iterable) or isinstance(
+        if not isinstance(files_for_parser, abc.Iterable) or isinstance(
             files_for_parser, str
         ):
             return pandas.read_parquet(files_for_parser, engine=engine, **kwargs)

From f3835b67ecb9fba13abea79cf8e087a95631d4a6 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 14:55:41 +0200
Subject: [PATCH 153/176] use 'os.PathLike'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/pandas/parsers.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/modin/core/storage_formats/pandas/parsers.py b/modin/core/storage_formats/pandas/parsers.py
index 67cf7c2602a..a8db1688f7f 100644
--- a/modin/core/storage_formats/pandas/parsers.py
+++ b/modin/core/storage_formats/pandas/parsers.py
@@ -39,7 +39,8 @@
   parameters are passed into `pandas.read_sql` function without modification.
 """
 
-from collections import abc, OrderedDict
+import os
+from collections import OrderedDict
 from io import BytesIO, TextIOWrapper, IOBase
 import fsspec
 import numpy as np
@@ -781,9 +782,7 @@ def parse(files_for_parser, engine, **kwargs):
         storage_options = kwargs.get("storage_options", {})
         chunks = []
         # `single_worker_read` just passes in a string path or path-like object
-        if not isinstance(files_for_parser, abc.Iterable) or isinstance(
-            files_for_parser, str
-        ):
+        if isinstance(files_for_parser, (str, os.PathLike)):
             return pandas.read_parquet(files_for_parser, engine=engine, **kwargs)
 
         for file_for_parser in files_for_parser:

From 997bea8f04c0e98d99d18ede778ca89dd6c954c0 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 16:32:14 +0200
Subject: [PATCH 154/176] fix pandas version for pip

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 requirements-dev.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index 67de5f34d12..98c6064d15c 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,4 +1,4 @@
-pandas==1.5.3
+pandas==2.0.2
 numpy>=1.18.5
 dask[complete]>=2.22.0
 distributed>=2.22.0

From 353bdc537980f44ec20515eb8c1f45e449d4c104 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 16:40:09 +0200
Subject: [PATCH 155/176] try to fix mypy

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modin/utils.py b/modin/utils.py
index de3f859db8a..442414073a5 100644
--- a/modin/utils.py
+++ b/modin/utils.py
@@ -32,7 +32,7 @@
 import pandas
 import numpy as np
 
-from pandas.util._decorators import Appender
+from pandas.util._decorators import Appender  # type: ignore[attr-defined]
 from pandas.util._print_versions import _get_sys_info, _get_dependency_info  # type: ignore[attr-defined]
 from pandas._typing import JSONSerializable
 

From 761192db220ddbf54ceb9d6571cfcf524f320d76 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 16:42:50 +0200
Subject: [PATCH 156/176] forgotten 'pyarrow' pin

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 requirements-dev.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index 98c6064d15c..3f181784cac 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -3,7 +3,7 @@ numpy>=1.18.5
 dask[complete]>=2.22.0
 distributed>=2.22.0
 ray[default]>=1.13.0
-pyarrow<12 # workaround for https://github.com/modin-project/modin/issues/6072
+pyarrow
 psutil
 fsspec
 xarray

From 3fba35fd1141063e39b2f3fab7bce6f6602473ba Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 18:17:13 +0200
Subject: [PATCH 157/176] xfail hdk tests

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../dataframe_protocol/hdk/test_protocol.py          | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/modin/test/interchange/dataframe_protocol/hdk/test_protocol.py b/modin/test/interchange/dataframe_protocol/hdk/test_protocol.py
index 6e600082c12..454eb6914a8 100644
--- a/modin/test/interchange/dataframe_protocol/hdk/test_protocol.py
+++ b/modin/test/interchange/dataframe_protocol/hdk/test_protocol.py
@@ -30,6 +30,9 @@
 from .utils import get_data_of_all_types, split_df_into_chunks, export_frame
 
 
+@pytest.mark.xfail(
+    reason="conversion from 'pyarrow' ends up with the wrong datetime64 resolution"
+)
 @pytest.mark.parametrize("data_has_nulls", [True, False])
 @pytest.mark.parametrize("from_hdk", [True, False])
 @pytest.mark.parametrize("n_chunks", [None, 3, 5, 12])
@@ -50,6 +53,9 @@ def test_simple_export(data_has_nulls, from_hdk, n_chunks):
     df_equals(md_df, exported_df)
 
 
+@pytest.mark.xfail(
+    reason="conversion from 'pyarrow' ends up with the wrong datetime64 resolution"
+)
 @pytest.mark.parametrize("n_chunks", [2, 4, 7])
 @pytest.mark.parametrize("data_has_nulls", [True, False])
 def test_export_aligned_at_chunks(n_chunks, data_has_nulls):
@@ -80,6 +86,9 @@ def test_export_aligned_at_chunks(n_chunks, data_has_nulls):
     df_equals(md_df, exported_df)
 
 
+@pytest.mark.xfail(
+    reason="conversion from 'pyarrow' ends up with the wrong datetime64 resolution"
+)
 @pytest.mark.parametrize("data_has_nulls", [True, False])
 def test_export_unaligned_at_chunks(data_has_nulls):
     """
@@ -139,6 +148,9 @@ def test_export_unaligned_at_chunks(data_has_nulls):
     df_equals(md_df, exported_df)
 
 
+@pytest.mark.xfail(
+    reason="conversion from 'pyarrow' ends up with the wrong datetime64 resolution"
+)
 @pytest.mark.parametrize("data_has_nulls", [True, False])
 def test_export_indivisible_chunking(data_has_nulls):
     """

From 2be7d422b253873dff58b126144398b7d1bb42fc Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 18:26:09 +0200
Subject: [PATCH 158/176] remove 'MD01'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/experimental/core/io/sql/utils.py | 6 +++---
 modin/experimental/pandas/io.py         | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/modin/experimental/core/io/sql/utils.py b/modin/experimental/core/io/sql/utils.py
index dc3b347169d..a157b53ae2d 100644
--- a/modin/experimental/core/io/sql/utils.py
+++ b/modin/experimental/core/io/sql/utils.py
@@ -288,7 +288,7 @@ def read_sql_with_offset(
     chunksize=None,
     dtype_backend=lib.no_default,
     dtype=None,
-):  # pragma: no cover, # noqa: MD01
+):  # pragma: no cover
     """
     Read a chunk of SQL query or table into a pandas DataFrame.
 
@@ -333,12 +333,12 @@ def read_sql_with_offset(
     chunksize : int, optional
         If specified, return an iterator where `chunksize` is the number of rows
         to include in each chunk.
-    dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
+    dtype_backend : {"numpy_nullable", "pyarrow"}, default: NumPy backed DataFrames
         Which dtype_backend to use, e.g. whether a DataFrame should have NumPy arrays,
         nullable dtypes are used for all dtypes that have a nullable implementation when
         "numpy_nullable" is set, PyArrow is used for all dtypes if "pyarrow" is set.
         The dtype_backends are still experimential.
-    dtype : Type name or dict of columns
+    dtype : Type name or dict of columns, optional
         Data type for data or columns. E.g. np.float64 or {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. The argument is ignored if a table is passed instead of a query.
 
     Returns
diff --git a/modin/experimental/pandas/io.py b/modin/experimental/pandas/io.py
index 4cb481399ac..82e649fa154 100644
--- a/modin/experimental/pandas/io.py
+++ b/modin/experimental/pandas/io.py
@@ -42,7 +42,7 @@ def read_sql(
     lower_bound: Optional[int] = None,
     upper_bound: Optional[int] = None,
     max_sessions: Optional[int] = None,
-) -> Union[DataFrame, Iterator[DataFrame]]:  # noqa: MD01
+) -> Union[DataFrame, Iterator[DataFrame]]:
     """
     General documentation is available in `modin.pandas.read_sql`.
 
@@ -87,12 +87,12 @@ def read_sql(
     chunksize : int, optional
         If specified, return an iterator where `chunksize` is the
         number of rows to include in each chunk.
-    dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
+    dtype_backend : {"numpy_nullable", "pyarrow"}, default: NumPy backed DataFrames
         Which dtype_backend to use, e.g. whether a DataFrame should have NumPy arrays,
         nullable dtypes are used for all dtypes that have a nullable implementation when
         "numpy_nullable" is set, PyArrow is used for all dtypes if "pyarrow" is set.
         The dtype_backends are still experimential.
-    dtype : Type name or dict of columns
+    dtype : Type name or dict of columns, optional
         Data type for data or columns. E.g. np.float64 or {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. The argument is ignored if a table is passed instead of a query.
     partition_column : str, optional
         Column used to share the data between the workers (MUST be a INTEGER column).

From a08f152ad9a4b42faae5156d48c922b1bec4cd97 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 18:27:36 +0200
Subject: [PATCH 159/176] use 'Optional'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 67c2873856e..6ad2920cfba 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1839,8 +1839,8 @@ def mask(
         other=no_default,
         *,
         inplace: bool = False,
-        axis: Axis = None,
-        level: Level = None,
+        axis: Optional[Axis] = None,
+        level: Optional[Level] = None,
     ):  # noqa: PR01, RT01, D200
         """
         Replace values where the condition is True.

From 594b3e669163d4c5631ca88c2519d4c894e5bc86 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 18:44:38 +0200
Subject: [PATCH 160/176] update 'to_dict'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/storage_formats/base/query_compiler.py | 4 ++--
 modin/pandas/base.py                              | 6 +-----
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 326c2df2e36..49f6b2776f7 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -467,7 +467,7 @@ def to_list(self):
         return SeriesDefault.register(pandas.Series.to_list)(self)
 
     @doc_utils.add_refer_to("DataFrame.to_dict")
-    def dataframe_to_dict(self, orient="dict", into=dict):  # noqa: PR01
+    def dataframe_to_dict(self, orient="dict", into=dict, index=True):  # noqa: PR01
         """
         Convert the DataFrame to a dictionary.
 
@@ -475,7 +475,7 @@ def dataframe_to_dict(self, orient="dict", into=dict):  # noqa: PR01
         -------
         dict or `into` instance
         """
-        return self.to_pandas().to_dict(orient, into)
+        return self.to_pandas().to_dict(orient, into, index)
 
     @doc_utils.add_refer_to("Series.to_dict")
     def series_to_dict(self, into=dict):  # noqa: PR01
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 6ad2920cfba..2ab71c01f1c 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -2950,11 +2950,7 @@ def to_excel(
         )
 
     def to_dict(self, orient="dict", into=dict, index=True):
-        if not index:
-            return self._default_to_pandas(
-                "to_dict", orient=orient, into=into, index=index
-            )
-        return self._query_compiler.dataframe_to_dict(orient, into)
+        return self._query_compiler.dataframe_to_dict(orient, into, index)
 
     def to_hdf(
         self, path_or_buf, key, format="table", **kwargs

From c0fd0cc611b311c13d69e84070529af61b38155c Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 18:53:47 +0200
Subject: [PATCH 161/176] update 'set_axis' calls in dataframe.__init__

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/dataframe.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index d9c4d45c93b..264b88b4205 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -167,9 +167,11 @@ def __init__(
             if columns is not None and not isinstance(columns, pandas.Index):
                 columns = pandas.Index(columns)
             if columns is not None:
-                self = self.set_axis(columns, axis=1, copy=False)
+                obj_with_new_columns = self.set_axis(columns, axis=1, copy=False)
+                self._query_compiler = obj_with_new_columns._query_compiler
             if index is not None:
-                self = self.set_axis(index, axis=0, copy=False)
+                obj_with_new_index = self.set_axis(index, axis=0, copy=False)
+                self._query_compiler = obj_with_new_index._query_compiler
             if dtype is not None:
                 casted_obj = self.astype(dtype, copy=False)
                 self._query_compiler = casted_obj._query_compiler

From dc7e1157c8cfdde345f7a96f46f958f1ef3d3a65 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoliimyachev@mail.com>
Date: Mon, 5 Jun 2023 22:35:43 +0200
Subject: [PATCH 162/176] Update modin/pandas/test/test_groupby.py

Co-authored-by: Vasily Litvinov <fam1ly.n4me@yandex.ru>
---
 modin/pandas/test/test_groupby.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index a25f5e24483..9d784135666 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -1153,10 +1153,9 @@ def eval_ngroups(modin_groupby, pandas_groupby):
 
 
 def eval_skew(modin_groupby, pandas_groupby, numeric_only=False):
-    kwargs = dict(numeric_only=numeric_only)
     modin_df_almost_equals_pandas(
-        modin_groupby.skew(**kwargs),
-        pandas_groupby.skew(**kwargs),
+        modin_groupby.skew(numeric_only=numeric_only),
+        pandas_groupby.skew(numeric_only=numeric_only),
     )
 
 

From 6b6802c166d4c7a8dfb20e2cdaf77478b3792b10 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 19:27:17 +0200
Subject: [PATCH 163/176] use False value for 'return_tuple_when_iterating'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 264b88b4205..4c8013b9f36 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -445,7 +445,7 @@ def groupby(
         # groupby takes place.
         drop = False
 
-        return_tuple_when_iterating = None
+        return_tuple_when_iterating = False
         if (
             not isinstance(by, (pandas.Series, Series))
             and is_list_like(by)

From 53a399bf3f2d27cca294e561c777484bde6fe601 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 19:29:25 +0200
Subject: [PATCH 164/176] update 'pivot'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/general.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/modin/pandas/general.py b/modin/pandas/general.py
index e258a59390d..87376c49735 100644
--- a/modin/pandas/general.py
+++ b/modin/pandas/general.py
@@ -257,10 +257,6 @@ def pivot(
     """
     Return reshaped DataFrame organized by given index / column values.
     """
-    if index is NoDefault:
-        index = None
-    if values is NoDefault:
-        values = None
     if not isinstance(data, DataFrame):
         raise ValueError("can not pivot with instance of type {}".format(type(data)))
     return data.pivot(index=index, columns=columns, values=values)

From 0944041a54cd69f6512946a156c52fc3aa1dfd02 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 19:30:51 +0200
Subject: [PATCH 165/176] update type hints for 'copy' parameter

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/dataframe.py | 2 +-
 modin/pandas/general.py   | 2 +-
 modin/pandas/series.py    | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 4c8013b9f36..efb7449cc2a 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -2647,7 +2647,7 @@ def reindex_like(
         self: "DataFrame",
         other,
         method=None,
-        copy: bool = None,
+        copy: Optional[bool] = None,
         limit=None,
         tolerance=None,
     ) -> "DataFrame":
diff --git a/modin/pandas/general.py b/modin/pandas/general.py
index 87376c49735..2567650d788 100644
--- a/modin/pandas/general.py
+++ b/modin/pandas/general.py
@@ -414,7 +414,7 @@ def concat(
     names=None,
     verify_integrity: bool = False,
     sort: bool = False,
-    copy: bool = None,
+    copy: Optional[bool] = None,
 ) -> "DataFrame | Series":  # noqa: PR01, RT01, D200
     """
     Concatenate Modin objects along a particular axis.
diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index 50aef28346a..cfc005d0073 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1393,7 +1393,7 @@ def reindex(
         *,
         axis: Axis = None,
         method: str = None,
-        copy: bool = None,
+        copy: Optional[bool] = None,
         level=None,
         fill_value=None,
         limit: int = None,
@@ -2063,7 +2063,7 @@ def reindex_like(
         self: "Series",
         other,
         method=None,
-        copy: bool = None,
+        copy: Optional[bool] = None,
         limit=None,
         tolerance=None,
     ) -> "Series":

From 9cfd7a16d4d49ae76fc6fc8de8f5dafdb31c88a2 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 19:32:30 +0200
Subject: [PATCH 166/176] fixes

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/groupby.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 51709cbb136..582aaa75842 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -77,7 +77,7 @@
 @_inherit_docstrings(pandas.core.groupby.DataFrameGroupBy)
 class DataFrameGroupBy(ClassLogger):
     _pandas_class = pandas.core.groupby.DataFrameGroupBy
-    _return_tuple_when_iterating = None
+    _return_tuple_when_iterating = False
 
     def __init__(
         self,
@@ -103,7 +103,7 @@ def __init__(
         # the keys that are returned by iterating over the resulting DataFrameGroupBy
         # object will now be tuples of length one (pandas#GH47761)
         self._return_tuple_when_iterating = kwargs.pop(
-            "return_tuple_when_iterating", None
+            "return_tuple_when_iterating", False
         )
 
         if (

From be3c14871a40d329aeded8b30bb3cba732d569a6 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 19:33:50 +0200
Subject: [PATCH 167/176] remove unused code

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/groupby.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 582aaa75842..3234040b8f5 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -1491,16 +1491,6 @@ def _wrap_aggregation(
         agg_args = tuple() if agg_args is None else agg_args
         agg_kwargs = dict() if agg_kwargs is None else agg_kwargs
 
-        """
-        if numeric_only is None or numeric_only is no_default:
-            # pandas behavior: if `numeric_only` wasn't explicitly specified then
-            # the parameter is considered to be `False` if there are no numeric types
-            # in the frame and `True` otherwise.
-            numeric_only = any(
-                is_numeric_dtype(dtype) for dtype in self._query_compiler.dtypes
-            )
-        """
-
         if numeric_only and self.ndim == 2:
             by_cols = self._internal_by
             mask_cols = [

From d24d68c0dc45f92e41e7e70adb152de96e0e86f7 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 19:58:26 +0200
Subject: [PATCH 168/176] update comment for 'test_groupby_api_equality'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_api.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modin/pandas/test/test_api.py b/modin/pandas/test/test_api.py
index 5481a041446..58f8088ae63 100644
--- a/modin/pandas/test/test_api.py
+++ b/modin/pandas/test/test_api.py
@@ -236,7 +236,8 @@ def test_sparse_accessor_api_equality(obj):
 def test_groupby_api_equality(obj):
     modin_dir = [x for x in dir(getattr(pd.groupby, obj)) if x[0] != "_"]
     pandas_dir = [x for x in dir(getattr(pandas.core.groupby, obj)) if x[0] != "_"]
-    # This attribute is hidden from the DataFrameGroupBy object
+    # These attributes are hidden in the DataFrameGroupBy/SeriesGroupBy instance,
+    # but available in the DataFrameGroupBy/SeriesGroupBy class in pandas.
     ignore = ["keys", "level"]
     missing_from_modin = set(pandas_dir) - set(modin_dir) - set(ignore)
     assert not len(missing_from_modin), "Differences found in API: {}".format(

From 2dc03a6eff4691880ae49d2ee1ccc519d26e86ee Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 20:02:43 +0200
Subject: [PATCH 169/176] add 'FIXME'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_general.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modin/pandas/test/test_general.py b/modin/pandas/test/test_general.py
index fdb22a22906..b5059ee4d36 100644
--- a/modin/pandas/test/test_general.py
+++ b/modin/pandas/test/test_general.py
@@ -544,7 +544,7 @@ def test_pivot():
         pd.pivot(test_df["bar"], index="foo", columns="bar", values="baz")
 
     if get_current_execution() != "BaseOnPython" and StorageFormat.get() != "Hdk":
-        # Failed for some reason on 'BaseOnPython' and 'HDK'
+        # FIXME: Failed for some reason on 'BaseOnPython' and 'HDK'
         df_equals(
             pd.pivot(test_df, columns="bar"),
             pandas.pivot(test_df._to_pandas(), columns="bar"),

From e6c5ec9429e03eb2a399202b1e498ac528fc5bdb Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 23:31:58 +0200
Subject: [PATCH 170/176] changes in 'test_groupby.py'

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/pandas/test/test_groupby.py | 58 +++++++------------------------
 modin/pandas/test/test_io.py      |  2 +-
 2 files changed, 13 insertions(+), 47 deletions(-)

diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index 9d784135666..a25322a3465 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -407,14 +407,7 @@ def maybe_get_columns(df, by):
         lambda df: df.sem(),
         modin_df_almost_equals_pandas,
     )
-    # TypeError: 'Categorical' with dtype category does not support reduction 'mean'
-    eval_general(
-        modin_groupby,
-        pandas_groupby,
-        lambda df: df.mean(),
-        modin_df_almost_equals_pandas,
-    )
-
+    eval_mean(modin_groupby, pandas_groupby, numeric_only=True)
     eval_any(modin_groupby, pandas_groupby)
     eval_min(modin_groupby, pandas_groupby)
     eval_general(modin_groupby, pandas_groupby, lambda df: df.idxmax())
@@ -437,17 +430,12 @@ def maybe_get_columns(df, by):
     )
 
     apply_functions = [
-        lambda df: df.sum(),
+        lambda df: df.sum(numeric_only=True),
         lambda df: pandas.Series([1, 2, 3, 4], name="result"),
         min,
     ]
     for func in apply_functions:
-        # TypeError: 'Categorical' with dtype category does not support reduction 'sum'
-        eval_general(
-            modin_groupby,
-            pandas_groupby,
-            lambda grp: grp.apply(func),
-        )
+        eval_apply(modin_groupby, pandas_groupby, func)
 
     eval_dtypes(modin_groupby, pandas_groupby)
     eval_general(modin_groupby, pandas_groupby, lambda df: df.first())
@@ -462,21 +450,8 @@ def maybe_get_columns(df, by):
 
     if as_index:
         eval_std(modin_groupby, pandas_groupby)
-        # TypeError: 'Categorical' with dtype category does not support reduction 'var'
-        eval_general(
-            modin_groupby,
-            pandas_groupby,
-            lambda df: df.var(),
-            modin_df_almost_equals_pandas,
-        )
-
-        # TypeError: 'Categorical' with dtype category does not support reduction 'skew'
-        eval_general(
-            modin_groupby,
-            pandas_groupby,
-            lambda df: df.skew(),
-            modin_df_almost_equals_pandas,
-        )
+        eval_var(modin_groupby, pandas_groupby, numeric_only=True)
+        eval_skew(modin_groupby, pandas_groupby, numeric_only=True)
 
     agg_functions = [
         lambda df: df.sum(),
@@ -664,8 +639,7 @@ def test_single_group_row_groupby():
     eval_general(
         modin_groupby,
         pandas_groupby,
-        # AttributeError: 'DataFrameGroupBy' object has no attribute 'pad'
-        lambda df: df.pct_change(fill_method="ffill"),
+        lambda df: df.pct_change(),
         modin_df_almost_equals_pandas,
     )
     eval_cummax(modin_groupby, pandas_groupby)
@@ -793,8 +767,7 @@ def test_large_row_groupby(is_by_category):
     eval_general(
         modin_groupby,
         pandas_groupby,
-        # AttributeError: 'DataFrameGroupBy' object has no attribute 'pad'
-        lambda df: df.pct_change(fill_method="ffill"),
+        lambda df: df.pct_change(),
         modin_df_almost_equals_pandas,
     )
     eval_cummax(modin_groupby, pandas_groupby)
@@ -911,11 +884,10 @@ def test_simple_col_groupby():
     # eval_cummin(modin_groupby, pandas_groupby)
     # eval_cumprod(modin_groupby, pandas_groupby)
 
-    # AttributeError: 'DataFrameGroupBy' object has no attribute 'pad'
     eval_general(
         modin_groupby,
         pandas_groupby,
-        lambda df: df.pct_change(fill_method="ffill"),
+        lambda df: df.pct_change(),
         modin_df_almost_equals_pandas,
     )
     apply_functions = [lambda df: -df, lambda df: df.sum(axis=1)]
@@ -1038,8 +1010,7 @@ def test_series_groupby(by, as_index_series_or_dataframe):
         eval_general(
             modin_groupby,
             pandas_groupby,
-            # AttributeError: 'DataFrameGroupBy' object has no attribute 'pad'
-            lambda df: df.pct_change(fill_method="ffill"),
+            lambda df: df.pct_change(),
             modin_df_almost_equals_pandas,
         )
         eval_general(
@@ -2182,7 +2153,9 @@ def test_not_str_by(by, as_index):
         pytest.param(
             lambda grp: grp.apply(lambda df: df.dtypes), id="modin_dtypes_impl"
         ),
-        pytest.param(lambda grp: grp.apply(lambda df: df.sum()), id="apply_sum"),
+        pytest.param(
+            lambda grp: grp.apply(lambda df: df.sum(numeric_only=True)), id="apply_sum"
+        ),
         pytest.param(lambda grp: grp.count(), id="count"),
         pytest.param(lambda grp: grp.nunique(), id="nunique"),
         # Integer key means the index of the column to replace it with.
@@ -2238,13 +2211,6 @@ def test_handle_as_index(
             + "https://github.com/pandas-dev/pandas/issues/36698"
         )
 
-    if has_categorical_by and (
-        callable(agg_func) or ("apply_sum" in request.node.callspec.id.split("-"))
-    ):
-        pytest.skip(
-            "TypeError: 'Categorical' with dtype category does not support reduction 'sum'"
-        )
-
     df = pandas.DataFrame(test_groupby_data)
     external_by_cols = GroupBy.validate_by(df.add_prefix("external_"))
 
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 8a9630741a4..250ef4ebddd 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -808,7 +808,7 @@ def test_read_csv_quoting(
             )
 
     # Error Handling parameters tests
-    @pytest.mark.skip
+    @pytest.mark.skip(reason="The reason of tests fail in is unknown")
     @pytest.mark.parametrize("on_bad_lines", ["error", "warn", "skip", None])
     def test_read_csv_error_handling(self, on_bad_lines):
         # in that case exceptions are raised both by Modin and pandas

From 276b3834d6f33d7dfd226db15e02ebfa41f697e9 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Mon, 5 Jun 2023 23:52:15 +0200
Subject: [PATCH 171/176] update comments

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../storage_formats/base/query_compiler.py    |  4 ++-
 modin/pandas/test/test_groupby.py             | 26 +++++++++----------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 49f6b2776f7..e54d7512073 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -3120,6 +3120,9 @@ def groupby_skew(
         drop=False,
     ):
         if axis == 1:
+            # To avoid `ValueError: Operation skew does not support axis=1` due to the
+            # difference in the behavior of `groupby(...).skew(axis=1)` and
+            # `groupby(...).agg("skew", axis=1)`.
             return GroupByDefault.register(pandas.core.groupby.DataFrameGroupBy.skew)(
                 self,
                 by=by,
@@ -3129,7 +3132,6 @@ def groupby_skew(
                 agg_kwargs=agg_kwargs,
                 drop=drop,
             )
-        # ValueError: Operation skew does not support axis=1
         return self.groupby_agg(
             by=by,
             agg_func="skew",
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index a25322a3465..c2471043fe8 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -1342,20 +1342,18 @@ def test(grp):
         return test
 
     # issue-#3252, https://github.com/pandas-dev/pandas/issues/52760
-    """
-    eval_general(
-        md_grp,
-        pd_grp,
-        build_list_agg(["mean"]),
-        comparator=build_types_asserter(df_equals),
-    )
-    eval_general(
-        md_grp,
-        pd_grp,
-        build_list_agg(["mean", "count"]),
-        comparator=build_types_asserter(df_equals),
-    )
-    """
+    # eval_general(
+    #    md_grp,
+    #    pd_grp,
+    #    build_list_agg(["mean"]),
+    #    comparator=build_types_asserter(df_equals),
+    # )
+    # eval_general(
+    #    md_grp,
+    #    pd_grp,
+    #    build_list_agg(["mean", "count"]),
+    #    comparator=build_types_asserter(df_equals),
+    # )
 
     # Explicit default-to-pandas test
     eval_general(

From 6d7e8d260bf08a7bb304c258e902c237736aa043 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 6 Jun 2023 01:14:24 +0200
Subject: [PATCH 172/176] try to avoid old logic for processing 'numeric_only'
 param

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../storage_formats/pandas/query_compiler.py  | 21 ++++++-------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index af3b71a674e..e9f62aef024 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -3714,21 +3714,12 @@ def compute_groupby(df, drop=False, partition_idx=0):
 
         # that means that exception in `compute_groupby` was raised
         # in every partition, so we also should raise it
-        # TODO: we should be able to drop this logic with pandas 2.0.0 as it removes `numeric_only=None`
-        # parameter for groupby thus making the behavior of processing of non-numeric columns more
-        # predictable (we can decide whether to raise an exception before actually executing groupby)
-        if len(result.columns) == 0 and len(self.columns) != 0:
-            # determening type of raised exception by applying `aggfunc`
-            # to empty DataFrame
-            try:
-                pandas.DataFrame(index=[1], columns=[1]).agg(agg_func) if isinstance(
-                    agg_func, dict
-                ) else agg_func(
-                    pandas.DataFrame(index=[1], columns=[1]).groupby(level=0),
-                    **agg_kwargs,
-                )
-            except Exception as err:
-                raise type(err)("No numeric types to aggregate.")
+        if (
+            len(result.columns) == 0
+            and len(self.columns) != 0
+            and agg_kwargs.get("numeric_only", False)
+        ):
+            raise TypeError("No numeric types to aggregate.")
 
         return result
 

From 8121867c032ddd11785e7af24c423c248ace4482 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 6 Jun 2023 14:24:07 +0200
Subject: [PATCH 173/176] address review comments

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/core/dataframe/base/dataframe/dataframe.py   | 2 +-
 modin/core/dataframe/pandas/dataframe/dataframe.py | 2 +-
 modin/pandas/dataframe.py                          | 6 ++++--
 modin/pandas/test/test_general.py                  | 1 +
 modin/pandas/test/test_io.py                       | 2 +-
 5 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/modin/core/dataframe/base/dataframe/dataframe.py b/modin/core/dataframe/base/dataframe/dataframe.py
index 6a6ab7eb910..44c8efa8695 100644
--- a/modin/core/dataframe/base/dataframe/dataframe.py
+++ b/modin/core/dataframe/base/dataframe/dataframe.py
@@ -252,7 +252,7 @@ def groupby(
         passed to the groupby may be at most the number of rows in the group, and
         may be as small as a single row.
 
-        Unlike the pandas API, an intermediate `GROUP BY` object is not present in this
+        Unlike the pandas API, an intermediate "GROUP BY" object is not present in this
         algebra implementation.
         """
         pass
diff --git a/modin/core/dataframe/pandas/dataframe/dataframe.py b/modin/core/dataframe/pandas/dataframe/dataframe.py
index e0ee551bcf5..a3e5ad67958 100644
--- a/modin/core/dataframe/pandas/dataframe/dataframe.py
+++ b/modin/core/dataframe/pandas/dataframe/dataframe.py
@@ -3494,7 +3494,7 @@ def groupby(
         passed to the groupby may be at most the number of rows in the group, and
         may be as small as a single row.
 
-        Unlike the pandas API, an intermediate `GROUP BY` object is not present in this
+        Unlike the pandas API, an intermediate "GROUP BY" object is not present in this
         algebra implementation.
         """
         axis = Axis(axis)
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index efb7449cc2a..23ba89129d0 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -168,10 +168,10 @@ def __init__(
                 columns = pandas.Index(columns)
             if columns is not None:
                 obj_with_new_columns = self.set_axis(columns, axis=1, copy=False)
-                self._query_compiler = obj_with_new_columns._query_compiler
+                self._update_inplace(obj_with_new_columns._query_compiler)
             if index is not None:
                 obj_with_new_index = self.set_axis(index, axis=0, copy=False)
-                self._query_compiler = obj_with_new_index._query_compiler
+                self._update_inplace(obj_with_new_index._query_compiler)
             if dtype is not None:
                 casted_obj = self.astype(dtype, copy=False)
                 self._query_compiler = casted_obj._query_compiler
@@ -655,6 +655,7 @@ def corr(
         """
         Compute pairwise correlation of columns, excluding NA/null values.
         """
+        # FIXME: https://github.com/modin-project/modin/issues/6215
         if not numeric_only:
             return self._default_to_pandas(
                 pandas.DataFrame.corr,
@@ -693,6 +694,7 @@ def cov(
         """
         Compute pairwise covariance of columns, excluding NA/null values.
         """
+        # FIXME: https://github.com/modin-project/modin/issues/6232
         if not numeric_only:
             return self._default_to_pandas(
                 pandas.DataFrame.cov,
diff --git a/modin/pandas/test/test_general.py b/modin/pandas/test/test_general.py
index b5059ee4d36..0c51b7c6680 100644
--- a/modin/pandas/test/test_general.py
+++ b/modin/pandas/test/test_general.py
@@ -545,6 +545,7 @@ def test_pivot():
 
     if get_current_execution() != "BaseOnPython" and StorageFormat.get() != "Hdk":
         # FIXME: Failed for some reason on 'BaseOnPython' and 'HDK'
+        # https://github.com/modin-project/modin/issues/6240
         df_equals(
             pd.pivot(test_df, columns="bar"),
             pandas.pivot(test_df._to_pandas(), columns="bar"),
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 250ef4ebddd..84d11ef6a3b 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -808,7 +808,7 @@ def test_read_csv_quoting(
             )
 
     # Error Handling parameters tests
-    @pytest.mark.skip(reason="The reason of tests fail in is unknown")
+    @pytest.mark.skip(reason="https://github.com/modin-project/modin/issues/6239")
     @pytest.mark.parametrize("on_bad_lines", ["error", "warn", "skip", None])
     def test_read_csv_error_handling(self, on_bad_lines):
         # in that case exceptions are raised both by Modin and pandas

From 4222292ebafde1c38642e85930c45f9df4de434a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 6 Jun 2023 16:41:24 +0200
Subject: [PATCH 174/176] try to exclude 'datetime' type for hdk tests

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 .../dataframe_protocol/hdk/test_protocol.py   | 84 +++++++++++++++----
 1 file changed, 69 insertions(+), 15 deletions(-)

diff --git a/modin/test/interchange/dataframe_protocol/hdk/test_protocol.py b/modin/test/interchange/dataframe_protocol/hdk/test_protocol.py
index 454eb6914a8..483fdc91fcf 100644
--- a/modin/test/interchange/dataframe_protocol/hdk/test_protocol.py
+++ b/modin/test/interchange/dataframe_protocol/hdk/test_protocol.py
@@ -30,13 +30,22 @@
 from .utils import get_data_of_all_types, split_df_into_chunks, export_frame
 
 
-@pytest.mark.xfail(
-    reason="conversion from 'pyarrow' ends up with the wrong datetime64 resolution"
+@pytest.mark.parametrize(
+    "exclude_datetime",
+    [
+        True,
+        pytest.param(
+            False,
+            marks=pytest.mark.xfail(
+                reason="conversion from 'pyarrow' ends up with the wrong datetime64 resolution"
+            ),
+        ),
+    ],
 )
 @pytest.mark.parametrize("data_has_nulls", [True, False])
 @pytest.mark.parametrize("from_hdk", [True, False])
 @pytest.mark.parametrize("n_chunks", [None, 3, 5, 12])
-def test_simple_export(data_has_nulls, from_hdk, n_chunks):
+def test_simple_export(data_has_nulls, from_hdk, n_chunks, exclude_datetime):
     if from_hdk:
         # HDK can't import 'uint64' as well as booleans
         # issue for bool: https://github.com/modin-project/modin/issues/4299
@@ -44,6 +53,9 @@ def test_simple_export(data_has_nulls, from_hdk, n_chunks):
     else:
         exclude_dtypes = None
 
+    if exclude_datetime:
+        exclude_dtypes += ["datetime"]
+
     data = get_data_of_all_types(
         has_nulls=data_has_nulls, exclude_dtypes=exclude_dtypes
     )
@@ -53,15 +65,29 @@ def test_simple_export(data_has_nulls, from_hdk, n_chunks):
     df_equals(md_df, exported_df)
 
 
-@pytest.mark.xfail(
-    reason="conversion from 'pyarrow' ends up with the wrong datetime64 resolution"
+@pytest.mark.parametrize(
+    "exclude_datetime",
+    [
+        True,
+        pytest.param(
+            False,
+            marks=pytest.mark.xfail(
+                reason="conversion from 'pyarrow' ends up with the wrong datetime64 resolution"
+            ),
+        ),
+    ],
 )
 @pytest.mark.parametrize("n_chunks", [2, 4, 7])
 @pytest.mark.parametrize("data_has_nulls", [True, False])
-def test_export_aligned_at_chunks(n_chunks, data_has_nulls):
+def test_export_aligned_at_chunks(n_chunks, data_has_nulls, exclude_datetime):
     """Test export from DataFrame exchange protocol when internal PyArrow table is equaly chunked."""
+    exclude_dtypes = ["category"]
+    if exclude_datetime:
+        exclude_dtypes += ["datetime"]
     # Modin DataFrame constructor can't process PyArrow's category when using `from_arrow`, so exclude it
-    data = get_data_of_all_types(has_nulls=data_has_nulls, exclude_dtypes=["category"])
+    data = get_data_of_all_types(
+        has_nulls=data_has_nulls, exclude_dtypes=exclude_dtypes
+    )
     pd_df = pandas.DataFrame(data)
     pd_chunks = split_df_into_chunks(pd_df, n_chunks)
 
@@ -86,11 +112,20 @@ def test_export_aligned_at_chunks(n_chunks, data_has_nulls):
     df_equals(md_df, exported_df)
 
 
-@pytest.mark.xfail(
-    reason="conversion from 'pyarrow' ends up with the wrong datetime64 resolution"
+@pytest.mark.parametrize(
+    "exclude_datetime",
+    [
+        True,
+        pytest.param(
+            False,
+            marks=pytest.mark.xfail(
+                reason="conversion from 'pyarrow' ends up with the wrong datetime64 resolution"
+            ),
+        ),
+    ],
 )
 @pytest.mark.parametrize("data_has_nulls", [True, False])
-def test_export_unaligned_at_chunks(data_has_nulls):
+def test_export_unaligned_at_chunks(data_has_nulls, exclude_datetime):
     """
     Test export from DataFrame exchange protocol when internal PyArrow table's chunks are unaligned.
 
@@ -98,8 +133,13 @@ def test_export_unaligned_at_chunks(data_has_nulls):
     each column has its individual chunking and so some preprocessing is required in order
     to emulate equaly chunked columns in the protocol.
     """
+    exclude_dtypes = ["category"]
+    if exclude_datetime:
+        exclude_dtypes += ["datetime"]
     # Modin DataFrame constructor can't process PyArrow's category when using `from_arrow`, so exclude it
-    data = get_data_of_all_types(has_nulls=data_has_nulls, exclude_dtypes=["category"])
+    data = get_data_of_all_types(
+        has_nulls=data_has_nulls, exclude_dtypes=exclude_dtypes
+    )
     pd_df = pandas.DataFrame(data)
     # divide columns in 3 groups: unchunked, 2-chunked, 7-chunked
     chunk_groups = [1, 2, 7]
@@ -148,18 +188,32 @@ def test_export_unaligned_at_chunks(data_has_nulls):
     df_equals(md_df, exported_df)
 
 
-@pytest.mark.xfail(
-    reason="conversion from 'pyarrow' ends up with the wrong datetime64 resolution"
+@pytest.mark.parametrize(
+    "exclude_datetime",
+    [
+        True,
+        pytest.param(
+            False,
+            marks=pytest.mark.xfail(
+                reason="conversion from 'pyarrow' ends up with the wrong datetime64 resolution"
+            ),
+        ),
+    ],
 )
 @pytest.mark.parametrize("data_has_nulls", [True, False])
-def test_export_indivisible_chunking(data_has_nulls):
+def test_export_indivisible_chunking(data_has_nulls, exclude_datetime):
     """
     Test ``.get_chunks(n_chunks)`` when internal PyArrow table's is 'indivisibly chunked'.
 
     The setup for the test is a PyArrow table having one of the chunk consisting of a single row,
     meaning that the chunk can't be subdivide.
     """
-    data = get_data_of_all_types(has_nulls=data_has_nulls, exclude_dtypes=["category"])
+    exclude_dtypes = ["category"]
+    if exclude_datetime:
+        exclude_dtypes += ["datetime"]
+    data = get_data_of_all_types(
+        has_nulls=data_has_nulls, exclude_dtypes=exclude_dtypes
+    )
     pd_df = pandas.DataFrame(data)
     pd_chunks = (pd_df.iloc[:1], pd_df.iloc[1:])
 

From 7b8d20797880d61f75dd76f21b7011784bc0a0a8 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoliimyachev@mail.com>
Date: Tue, 6 Jun 2023 16:43:33 +0200
Subject: [PATCH 175/176] Update modin/pandas/series.py

Co-authored-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
---
 modin/pandas/series.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modin/pandas/series.py b/modin/pandas/series.py
index cfc005d0073..a27b0ef03ae 100644
--- a/modin/pandas/series.py
+++ b/modin/pandas/series.py
@@ -1752,6 +1752,7 @@ def swaplevel(self, i=-2, j=-1, copy=None):  # noqa: PR01, RT01, D200
         """
         Swap levels `i` and `j` in a `MultiIndex`.
         """
+        copy = True if copy is None else copy
         obj = self.copy() if copy else self
         return super(Series, obj).swaplevel(i, j, axis=0)
 

From c1e7cb8f272357a5418330166a0e86e7448d6da7 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Tue, 6 Jun 2023 17:02:18 +0200
Subject: [PATCH 176/176] fix

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
---
 modin/test/interchange/dataframe_protocol/hdk/test_protocol.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modin/test/interchange/dataframe_protocol/hdk/test_protocol.py b/modin/test/interchange/dataframe_protocol/hdk/test_protocol.py
index 483fdc91fcf..ecdad425af5 100644
--- a/modin/test/interchange/dataframe_protocol/hdk/test_protocol.py
+++ b/modin/test/interchange/dataframe_protocol/hdk/test_protocol.py
@@ -51,7 +51,7 @@ def test_simple_export(data_has_nulls, from_hdk, n_chunks, exclude_datetime):
         # issue for bool: https://github.com/modin-project/modin/issues/4299
         exclude_dtypes = ["bool", "uint64"]
     else:
-        exclude_dtypes = None
+        exclude_dtypes = []
 
     if exclude_datetime:
         exclude_dtypes += ["datetime"]