diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 842629ba331d6..08c41a1eeb21f 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -107,7 +107,7 @@ jobs: services: mysql: - image: mysql:8 + image: mysql:9 env: MYSQL_ALLOW_EMPTY_PASSWORD: yes MYSQL_DATABASE: pandas @@ -120,7 +120,7 @@ jobs: - 3306:3306 postgres: - image: postgres:16 + image: postgres:17 env: PGUSER: postgres POSTGRES_USER: postgres @@ -135,7 +135,7 @@ jobs: - 5432:5432 moto: - image: motoserver/moto:5.0.0 + image: motoserver/moto:5.0.27 env: AWS_ACCESS_KEY_ID: foobar_key AWS_SECRET_ACCESS_KEY: foobar_secret @@ -242,15 +242,14 @@ jobs: - name: Build environment and Run Tests # https://github.com/numpy/numpy/issues/24703#issuecomment-1722379388 run: | - /opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev + /opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev . ~/virtualenvs/pandas-dev/bin/activate python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1 python -m pip install numpy -Csetup-args="-Dallow-noblas=true" python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 python -m pip install --no-cache-dir --no-build-isolation -e . -Csetup-args="--werror" python -m pip list --no-cache-dir - export PANDAS_CI=1 - python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml + PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml concurrency: # https://github.community/t/concurrecy-not-work-for-push/183068/7 group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-32bit @@ -259,7 +258,7 @@ jobs: Linux-Musl: runs-on: ubuntu-22.04 container: - image: quay.io/pypa/musllinux_1_1_x86_64 + image: quay.io/pypa/musllinux_1_2_x86_64 steps: - name: Checkout pandas Repo # actions/checkout does not work since it requires node @@ -281,7 +280,7 @@ jobs: apk add musl-locales - name: Build environment run: | - /opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev + /opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev . ~/virtualenvs/pandas-dev/bin/activate python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1 python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0 @@ -291,8 +290,7 @@ jobs: - name: Run Tests run: | . 
~/virtualenvs/pandas-dev/bin/activate - export PANDAS_CI=1 - python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml + PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml concurrency: # https://github.community/t/concurrecy-not-work-for-push/183068/7 group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-musl @@ -357,8 +355,7 @@ jobs: python --version python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1 python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy - python -m pip install versioneer[toml] - python -m pip install python-dateutil tzdata cython hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov + python -m pip install versioneer[toml] python-dateutil tzdata cython hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov python -m pip install -ve . --no-build-isolation --no-index --no-deps -Csetup-args="--werror" python -m pip list @@ -375,7 +372,7 @@ jobs: concurrency: # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.os }}-python-freethreading-dev + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-python-freethreading-dev cancel-in-progress: true env: @@ -396,14 +393,11 @@ jobs: nogil: true - name: Build Environment - # TODO: Once numpy 2.2.1 is out, don't install nightly version - # Tests segfault with numpy 2.2.0: https://github.com/numpy/numpy/pull/27955 run: | python --version - python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1 - python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython numpy - python -m pip install versioneer[toml] - python -m pip install python-dateutil pytz tzdata hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov + python -m pip install --upgrade pip setuptools wheel numpy meson[ninja]==1.2.1 meson-python==0.13.1 + python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython + python -m pip install versioneer[toml] python-dateutil pytz tzdata hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov python -m pip install -ve . 
--no-build-isolation --no-index --no-deps -Csetup-args="--werror" python -m pip list diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1dd8dfc54111e..77bcadf57dd2d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ ci: skip: [pyright, mypy] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.6 + rev: v0.9.4 hooks: - id: ruff args: [--exit-non-zero-on-fix] @@ -41,7 +41,7 @@ repos: pass_filenames: true require_serial: false - repo: https://github.com/codespell-project/codespell - rev: v2.3.0 + rev: v2.4.1 hooks: - id: codespell types_or: [python, rst, markdown, cython, c] @@ -70,7 +70,7 @@ repos: - id: trailing-whitespace args: [--markdown-linebreak-ext=md] - repo: https://github.com/PyCQA/isort - rev: 5.13.2 + rev: 6.0.0 hooks: - id: isort - repo: https://github.com/asottile/pyupgrade @@ -95,14 +95,14 @@ repos: - id: sphinx-lint args: ["--enable", "all", "--disable", "line-too-long"] - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v19.1.6 + rev: v19.1.7 hooks: - id: clang-format files: ^pandas/_libs/src|^pandas/_libs/include args: [-i] types_or: [c, c++] - repo: https://github.com/trim21/pre-commit-mirror-meson - rev: v1.6.1 + rev: v1.7.0 hooks: - id: meson-fmt args: ['--inplace'] diff --git a/asv_bench/benchmarks/io/style.py b/asv_bench/benchmarks/io/style.py index 24fd8a0d20aba..0486cabb29845 100644 --- a/asv_bench/benchmarks/io/style.py +++ b/asv_bench/benchmarks/io/style.py @@ -13,8 +13,8 @@ class Render: def setup(self, cols, rows): self.df = DataFrame( np.random.randn(rows, cols), - columns=[f"float_{i+1}" for i in range(cols)], - index=[f"row_{i+1}" for i in range(rows)], + columns=[f"float_{i + 1}" for i in range(cols)], + index=[f"row_{i + 1}" for i in range(rows)], ) def time_apply_render(self, cols, rows): diff --git a/doc/make.py b/doc/make.py index 02deb5002fea1..9542563dc037b 100755 --- a/doc/make.py +++ b/doc/make.py @@ -260,8 +260,7 @@ def latex(self, force=False): for i in range(3): self._run_os("pdflatex", "-interaction=nonstopmode", "pandas.tex") raise SystemExit( - "You should check the file " - '"build/latex/pandas.pdf" for problems.' + 'You should check the file "build/latex/pandas.pdf" for problems.' 
) self._run_os("make") return ret_code @@ -343,8 +342,7 @@ def main(): dest="verbosity", default=0, help=( - "increase verbosity (can be repeated), " - "passed to the sphinx build command" + "increase verbosity (can be repeated), passed to the sphinx build command" ), ) argparser.add_argument( diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index abb7181fc8d72..9cda1486eb48b 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -1288,7 +1288,7 @@ "outputs": [], "source": [ "df2.loc[:4].style.highlight_max(\n", - " axis=1, props=(\"color:white; \" \"font-weight:bold; \" \"background-color:darkblue;\")\n", + " axis=1, props=(\"color:white; font-weight:bold; background-color:darkblue;\")\n", ")" ] }, diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 64f4a66a109f5..95b5f7eea5eeb 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -59,9 +59,9 @@ Other enhancements - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`) +- :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` methods ``sum``, ``mean``, ``median``, ``prod``, ``min``, ``max``, ``std``, ``var`` and ``sem`` now accept ``skipna`` parameter (:issue:`15675`) - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) -- :meth:`.DataFrameGroupBy.mean`, :meth:`.DataFrameGroupBy.sum`, :meth:`.SeriesGroupBy.mean` and :meth:`.SeriesGroupBy.sum` now accept ``skipna`` parameter (:issue:`15675`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`) - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`) @@ -766,6 +766,7 @@ Reshaping - Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`) - Bug in :meth:`DataFrame.merge` when merging two :class:`DataFrame` on ``intc`` or ``uintc`` types on Windows (:issue:`60091`, :issue:`58713`) - Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`) +- Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`) - Bug in :meth:`DataFrame.unstack` 
producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`) Sparse diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 35139979f92fe..0d06e6fa8e96c 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -321,6 +321,11 @@ def reset_option(pat: str) -> None: """ Reset one or more options to their default value. + This method resets the specified pandas option(s) back to their default + values. It allows partial string matching for convenience, but users should + exercise caution to avoid unintended resets due to changes in option names + in future versions. + Parameters ---------- pat : str/regex diff --git a/pandas/_libs/groupby.pyi b/pandas/_libs/groupby.pyi index e3909203d1f5a..163fc23535022 100644 --- a/pandas/_libs/groupby.pyi +++ b/pandas/_libs/groupby.pyi @@ -13,6 +13,7 @@ def group_median_float64( mask: np.ndarray | None = ..., result_mask: np.ndarray | None = ..., is_datetimelike: bool = ..., # bint + skipna: bool = ..., ) -> None: ... def group_cumprod( out: np.ndarray, # float64_t[:, ::1] @@ -76,6 +77,7 @@ def group_prod( mask: np.ndarray | None, result_mask: np.ndarray | None = ..., min_count: int = ..., + skipna: bool = ..., ) -> None: ... def group_var( out: np.ndarray, # floating[:, ::1] @@ -88,6 +90,7 @@ def group_var( result_mask: np.ndarray | None = ..., is_datetimelike: bool = ..., name: str = ..., + skipna: bool = ..., ) -> None: ... def group_skew( out: np.ndarray, # float64_t[:, ::1] @@ -183,6 +186,7 @@ def group_max( is_datetimelike: bool = ..., mask: np.ndarray | None = ..., result_mask: np.ndarray | None = ..., + skipna: bool = ..., ) -> None: ... def group_min( out: np.ndarray, # groupby_t[:, ::1] @@ -193,6 +197,7 @@ def group_min( is_datetimelike: bool = ..., mask: np.ndarray | None = ..., result_mask: np.ndarray | None = ..., + skipna: bool = ..., ) -> None: ...
def group_idxmin_idxmax( out: npt.NDArray[np.intp], diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 70af22f514ce0..16a104a46ed3d 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -62,7 +62,12 @@ cdef enum InterpolationEnumType: INTERPOLATION_MIDPOINT -cdef float64_t median_linear_mask(float64_t* a, int n, uint8_t* mask) noexcept nogil: +cdef float64_t median_linear_mask( + float64_t* a, + int n, + uint8_t* mask, + bint skipna=True +) noexcept nogil: cdef: int i, j, na_count = 0 float64_t* tmp @@ -77,7 +82,7 @@ cdef float64_t median_linear_mask(float64_t* a, int n, uint8_t* mask) noexcept n na_count += 1 if na_count: - if na_count == n: + if na_count == n or not skipna: return NaN tmp = malloc((n - na_count) * sizeof(float64_t)) @@ -104,7 +109,8 @@ cdef float64_t median_linear_mask(float64_t* a, int n, uint8_t* mask) noexcept n cdef float64_t median_linear( float64_t* a, int n, - bint is_datetimelike=False + bint is_datetimelike=False, + bint skipna=True, ) noexcept nogil: cdef: int i, j, na_count = 0 @@ -125,7 +131,7 @@ cdef float64_t median_linear( na_count += 1 if na_count: - if na_count == n: + if na_count == n or not skipna: return NaN tmp = malloc((n - na_count) * sizeof(float64_t)) @@ -186,6 +192,7 @@ def group_median_float64( const uint8_t[:, :] mask=None, uint8_t[:, ::1] result_mask=None, bint is_datetimelike=False, + bint skipna=True, ) -> None: """ Only aggregates on axis=0 @@ -229,7 +236,7 @@ def group_median_float64( for j in range(ngroups): size = _counts[j + 1] - result = median_linear_mask(ptr, size, ptr_mask) + result = median_linear_mask(ptr, size, ptr_mask, skipna) out[j, i] = result if result != result: @@ -244,7 +251,7 @@ def group_median_float64( ptr += _counts[0] for j in range(ngroups): size = _counts[j + 1] - out[j, i] = median_linear(ptr, size, is_datetimelike) + out[j, i] = median_linear(ptr, size, is_datetimelike, skipna) ptr += size @@ -804,17 +811,18 @@ def group_prod( const uint8_t[:, ::1] mask, uint8_t[:, ::1] result_mask=None, Py_ssize_t min_count=0, + bint skipna=True, ) -> None: """ Only aggregates on axis=0 """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - int64float_t val + int64float_t val, nan_val int64float_t[:, ::1] prodx int64_t[:, ::1] nobs Py_ssize_t len_values = len(values), len_labels = len(labels) - bint isna_entry, uses_mask = mask is not None + bint isna_entry, isna_result, uses_mask = mask is not None if len_values != len_labels: raise ValueError("len(index) != len(labels)") @@ -823,6 +831,7 @@ def group_prod( prodx = np.ones((out).shape, dtype=(out).base.dtype) N, K = (values).shape + nan_val = _get_na_val(0, False) with nogil: for i in range(N): @@ -836,12 +845,23 @@ def group_prod( if uses_mask: isna_entry = mask[i, j] + isna_result = result_mask[lab, j] else: isna_entry = _treat_as_na(val, False) + isna_result = _treat_as_na(prodx[lab, j], False) + + if not skipna and isna_result: + # If prod is already NA, no need to update it + continue if not isna_entry: nobs[lab, j] += 1 prodx[lab, j] *= val + elif not skipna: + if uses_mask: + result_mask[lab, j] = True + else: + prodx[lab, j] = nan_val _check_below_mincount( out, uses_mask, result_mask, ncounts, K, nobs, min_count, prodx @@ -862,6 +882,7 @@ def group_var( uint8_t[:, ::1] result_mask=None, bint is_datetimelike=False, str name="var", + bint skipna=True, ) -> None: cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) @@ -869,7 +890,7 @@ def group_var( floating[:, ::1] mean int64_t[:, ::1] nobs Py_ssize_t len_values = 
len(values), len_labels = len(labels) - bint isna_entry, uses_mask = mask is not None + bint isna_entry, isna_result, uses_mask = mask is not None bint is_std = name == "std" bint is_sem = name == "sem" @@ -898,19 +919,34 @@ def group_var( if uses_mask: isna_entry = mask[i, j] + isna_result = result_mask[lab, j] elif is_datetimelike: # With group_var, we cannot just use _treat_as_na bc # datetimelike dtypes get cast to float64 instead of # to int64. isna_entry = val == NPY_NAT + isna_result = out[lab, j] == NPY_NAT else: isna_entry = _treat_as_na(val, is_datetimelike) + isna_result = _treat_as_na(out[lab, j], is_datetimelike) + + if not skipna and isna_result: + # If aggregate is already NA, don't add to it. This is important for + # datetimelike because adding a value to NPY_NAT may not result + # in a NPY_NAT + continue if not isna_entry: nobs[lab, j] += 1 oldmean = mean[lab, j] mean[lab, j] += (val - oldmean) / nobs[lab, j] out[lab, j] += (val - mean[lab, j]) * (val - oldmean) + elif not skipna: + nobs[lab, j] = 0 + if uses_mask: + result_mask[lab, j] = True + else: + out[lab, j] = NAN for i in range(ncounts): for j in range(K): @@ -1164,7 +1200,7 @@ def group_mean( mean_t[:, ::1] sumx, compensation int64_t[:, ::1] nobs Py_ssize_t len_values = len(values), len_labels = len(labels) - bint isna_entry, uses_mask = mask is not None + bint isna_entry, isna_result, uses_mask = mask is not None assert min_count == -1, "'min_count' only used in sum and prod" @@ -1194,25 +1230,24 @@ def group_mean( for j in range(K): val = values[i, j] - if not skipna and ( - (uses_mask and result_mask[lab, j]) or - (is_datetimelike and sumx[lab, j] == NPY_NAT) or - _treat_as_na(sumx[lab, j], False) - ): - # If sum is already NA, don't add to it. This is important for - # datetimelike because adding a value to NPY_NAT may not result - # in NPY_NAT - continue - if uses_mask: isna_entry = mask[i, j] + isna_result = result_mask[lab, j] elif is_datetimelike: # With group_mean, we cannot just use _treat_as_na bc # datetimelike dtypes get cast to float64 instead of # to int64. isna_entry = val == NPY_NAT + isna_result = sumx[lab, j] == NPY_NAT else: isna_entry = _treat_as_na(val, is_datetimelike) + isna_result = _treat_as_na(sumx[lab, j], is_datetimelike) + + if not skipna and isna_result: + # If sum is already NA, don't add to it. This is important for + # datetimelike because adding a value to NPY_NAT may not result + # in NPY_NAT + continue if not isna_entry: nobs[lab, j] += 1 @@ -1806,6 +1841,7 @@ cdef group_min_max( bint compute_max=True, const uint8_t[:, ::1] mask=None, uint8_t[:, ::1] result_mask=None, + bint skipna=True, ): """ Compute minimum/maximum of columns of `values`, in row groups `labels`. @@ -1833,6 +1869,8 @@ cdef group_min_max( result_mask : ndarray[bool, ndim=2], optional If not None, these specify locations in the output that are NA. Modified in-place. + skipna : bool, default True + If True, ignore nans in `values`. 
Notes ----- @@ -1841,17 +1879,18 @@ cdef group_min_max( """ cdef: Py_ssize_t i, j, N, K, lab, ngroups = len(counts) - numeric_t val + numeric_t val, nan_val numeric_t[:, ::1] group_min_or_max int64_t[:, ::1] nobs bint uses_mask = mask is not None - bint isna_entry + bint isna_entry, isna_result if not len(values) == len(labels): raise AssertionError("len(index) != len(labels)") min_count = max(min_count, 1) nobs = np.zeros((out).shape, dtype=np.int64) + nan_val = _get_na_val(0, is_datetimelike) group_min_or_max = np.empty_like(out) group_min_or_max[:] = _get_min_or_max(0, compute_max, is_datetimelike) @@ -1870,8 +1909,15 @@ cdef group_min_max( if uses_mask: isna_entry = mask[i, j] + isna_result = result_mask[lab, j] else: isna_entry = _treat_as_na(val, is_datetimelike) + isna_result = _treat_as_na(group_min_or_max[lab, j], + is_datetimelike) + + if not skipna and isna_result: + # If current min/max is already NA, it will always be NA + continue if not isna_entry: nobs[lab, j] += 1 @@ -1881,6 +1927,11 @@ cdef group_min_max( else: if val < group_min_or_max[lab, j]: group_min_or_max[lab, j] = val + elif not skipna: + if uses_mask: + result_mask[lab, j] = True + else: + group_min_or_max[lab, j] = nan_val _check_below_mincount( out, uses_mask, result_mask, ngroups, K, nobs, min_count, group_min_or_max @@ -2012,6 +2063,7 @@ def group_max( bint is_datetimelike=False, const uint8_t[:, ::1] mask=None, uint8_t[:, ::1] result_mask=None, + bint skipna=True, ) -> None: """See group_min_max.__doc__""" group_min_max( @@ -2024,6 +2076,7 @@ def group_max( compute_max=True, mask=mask, result_mask=result_mask, + skipna=skipna, ) @@ -2038,6 +2091,7 @@ def group_min( bint is_datetimelike=False, const uint8_t[:, ::1] mask=None, uint8_t[:, ::1] result_mask=None, + bint skipna=True, ) -> None: """See group_min_max.__doc__""" group_min_max( @@ -2050,6 +2104,7 @@ def group_min( compute_max=False, mask=mask, result_mask=result_mask, + skipna=skipna, ) diff --git a/pandas/core/_numba/kernels/min_max_.py b/pandas/core/_numba/kernels/min_max_.py index 59d36732ebae6..d56453e4e5abf 100644 --- a/pandas/core/_numba/kernels/min_max_.py +++ b/pandas/core/_numba/kernels/min_max_.py @@ -88,6 +88,7 @@ def grouped_min_max( ngroups: int, min_periods: int, is_max: bool, + skipna: bool = True, ) -> tuple[np.ndarray, list[int]]: N = len(labels) nobs = np.zeros(ngroups, dtype=np.int64) @@ -97,13 +98,16 @@ def grouped_min_max( for i in range(N): lab = labels[i] val = values[i] - if lab < 0: + if lab < 0 or (nobs[lab] >= 1 and np.isnan(output[lab])): continue if values.dtype.kind == "i" or not np.isnan(val): nobs[lab] += 1 else: - # NaN value cannot be a min/max value + if not skipna: + # If skipna is False and we encounter a NaN, + # both min and max of the group will be NaN + output[lab] = np.nan continue if nobs[lab] == 1: diff --git a/pandas/core/_numba/kernels/var_.py b/pandas/core/_numba/kernels/var_.py index 69aec4d6522c4..5d720c877815d 100644 --- a/pandas/core/_numba/kernels/var_.py +++ b/pandas/core/_numba/kernels/var_.py @@ -176,6 +176,7 @@ def grouped_var( ngroups: int, min_periods: int, ddof: int = 1, + skipna: bool = True, ) -> tuple[np.ndarray, list[int]]: N = len(labels) @@ -190,7 +191,11 @@ def grouped_var( lab = labels[i] val = values[i] - if lab < 0: + if lab < 0 or np.isnan(output[lab]): + continue + + if not skipna and np.isnan(val): + output[lab] = np.nan continue mean_x = means[lab] diff --git a/pandas/core/apply.py b/pandas/core/apply.py index af513d49bcfe0..f36fc82fb1a11 100644 --- a/pandas/core/apply.py +++ 
b/pandas/core/apply.py @@ -1645,8 +1645,7 @@ def reconstruct_func( # GH 28426 will raise error if duplicated function names are used and # there is no reassigned name raise SpecificationError( - "Function names must be unique if there is no new column names " - "assigned" + "Function names must be unique if there is no new column names assigned" ) if func is None: # nicer error message diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e831883998098..33745438e2aea 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1791,9 +1791,11 @@ def take(self, indices, allow_fill=False, fill_value=None): # type for the array, to the physical storage type for # the data, before passing to take. - result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill) + result = take( + data, indices, fill_value=fill_value, allow_fill=allow_fill + ) return self._from_sequence(result, dtype=self.dtype) - """ # noqa: E501 + """ # Implementer note: The `fill_value` parameter should be a user-facing # value, an instance of self.dtype.type. When passed `fill_value=None`, # the default of `self.dtype.na_value` should be used. diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 43cc492f82885..df40c9c11b117 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2707,8 +2707,7 @@ def _maybe_infer_tz(tz: tzinfo | None, inferred_tz: tzinfo | None) -> tzinfo | N pass elif not timezones.tz_compare(tz, inferred_tz): raise TypeError( - f"data is already tz-aware {inferred_tz}, unable to " - f"set specified tz: {tz}" + f"data is already tz-aware {inferred_tz}, unable to set specified tz: {tz}" ) return tz diff --git a/pandas/core/base.py b/pandas/core/base.py index 61a7c079d87f8..a64cd8633c1db 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -506,6 +506,11 @@ def array(self) -> ExtensionArray: """ The ExtensionArray of the data backing this Series or Index. + This property provides direct access to the underlying array data of a + Series or Index without requiring conversion to a NumPy array. It + returns an ExtensionArray, which is the native storage format for + pandas extension dtypes. 
+ Returns ------- ExtensionArray diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 9d844e590582a..f8e3200ef2ba0 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -204,7 +204,7 @@ def eval( By default, with the numexpr engine, the following operations are supported: - - Arthimetic operations: ``+``, ``-``, ``*``, ``/``, ``**``, ``%`` + - Arithmetic operations: ``+``, ``-``, ``*``, ``/``, ``**``, ``%`` - Boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not) - Comparison operators: ``<``, ``<=``, ``==``, ``!=``, ``>=``, ``>`` diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index 010fad1bbf0b6..14a393b02409c 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -698,7 +698,7 @@ def visit_Call(self, node, side=None, **kwargs): if not isinstance(key, ast.keyword): # error: "expr" has no attribute "id" raise ValueError( - "keyword error in function call " f"'{node.func.id}'" # type: ignore[attr-defined] + f"keyword error in function call '{node.func.id}'" # type: ignore[attr-defined] ) if key.arg: diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 9b26de42e119b..f06ded6d9f98e 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -512,8 +512,7 @@ def __init__(self, op: Literal["+", "-", "~", "not"], operand) -> None: self.func = _unary_ops_dict[op] except KeyError as err: raise ValueError( - f"Invalid unary operator {op!r}, " - f"valid operators are {UNARY_OPS_SYMS}" + f"Invalid unary operator {op!r}, valid operators are {UNARY_OPS_SYMS}" ) from err def __call__(self, env) -> MathCall: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 02b9291da9b31..94531c2ac87e8 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1651,7 +1651,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n # (test_constructor_coercion_signed_to_unsigned) so safe to ignore. warnings.filterwarnings( "ignore", - "NumPy will stop allowing conversion of " "out-of-bound Python int", + "NumPy will stop allowing conversion of out-of-bound Python int", DeprecationWarning, ) casted = np.asarray(arr, dtype=dtype) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 1eb1a630056a2..d8dd6441913b5 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -605,8 +605,7 @@ def update_dtype(self, dtype: str_type | CategoricalDtype) -> CategoricalDtype: return self elif not self.is_dtype(dtype): raise ValueError( - f"a CategoricalDtype must be passed to perform an update, " - f"got {dtype!r}" + f"a CategoricalDtype must be passed to perform an update, got {dtype!r}" ) else: # from here on, dtype is a CategoricalDtype diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 04df6e65f4a59..b715e526e0f33 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -13677,6 +13677,10 @@ def isin_(x): doc=""" The column labels of the DataFrame. + This property holds the column names as a pandas ``Index`` object. + It provides an immutable sequence of column labels that can be + used for data selection, renaming, and alignment in DataFrame operations. 
+ Returns ------- pandas.Index diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e0a4f9d9c546a..f376518d4d3b8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5537,8 +5537,7 @@ def filter( nkw = common.count_not_none(items, like, regex) if nkw > 1: raise TypeError( - "Keyword arguments `items`, `like`, or `regex` " - "are mutually exclusive" + "Keyword arguments `items`, `like`, or `regex` are mutually exclusive" ) if axis is None: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f9059e6e8896f..27865a60f6ea3 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -570,6 +570,13 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: """ Dict {group name -> group indices}. + The dictionary keys represent the group labels (e.g., timestamps for a + time-based resampling operation), and the values are arrays of integer + positions indicating where the elements of each group are located in the + original data. This property is particularly useful when working with + resampled data, as it provides insight into how the original time-series data + has been grouped. + See Also -------- core.groupby.DataFrameGroupBy.indices : Provides a mapping of group rows to @@ -2163,8 +2170,7 @@ def mean( numeric_only no longer accepts ``None`` and defaults to ``False``. skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. + Exclude NA/null values. If an entire group is NA, the result will be NA. .. versionadded:: 3.0.0 @@ -2248,7 +2254,7 @@ def mean( return result.__finalize__(self.obj, method="groupby") @final - def median(self, numeric_only: bool = False) -> NDFrameT: + def median(self, numeric_only: bool = False, skipna: bool = True) -> NDFrameT: """ Compute median of groups, excluding missing values. @@ -2263,6 +2269,11 @@ def median(self, numeric_only: bool = False) -> NDFrameT: numeric_only no longer accepts ``None`` and defaults to False. + skipna : bool, default True + Exclude NA/null values. If an entire group is NA, the result will be NA. + + .. versionadded:: 3.0.0 + Returns ------- Series or DataFrame @@ -2335,8 +2346,11 @@ def median(self, numeric_only: bool = False) -> NDFrameT: """ result = self._cython_agg_general( "median", - alt=lambda x: Series(x, copy=False).median(numeric_only=numeric_only), + alt=lambda x: Series(x, copy=False).median( + numeric_only=numeric_only, skipna=skipna + ), numeric_only=numeric_only, + skipna=skipna, ) return result.__finalize__(self.obj, method="groupby") @@ -2349,6 +2363,7 @@ def std( engine: Literal["cython", "numba"] | None = None, engine_kwargs: dict[str, bool] | None = None, numeric_only: bool = False, + skipna: bool = True, ): """ Compute standard deviation of groups, excluding missing values. @@ -2387,6 +2402,11 @@ def std( numeric_only now defaults to ``False``. + skipna : bool, default True + Exclude NA/null values. If an entire group is NA, the result will be NA. + + .. 
versionadded:: 3.0.0 + Returns ------- Series or DataFrame @@ -2441,14 +2461,16 @@ def std( engine_kwargs, min_periods=0, ddof=ddof, + skipna=skipna, ) ) else: return self._cython_agg_general( "std", - alt=lambda x: Series(x, copy=False).std(ddof=ddof), + alt=lambda x: Series(x, copy=False).std(ddof=ddof, skipna=skipna), numeric_only=numeric_only, ddof=ddof, + skipna=skipna, ) @final @@ -2460,6 +2482,7 @@ def var( engine: Literal["cython", "numba"] | None = None, engine_kwargs: dict[str, bool] | None = None, numeric_only: bool = False, + skipna: bool = True, ): """ Compute variance of groups, excluding missing values. @@ -2497,6 +2520,11 @@ def var( numeric_only now defaults to ``False``. + skipna : bool, default True + Exclude NA/null values. If an entire group is NA, the result will be NA. + + .. versionadded:: 3.0.0 + Returns ------- Series or DataFrame @@ -2550,13 +2578,15 @@ def var( engine_kwargs, min_periods=0, ddof=ddof, + skipna=skipna, ) else: return self._cython_agg_general( "var", - alt=lambda x: Series(x, copy=False).var(ddof=ddof), + alt=lambda x: Series(x, copy=False).var(ddof=ddof, skipna=skipna), numeric_only=numeric_only, ddof=ddof, + skipna=skipna, ) @final @@ -2598,8 +2628,7 @@ def _value_counts( doesnt_exist = subsetted - unique_cols if doesnt_exist: raise ValueError( - f"Keys {doesnt_exist} in subset do not " - f"exist in the DataFrame." + f"Keys {doesnt_exist} in subset do not exist in the DataFrame." ) else: subsetted = unique_cols @@ -2686,7 +2715,9 @@ def _value_counts( return result.__finalize__(self.obj, method="value_counts") @final - def sem(self, ddof: int = 1, numeric_only: bool = False) -> NDFrameT: + def sem( + self, ddof: int = 1, numeric_only: bool = False, skipna: bool = True + ) -> NDFrameT: """ Compute standard error of the mean of groups, excluding missing values. @@ -2706,6 +2737,11 @@ def sem(self, ddof: int = 1, numeric_only: bool = False) -> NDFrameT: numeric_only now defaults to ``False``. + skipna : bool, default True + Exclude NA/null values. If an entire group is NA, the result will be NA. + + .. versionadded:: 3.0.0 + Returns ------- Series or DataFrame @@ -2780,9 +2816,10 @@ def sem(self, ddof: int = 1, numeric_only: bool = False) -> NDFrameT: ) return self._cython_agg_general( "sem", - alt=lambda x: Series(x, copy=False).sem(ddof=ddof), + alt=lambda x: Series(x, copy=False).sem(ddof=ddof, skipna=skipna), numeric_only=numeric_only, ddof=ddof, + skipna=skipna, ) @final @@ -2959,7 +2996,9 @@ def sum( return result @final - def prod(self, numeric_only: bool = False, min_count: int = 0) -> NDFrameT: + def prod( + self, numeric_only: bool = False, min_count: int = 0, skipna: bool = True + ) -> NDFrameT: """ Compute prod of group values. @@ -2976,6 +3015,11 @@ def prod(self, numeric_only: bool = False, min_count: int = 0) -> NDFrameT: The required number of valid values to perform the operation. If fewer than ``min_count`` non-NA values are present the result will be NA. + skipna : bool, default True + Exclude NA/null values. If an entire group is NA, the result will be NA. + + .. 
versionadded:: 3.0.0 + Returns ------- Series or DataFrame @@ -3024,17 +3068,22 @@ def prod(self, numeric_only: bool = False, min_count: int = 0) -> NDFrameT: 2 30 72 """ return self._agg_general( - numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod + numeric_only=numeric_only, + min_count=min_count, + skipna=skipna, + alias="prod", + npfunc=np.prod, ) @final @doc( - _groupby_agg_method_engine_template, + _groupby_agg_method_skipna_engine_template, fname="min", no=False, mc=-1, e=None, ek=None, + s=True, example=dedent( """\ For SeriesGroupBy: @@ -3074,6 +3123,7 @@ def min( self, numeric_only: bool = False, min_count: int = -1, + skipna: bool = True, engine: Literal["cython", "numba"] | None = None, engine_kwargs: dict[str, bool] | None = None, ): @@ -3086,23 +3136,26 @@ def min( engine_kwargs, min_periods=min_count, is_max=False, + skipna=skipna, ) else: return self._agg_general( numeric_only=numeric_only, min_count=min_count, + skipna=skipna, alias="min", npfunc=np.min, ) @final @doc( - _groupby_agg_method_engine_template, + _groupby_agg_method_skipna_engine_template, fname="max", no=False, mc=-1, e=None, ek=None, + s=True, example=dedent( """\ For SeriesGroupBy: @@ -3142,6 +3195,7 @@ def max( self, numeric_only: bool = False, min_count: int = -1, + skipna: bool = True, engine: Literal["cython", "numba"] | None = None, engine_kwargs: dict[str, bool] | None = None, ): @@ -3154,11 +3208,13 @@ def max( engine_kwargs, min_periods=min_count, is_max=True, + skipna=skipna, ) else: return self._agg_general( numeric_only=numeric_only, min_count=min_count, + skipna=skipna, alias="max", npfunc=np.max, ) @@ -3180,8 +3236,7 @@ def first( The required number of valid values to perform the operation. If fewer than ``min_count`` valid values are present the result will be NA. skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. + Exclude NA/null values. If an entire group is NA, the result will be NA. .. versionadded:: 2.2.1 @@ -3267,8 +3322,7 @@ def last( The required number of valid values to perform the operation. If fewer than ``min_count`` valid values are present the result will be NA. skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. + Exclude NA/null values. If an entire group is NA, the result will be NA. .. versionadded:: 2.2.1 @@ -3654,14 +3708,21 @@ def rolling( an integer index is not used to calculate the rolling window. closed : str, default None - If ``'right'``, the first point in the window is excluded from calculations. + Determines the inclusivity of points in the window. + If ``'right'``, (First, Last] the last point in the window + is included in the calculations. + + If ``'left'``, [First, Last) the first point in the window + is included in the calculations. - If ``'left'``, the last point in the window is excluded from calculations. + If ``'both'``, [First, Last] all points in the window + are included in the calculations. - If ``'both'``, no points in the window are excluded from calculations. + If ``'neither'``, (First, Last) the first and last points + in the window are excluded from calculations. - If ``'neither'``, the first and last points in the window are excluded - from calculations. + () and [] refer to open and closed interval + notation respectively. Default ``None`` (``'right'``). @@ -5461,8 +5522,7 @@ def _idxmax_idxmin( numeric_only : bool, default False Include only float, int, boolean columns.
skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. + Exclude NA/null values. If an entire group is NA, the result will be NA. ignore_unobserved : bool, default False When True and an unobserved group is encountered, do not raise. This used for transform where unobserved groups do not play an impact on the result. diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 5f9ebdcea4a2d..c9d874fc08dbe 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -516,8 +516,7 @@ def __init__( ): grper = pprint_thing(grouping_vector) errmsg = ( - "Grouper result violates len(labels) == " - f"len(data)\nresult: {grper}" + f"Grouper result violates len(labels) == len(data)\nresult: {grper}" ) raise AssertionError(errmsg) diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py index 0064aa91056e8..88379164534f2 100644 --- a/pandas/core/indexers/objects.py +++ b/pandas/core/indexers/objects.py @@ -478,9 +478,9 @@ def get_window_bounds( ) start = start.astype(np.int64) end = end.astype(np.int64) - assert len(start) == len( - end - ), "these should be equal in length from get_window_bounds" + assert len(start) == len(end), ( + "these should be equal in length from get_window_bounds" + ) # Cannot use groupby_indices as they might not be monotonic with the object # we're rolling over window_indices = np.arange( diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 656ee54cbc5d4..8a493fef54d3b 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -975,8 +975,7 @@ def _validate_tuple_indexer(self, key: tuple) -> tuple: self._validate_key(k, i) except ValueError as err: raise ValueError( - "Location based indexing can only have " - f"[{self._valid_types}] types" + f"Location based indexing can only have [{self._valid_types}] types" ) from err return key @@ -1589,8 +1588,7 @@ def _validate_key(self, key, axis: AxisInt) -> None: "is not available" ) raise ValueError( - "iLocation based boolean indexing cannot use " - "an indexable as a mask" + "iLocation based boolean indexing cannot use an indexable as a mask" ) return @@ -1994,8 +1992,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): return self._setitem_with_indexer((pi, info_axis[0]), value[0]) raise ValueError( - "Must have equal len keys and value " - "when setting with an iterable" + "Must have equal len keys and value when setting with an iterable" ) elif lplane_indexer == 0 and len(value) == len(self.obj.index): @@ -2023,8 +2020,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): else: raise ValueError( - "Must have equal len keys and value " - "when setting with an iterable" + "Must have equal len keys and value when setting with an iterable" ) else: diff --git a/pandas/core/interchange/buffer.py b/pandas/core/interchange/buffer.py index 62bf396256f2a..8953360a91c8e 100644 --- a/pandas/core/interchange/buffer.py +++ b/pandas/core/interchange/buffer.py @@ -31,8 +31,7 @@ def __init__(self, x: np.ndarray, allow_copy: bool = True) -> None: x = x.copy() else: raise RuntimeError( - "Exports cannot be zero-copy in the case " - "of a non-contiguous buffer" + "Exports cannot be zero-copy in the case of a non-contiguous buffer" ) # Store the numpy array in which the data resides as a private diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f44ad926dda5c..d1a9081b234de 100644 --- a/pandas/core/internals/blocks.py 
+++ b/pandas/core/internals/blocks.py @@ -2264,8 +2264,7 @@ def check_ndim(values, placement: BlockPlacement, ndim: int) -> None: if values.ndim > ndim: # Check for both np.ndarray and ExtensionArray raise ValueError( - "Wrong number of dimensions. " - f"values.ndim > ndim [{values.ndim} > {ndim}]" + f"Wrong number of dimensions. values.ndim > ndim [{values.ndim} > {ndim}]" ) if not is_1d_only_ea_dtype(values.dtype): diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index dfff34656f82b..69da2be0306f6 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -907,8 +907,7 @@ def _validate_or_indexify_columns( if not is_mi_list and len(columns) != len(content): # pragma: no cover # caller's responsibility to check for this... raise AssertionError( - f"{len(columns)} columns passed, passed data had " - f"{len(content)} columns" + f"{len(columns)} columns passed, passed data had {len(content)} columns" ) if is_mi_list: # check if nested list column, length of each sub-list should be equal diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 983a3df57e369..3a466b6fc7fc8 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -164,7 +164,7 @@ def _masked_arith_op(x: np.ndarray, y, op) -> np.ndarray: else: if not is_scalar(y): raise TypeError( - f"Cannot broadcast np.ndarray with operand of type { type(y) }" + f"Cannot broadcast np.ndarray with operand of type {type(y)}" ) # mask is only meaningful for x diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 4b3b7a72b5a5c..1cfc75ea11725 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1269,8 +1269,53 @@ def last( ) @final - @doc(GroupBy.median) def median(self, numeric_only: bool = False): + """ + Compute median of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex + + Parameters + ---------- + numeric_only : bool, default False + Include only float, int, boolean columns. + + .. versionchanged:: 2.0.0 + + numeric_only no longer accepts ``None`` and defaults to False. + + Returns + ------- + Series or DataFrame + Median of values within each group. + + See Also + -------- + Series.groupby : Apply a function groupby to a Series. + DataFrame.groupby : Apply a function groupby to each row or column of a + DataFrame. + + Examples + -------- + + >>> ser = pd.Series( + ... [1, 2, 3, 3, 4, 5], + ... index=pd.DatetimeIndex( + ... [ + ... "2023-01-01", + ... "2023-01-10", + ... "2023-01-15", + ... "2023-02-01", + ... "2023-02-10", + ... "2023-02-15", + ... ] + ... ), + ... ) + >>> ser.resample("MS").median() + 2023-01-01 2.0 + 2023-02-01 4.0 + Freq: MS, dtype: float64 + """ return self._downsample("median", numeric_only=numeric_only) @final @@ -1450,12 +1495,61 @@ def var( return self._downsample("var", ddof=ddof, numeric_only=numeric_only) @final - @doc(GroupBy.sem) def sem( self, ddof: int = 1, numeric_only: bool = False, ): + """ + Compute standard error of the mean of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex. + + Parameters + ---------- + ddof : int, default 1 + Degrees of freedom. + + numeric_only : bool, default False + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + .. versionchanged:: 2.0.0 + + numeric_only now defaults to ``False``. + + Returns + ------- + Series or DataFrame + Standard error of the mean of values within each group. 
+ + See Also + -------- + DataFrame.sem : Return unbiased standard error of the mean over requested axis. + Series.sem : Return unbiased standard error of the mean over requested axis. + + Examples + -------- + + >>> ser = pd.Series( + ... [1, 3, 2, 4, 3, 8], + ... index=pd.DatetimeIndex( + ... [ + ... "2023-01-01", + ... "2023-01-10", + ... "2023-01-15", + ... "2023-02-01", + ... "2023-02-10", + ... "2023-02-15", + ... ] + ... ), + ... ) + >>> ser.resample("MS").sem() + 2023-01-01 0.577350 + 2023-02-01 1.527525 + Freq: MS, dtype: float64 + """ return self._downsample("sem", ddof=ddof, numeric_only=numeric_only) @final diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index 33ff182f5baee..6a590ee5b227e 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -495,8 +495,7 @@ def from_dummies( if col_isna_mask.any(): raise ValueError( - "Dummy DataFrame contains NA value in column: " - f"'{col_isna_mask.idxmax()}'" + f"Dummy DataFrame contains NA value in column: '{col_isna_mask.idxmax()}'" ) # index data with a list of all columns that are dummies diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 5fddd9f9aca5b..ab056c8cc7e37 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1929,9 +1929,9 @@ def get_join_indexers( np.ndarray[np.intp] or None Indexer into the right_keys. """ - assert len(left_keys) == len( - right_keys - ), "left_keys and right_keys must be the same length" + assert len(left_keys) == len(right_keys), ( + "left_keys and right_keys must be the same length" + ) # fast-path for empty left/right left_n = len(left_keys[0]) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 9b7b768fe7adb..c60fe71a7ff28 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -929,6 +929,8 @@ def _reorder_for_extension_array_stack( def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame: if frame.columns.nunique() != len(frame.columns): raise ValueError("Columns with duplicate values are not supported in stack") + if not len(level): + return frame set_levels = set(level) stack_cols = frame.columns._drop_level_numbers( [k for k in range(frame.columns.nlevels - 1, -1, -1) if k not in set_levels] diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index b3f946f289891..034b861a83f43 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -73,7 +73,7 @@ def cut( Parameters ---------- - x : array-like + x : 1d ndarray or Series The input array to be binned. Must be 1-dimensional. bins : int, sequence of scalars, or IntervalIndex The criteria to bin by. @@ -126,7 +126,7 @@ def cut( Categorical for all other inputs. The values stored within are whatever the type in the sequence is. - * False : returns an ndarray of integers. + * False : returns a 1d ndarray or Series of integers. bins : numpy.ndarray or IntervalIndex. The computed or specified bins. Only returned when `retbins=True`. 
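The hunks above thread a ``skipna`` flag from the GroupBy reductions down through the Cython and numba kernels. As a minimal sketch of the user-facing behavior this enables (assuming a pandas build that includes this branch; the frame, key and column names are illustrative only):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [1.0, np.nan, 3.0, 4.0]})

    # Default keeps the long-standing behavior: NA values are dropped per group.
    df.groupby("key")["val"].min()              # a -> 1.0, b -> 3.0

    # skipna=False propagates any NA into its group's result, matching
    # Series.min(skipna=False) applied group-wise.
    df.groupby("key")["val"].min(skipna=False)  # a -> NaN, b -> 3.0

The same flag now reaches sum, mean, median, prod, max, std, var and sem.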
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 30487de7bafd5..0a10001a3113f 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -192,9 +192,9 @@ def should_cache( else: check_count = 500 else: - assert ( - 0 <= check_count <= len(arg) - ), "check_count must be in next bounds: [0; len(arg)]" + assert 0 <= check_count <= len(arg), ( + "check_count must be in next bounds: [0; len(arg)]" + ) if check_count == 0: return False diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 631ab15464942..b954ce2584c13 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -929,14 +929,21 @@ class Window(BaseWindow): an integer index is not used to calculate the rolling window. closed : str, default None - If ``'right'``, the first point in the window is excluded from calculations. + Determines the inclusivity of points in the window. + If ``'right'``, (First, Last] the last point in the window + is included in the calculations. - If ``'left'``, the last point in the window is excluded from calculations. + If ``'left'``, [First, Last) the first point in the window + is included in the calculations. - If ``'both'``, no point in the window is excluded from calculations. + If ``'both'``, [First, Last] all points in the window + are included in the calculations. - If ``'neither'``, the first and last points in the window are excluded - from calculations. + If ``'neither'``, (First, Last) the first and last points + in the window are excluded from calculations. + + () and [] refer to open and closed interval + notation respectively. Default ``None`` (``'right'``). diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 10a06aec72a57..ba4919c9298ed 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -270,7 +270,7 @@ def _process_style(self, style: dict[str, Any] | None) -> str | None: style_key = json.dumps(style) if style_key in self._style_dict: return self._style_dict[style_key] - name = f"pd{len(self._style_dict)+1}" + name = f"pd{len(self._style_dict) + 1}" self._style_dict[style_key] = name odf_style = Style(name=name, family="table-cell") if "font" in style: diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index a9936ba8c8f2c..b466e986450b1 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -336,8 +336,8 @@ def format_object_summary( if indent_for_name: name_len = len(name) - space1 = f'\n{(" " * (name_len + 1))}' - space2 = f'\n{(" " * (name_len + 2))}' + space1 = f"\n{(' ' * (name_len + 1))}" + space2 = f"\n{(' ' * (name_len + 2))}" else: space1 = "\n" space2 = "\n " # space for the opening '[' diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 3f37556867954..b4c55da3eddd6 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -2588,7 +2588,7 @@ def set_sticky( for i, level in enumerate(levels_): styles.append( { - "selector": f"thead tr:nth-child({level+1}) th", + "selector": f"thead tr:nth-child({level + 1}) th", "props": props + ( f"top:{i * pixel_size}px; height:{pixel_size}px; " @@ -2599,7 +2599,7 @@ def set_sticky( if not all(name is None for name in self.index.names): styles.append( { - "selector": f"thead tr:nth-child({obj.nlevels+1}) th", + "selector": f"thead tr:nth-child({obj.nlevels + 1}) th", "props": props + ( f"top:{(len(levels_)) * pixel_size}px; " @@ -2619,7 +2619,7 @@ def set_sticky( styles.extend( [ { -
"selector": f"thead tr th:nth-child({level+1})", + "selector": f"thead tr th:nth-child({level + 1})", "props": props_ + "z-index:3 !important;", }, { @@ -4214,8 +4214,10 @@ def css_bar(start: float, end: float, color: str) -> str: if end > start: cell_css += "background: linear-gradient(90deg," if start > 0: - cell_css += f" transparent {start*100:.1f}%, {color} {start*100:.1f}%," - cell_css += f" {color} {end*100:.1f}%, transparent {end*100:.1f}%)" + cell_css += ( + f" transparent {start * 100:.1f}%, {color} {start * 100:.1f}%," + ) + cell_css += f" {color} {end * 100:.1f}%, transparent {end * 100:.1f}%)" return cell_css def css_calc(x, left: float, right: float, align: str, color: str | list | tuple): diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 2d1218b007d19..482ed316c7ce4 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -850,10 +850,7 @@ def _generate_body_row( data_element = _element( "td", - ( - f"{self.css['data']} {self.css['row']}{r} " - f"{self.css['col']}{c}{cls}" - ), + (f"{self.css['data']} {self.css['row']}{r} {self.css['col']}{c}{cls}"), value, data_element_visible, attributes="", @@ -973,7 +970,7 @@ def concatenated_visible_rows(obj): idx_len = d["index_lengths"].get((lvl, r), None) if idx_len is not None: # i.e. not a sparsified entry d["clines"][rn + idx_len].append( - f"\\cline{{{lvln+1}-{len(visible_index_levels)+data_len}}}" + f"\\cline{{{lvln + 1}-{len(visible_index_levels) + data_len}}}" # noqa: E501 ) def format( @@ -1557,7 +1554,7 @@ def relabel_index( >>> df = pd.DataFrame({"samples": np.random.rand(10)}) >>> styler = df.loc[np.random.randint(0, 10, 3)].style - >>> styler.relabel_index([f"sample{i+1} ({{}})" for i in range(3)]) + >>> styler.relabel_index([f"sample{i + 1} ({{}})" for i in range(3)]) ... 
# doctest: +SKIP samples sample1 (5) 0.315811 @@ -2520,7 +2517,7 @@ def color(value, user_arg, command, comm_arg): if value[0] == "#" and len(value) == 7: # color is hex code return command, f"[HTML]{{{value[1:].upper()}}}{arg}" if value[0] == "#" and len(value) == 4: # color is short hex code - val = f"{value[1].upper()*2}{value[2].upper()*2}{value[3].upper()*2}" + val = f"{value[1].upper() * 2}{value[2].upper() * 2}{value[3].upper() * 2}" return command, f"[HTML]{{{val}}}{arg}" elif value[:3] == "rgb": # color is rgb or rgba r = re.findall("(?<=\\()[0-9\\s%]+(?=,)", value)[0].strip() diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index 47f162e93216d..febf43b9a1018 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -260,7 +260,7 @@ def _other_namespaces(self) -> dict: nmsp_dict: dict[str, str] = {} if self.namespaces: nmsp_dict = { - f"xmlns{p if p=='' else f':{p}'}": n + f"xmlns{p if p == '' else f':{p}'}": n for p, n in self.namespaces.items() if n != self.prefix_uri[1:-1] } @@ -404,7 +404,7 @@ def _get_prefix_uri(self) -> str: f"{self.prefix} is not included in namespaces" ) from err elif "" in self.namespaces: - uri = f'{{{self.namespaces[""]}}}' + uri = f"{{{self.namespaces['']}}}" else: uri = "" @@ -502,7 +502,7 @@ def _get_prefix_uri(self) -> str: f"{self.prefix} is not included in namespaces" ) from err elif "" in self.namespaces: - uri = f'{{{self.namespaces[""]}}}' + uri = f"{{{self.namespaces['']}}}" else: uri = "" diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 237518b3c8d92..703a2b3656c9c 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -917,7 +917,7 @@ def _combine_lines(self, lines) -> str: Combines a list of JSON objects into one JSON object. """ return ( - f'[{",".join([line for line in (line.strip() for line in lines) if line])}]' + f"[{','.join([line for line in (line.strip() for line in lines) if line])}]" ) @overload diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index e263c69376d05..c283f600eb971 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -112,8 +112,7 @@ def __init__(self, kwds) -> None: parse_dates = bool(parse_dates) elif not isinstance(parse_dates, list): raise TypeError( - "Only booleans and lists are accepted " - "for the 'parse_dates' parameter" + "Only booleans and lists are accepted for the 'parse_dates' parameter" ) self.parse_dates: bool | list = parse_dates self.date_parser = kwds.pop("date_parser", lib.no_default) diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index db9547a18b600..e7b5c7f06a79a 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -595,8 +595,7 @@ def _infer_columns( joi = list(map(str, header[:-1] if have_mi_columns else header)) msg = f"[{','.join(joi)}], len of {len(joi)}, " raise ValueError( - f"Passed header={msg}" - f"but only {self.line_pos} lines in file" + f"Passed header={msg}but only {self.line_pos} lines in file" ) from err # We have an empty file, so check @@ -1219,8 +1218,7 @@ def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]: for row_num, actual_len in bad_lines: msg = ( - f"Expected {col_len} fields in line {row_num + 1}, saw " - f"{actual_len}" + f"Expected {col_len} fields in line {row_num + 1}, saw {actual_len}" ) if ( self.delimiter diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 54877017f76fc..67193f930b4dc 100644 --- 
a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1219,8 +1219,7 @@ def _get_options_with_defaults(self, engine: CSVEngine) -> dict[str, Any]: and value != getattr(value, "value", default) ): raise ValueError( - f"The {argname!r} option is not supported with the " - f"'pyarrow' engine" + f"The {argname!r} option is not supported with the 'pyarrow' engine" ) options[argname] = value @@ -1396,8 +1395,7 @@ def _clean_options( if not is_integer(skiprows) and skiprows is not None: # pyarrow expects skiprows to be passed as an integer raise ValueError( - "skiprows argument must be an integer when using " - "engine='pyarrow'" + "skiprows argument must be an integer when using engine='pyarrow'" ) else: if is_integer(skiprows): diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 89dbdab64c23c..a9c45e720fd56 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -33,19 +33,16 @@ ReadBuffer, ) _correct_line1 = ( - "HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!" - "000000000000000000000000000000 " + "HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000 " ) _correct_header1 = ( "HEADER RECORD*******MEMBER HEADER RECORD!!!!!!!000000000000000001600000000" ) _correct_header2 = ( - "HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!" - "000000000000000000000000000000 " + "HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000 " ) _correct_obs_header = ( - "HEADER RECORD*******OBS HEADER RECORD!!!!!!!" - "000000000000000000000000000000 " + "HEADER RECORD*******OBS HEADER RECORD!!!!!!!000000000000000000000000000000 " ) _fieldkeys = [ "ntype", diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index aee872f9ae50a..9670b5439c87e 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -247,11 +247,14 @@ def hist_frame( .. plot:: :context: close-figs - >>> data = {"length": [1.5, 0.5, 1.2, 0.9, 3], "width": [0.7, 0.2, 0.15, 0.2, 1.1]} + >>> data = { + ... "length": [1.5, 0.5, 1.2, 0.9, 3], + ... "width": [0.7, 0.2, 0.15, 0.2, 1.1], + ... } >>> index = ["pig", "rabbit", "duck", "chicken", "horse"] >>> df = pd.DataFrame(data, index=index) >>> hist = df.hist(bins=3) - """ # noqa: E501 + """ plot_backend = _get_plot_backend(backend) return plot_backend.hist_frame( data, @@ -845,7 +848,10 @@ class PlotAccessor(PandasObject): :context: close-figs >>> df = pd.DataFrame( - ... {"length": [1.5, 0.5, 1.2, 0.9, 3], "width": [0.7, 0.2, 0.15, 0.2, 1.1]}, + ... { + ... "length": [1.5, 0.5, 1.2, 0.9, 3], + ... "width": [0.7, 0.2, 0.15, 0.2, 1.1], + ... }, ... index=["pig", "rabbit", "duck", "chicken", "horse"], ... ) >>> plot = df.plot(title="DataFrame Plot") @@ -866,7 +872,7 @@ class PlotAccessor(PandasObject): >>> df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": ["A", "B", "A", "B"]}) >>> plot = df.groupby("col2").plot(kind="bar", title="DataFrameGroupBy Plot") - """ # noqa: E501 + """ _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box") _series_kinds = ("pie",) @@ -993,8 +999,7 @@ def __call__(self, *args, **kwargs): if kind not in self._all_kinds: raise ValueError( - f"{kind} is not a valid plot kind " - f"Valid plot kinds: {self._all_kinds}" + f"{kind} is not a valid plot kind Valid plot kinds: {self._all_kinds}" ) data = self._parent @@ -1630,7 +1635,9 @@ def area( ... "signups": [5, 5, 6, 12, 14, 13], ... "visits": [20, 42, 28, 62, 81, 50], ... }, - ... index=pd.date_range(start="2018/01/01", end="2018/07/01", freq="ME"), + ... 
index=pd.date_range( + ... start="2018/01/01", end="2018/07/01", freq="ME" + ... ), ... ) >>> ax = df.plot.area() @@ -1662,7 +1669,7 @@ def area( ... } ... ) >>> ax = df.plot.area(x="day") - """ # noqa: E501 + """ return self(kind="area", x=x, y=y, stacked=stacked, **kwargs) def pie(self, y: IndexLabel | None = None, **kwargs) -> PlotAccessor: diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 5ad30a68ae3c9..af77972da8634 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -123,8 +123,7 @@ def _validate_color_args(self, color, colormap): if colormap is not None: warnings.warn( - "'color' and 'colormap' cannot be used " - "simultaneously. Using 'color'", + "'color' and 'colormap' cannot be used simultaneously. Using 'color'", stacklevel=find_stack_level(), ) diff --git a/pandas/tests/arrays/interval/test_formats.py b/pandas/tests/arrays/interval/test_formats.py index 535efee519374..88c9bf81d718c 100644 --- a/pandas/tests/arrays/interval/test_formats.py +++ b/pandas/tests/arrays/interval/test_formats.py @@ -6,8 +6,6 @@ def test_repr(): arr = IntervalArray.from_tuples([(0, 1), (1, 2)]) result = repr(arr) expected = ( - "\n" - "[(0, 1], (1, 2]]\n" - "Length: 2, dtype: interval[int64, right]" + "\n[(0, 1], (1, 2]]\nLength: 2, dtype: interval[int64, right]" ) assert result == expected diff --git a/pandas/tests/dtypes/cast/test_downcast.py b/pandas/tests/dtypes/cast/test_downcast.py index 9430ba2c478ae..69200b2e5fc96 100644 --- a/pandas/tests/dtypes/cast/test_downcast.py +++ b/pandas/tests/dtypes/cast/test_downcast.py @@ -33,9 +33,9 @@ ( # This is a judgement call, but we do _not_ downcast Decimal # objects - np.array([decimal.Decimal(0.0)]), + np.array([decimal.Decimal("0.0")]), "int64", - np.array([decimal.Decimal(0.0)]), + np.array([decimal.Decimal("0.0")]), ), ( # GH#45837 diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index b7e37ff270e60..621217a8c9317 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -660,8 +660,7 @@ def test_construction_generic(self, subtype): def test_construction_not_supported(self, subtype): # GH 19016 msg = ( - "category, object, and string subtypes are not supported " - "for IntervalDtype" + "category, object, and string subtypes are not supported for IntervalDtype" ) with pytest.raises(TypeError, match=msg): IntervalDtype(subtype) diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 73c462d492d2d..c61cda83cf6e0 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -321,7 +321,7 @@ def test_period(self): def test_decimal(self): # scalars GH#23530 - a = Decimal(1.0) + a = Decimal("1.0") assert isna(a) is False assert notna(a) is True diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 27fa1206f6f7f..1f3680bf67e90 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -139,8 +139,8 @@ def test_getitem_invalid(self, data): "index out of bounds", # pyarrow "Out of bounds access", # Sparse f"loc must be an integer between -{ub} and {ub}", # Sparse - f"index {ub+1} is out of bounds for axis 0 with size {ub}", - f"index -{ub+1} is out of bounds for axis 0 with size {ub}", + f"index {ub + 1} is out of bounds for axis 0 with size {ub}", + f"index -{ub + 1} is out of bounds for axis 0 with size {ub}", ] ) with pytest.raises(IndexError, 
match=msg): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index a68c8a06e1d18..b110911bda400 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -176,8 +176,7 @@ def take(self, indexer, allow_fill=False, fill_value=None): # an ndarray. indexer = np.asarray(indexer) msg = ( - "Index is out of bounds or cannot do a " - "non-empty take from an empty array." + "Index is out of bounds or cannot do a non-empty take from an empty array." ) if allow_fill: diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py index da53bdcb4e37e..8b4728c7d6292 100644 --- a/pandas/tests/extension/list/array.py +++ b/pandas/tests/extension/list/array.py @@ -81,8 +81,7 @@ def take(self, indexer, allow_fill=False, fill_value=None): # an ndarray. indexer = np.asarray(indexer) msg = ( - "Index is out of bounds or cannot do a " - "non-empty take from an empty array." + "Index is out of bounds or cannot do a non-empty take from an empty array." ) if allow_fill: diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 4fccf02e08bd6..d6f428f4938a6 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -964,8 +964,7 @@ def _get_arith_xfail_marker(self, opname, pa_dtype): mark = pytest.mark.xfail( raises=TypeError, reason=( - f"{opname} not supported between" - f"pd.NA and {pa_dtype} Python scalar" + f"{opname} not supported between pd.NA and {pa_dtype} Python scalar" ), ) elif opname == "__rfloordiv__" and ( diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py index 74e4383950174..462d86cadde88 100644 --- a/pandas/tests/frame/methods/test_info.py +++ b/pandas/tests/frame/methods/test_info.py @@ -522,7 +522,7 @@ def test_info_int_columns(using_infer_string): 0 1 2 non-null int64 1 2 2 non-null int64 dtypes: int64(2) - memory usage: {'50.0' if using_infer_string and HAS_PYARROW else '48.0+'} bytes + memory usage: {"50.0" if using_infer_string and HAS_PYARROW else "48.0+"} bytes """ ) assert result == expected diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py index 91d735a8b2fa7..a9d56cbfd2b46 100644 --- a/pandas/tests/frame/methods/test_sample.py +++ b/pandas/tests/frame/methods/test_sample.py @@ -198,8 +198,7 @@ def test_sample_upsampling_without_replacement(self, frame_or_series): obj = tm.get_obj(obj, frame_or_series) msg = ( - "Replace has to be set to `True` when " - "upsampling the population `frac` > 1."
) with pytest.raises(ValueError, match=msg): obj.sample(frac=2, replace=False) diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index 1967941bca9f0..7b75bcf4f348d 100644 --- a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -93,7 +93,7 @@ def test_set_axis_setattr_index_wrong_length(self, obj): # wrong length msg = ( f"Length mismatch: Expected axis has {len(obj)} elements, " - f"new values have {len(obj)-1} elements" + f"new values have {len(obj) - 1} elements" ) with pytest.raises(ValueError, match=msg): obj.index = np.arange(len(obj) - 1) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index abc14d10514fa..22fdfd3a01408 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1452,6 +1452,25 @@ def test_stack_empty_frame(dropna, future_stack): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:The previous implementation of stack is deprecated") +@pytest.mark.parametrize("dropna", [True, False, lib.no_default]) +def test_stack_empty_level(dropna, future_stack, int_frame): + # GH 60740 + if future_stack and dropna is not lib.no_default: + with pytest.raises(ValueError, match="dropna must be unspecified"): + DataFrame(dtype=np.int64).stack(dropna=dropna, future_stack=future_stack) + else: + expected = int_frame + result = int_frame.copy().stack( + level=[], dropna=dropna, future_stack=future_stack + ) + tm.assert_frame_equal(result, expected) + + expected = DataFrame() + result = DataFrame().stack(level=[], dropna=dropna, future_stack=future_stack) + tm.assert_frame_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:The previous implementation of stack is deprecated") @pytest.mark.parametrize("dropna", [True, False, lib.no_default]) @pytest.mark.parametrize("fill_value", [None, 0]) diff --git a/pandas/tests/groupby/aggregate/test_numba.py b/pandas/tests/groupby/aggregate/test_numba.py index ca265a1d1108b..0cd8a14d97eb0 100644 --- a/pandas/tests/groupby/aggregate/test_numba.py +++ b/pandas/tests/groupby/aggregate/test_numba.py @@ -186,7 +186,7 @@ def test_multifunc_numba_vs_cython_frame(agg_kwargs): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("func", ["sum", "mean"]) +@pytest.mark.parametrize("func", ["sum", "mean", "var", "std", "min", "max"]) def test_multifunc_numba_vs_cython_frame_noskipna(func): pytest.importorskip("numba") data = DataFrame( diff --git a/pandas/tests/groupby/test_api.py b/pandas/tests/groupby/test_api.py index cc69de2581a79..215e627abb018 100644 --- a/pandas/tests/groupby/test_api.py +++ b/pandas/tests/groupby/test_api.py @@ -174,16 +174,13 @@ def test_frame_consistency(groupby_func): elif groupby_func in ("nunique",): exclude_expected = {"axis"} elif groupby_func in ("max", "min"): - exclude_expected = {"axis", "kwargs", "skipna"} + exclude_expected = {"axis", "kwargs"} exclude_result = {"min_count", "engine", "engine_kwargs"} - elif groupby_func in ("sum", "mean"): + elif groupby_func in ("sum", "mean", "std", "var"): exclude_expected = {"axis", "kwargs"} exclude_result = {"engine", "engine_kwargs"} - elif groupby_func in ("std", "var"): - exclude_expected = {"axis", "kwargs", "skipna"} - exclude_result = {"engine", "engine_kwargs"} elif groupby_func in ("median", "prod", "sem"): - exclude_expected = {"axis", "kwargs", "skipna"} + exclude_expected = {"axis", "kwargs"} elif groupby_func in ("bfill", "ffill"): 
exclude_expected = {"inplace", "axis", "limit_area"} elif groupby_func in ("cummax", "cummin"): @@ -235,16 +232,13 @@ def test_series_consistency(request, groupby_func): if groupby_func in ("any", "all"): exclude_expected = {"kwargs", "bool_only", "axis"} elif groupby_func in ("max", "min"): - exclude_expected = {"axis", "kwargs", "skipna"} + exclude_expected = {"axis", "kwargs"} exclude_result = {"min_count", "engine", "engine_kwargs"} - elif groupby_func in ("sum", "mean"): + elif groupby_func in ("sum", "mean", "std", "var"): exclude_expected = {"axis", "kwargs"} exclude_result = {"engine", "engine_kwargs"} - elif groupby_func in ("std", "var"): - exclude_expected = {"axis", "kwargs", "skipna"} - exclude_result = {"engine", "engine_kwargs"} elif groupby_func in ("median", "prod", "sem"): - exclude_expected = {"axis", "kwargs", "skipna"} + exclude_expected = {"axis", "kwargs"} elif groupby_func in ("bfill", "ffill"): exclude_expected = {"inplace", "axis", "limit_area"} elif groupby_func in ("cummax", "cummin"): diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 20309e852a556..e49be8c00b426 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -990,7 +990,7 @@ def test_sort(): # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar') df = DataFrame({"value": np.random.default_rng(2).integers(0, 10000, 10)}) - labels = [f"{i} - {i+499}" for i in range(0, 10000, 500)] + labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)] cat_labels = Categorical(labels, labels) df = df.sort_values(by=["value"], ascending=True) diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index ba13d3bd7278f..864b9e5d55991 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -263,10 +263,7 @@ def test_groupby_raises_string_np( if using_infer_string: if groupby_func_np is np.mean: klass = TypeError - msg = ( - f"Cannot perform reduction '{groupby_func_np.__name__}' " - "with string dtype" - ) + msg = f"Cannot perform reduction '{groupby_func_np.__name__}' with string dtype" _call_and_check(klass, msg, how, gb, groupby_func_np, ()) diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 1db12f05e821f..ea876cfdf4933 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -514,6 +514,147 @@ def test_sum_skipna_object(skipna): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize( + "func, values, dtype, result_dtype", + [ + ("prod", [0, 1, 3, np.nan, 4, 5, 6, 7, -8, 9], "float64", "float64"), + ("prod", [0, -1, 3, 4, 5, np.nan, 6, 7, 8, 9], "Float64", "Float64"), + ("prod", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Int64", "Int64"), + ("prod", [np.nan] * 10, "float64", "float64"), + ("prod", [np.nan] * 10, "Float64", "Float64"), + ("prod", [np.nan] * 10, "Int64", "Int64"), + ("var", [0, -1, 3, 4, np.nan, 5, 6, 7, 8, 9], "float64", "float64"), + ("var", [0, 1, 3, -4, 5, 6, 7, -8, 9, np.nan], "Float64", "Float64"), + ("var", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "Int64", "Float64"), + ("var", [np.nan] * 10, "float64", "float64"), + ("var", [np.nan] * 10, "Float64", "Float64"), + ("var", [np.nan] * 10, "Int64", "Float64"), + ("std", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "float64", "float64"), + ("std", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "Float64", "Float64"), + ("std", [0, 1, 3, -4, 5, 6, 7, -8, 9, np.nan], "Int64", 
"Float64"), + ("std", [np.nan] * 10, "float64", "float64"), + ("std", [np.nan] * 10, "Float64", "Float64"), + ("std", [np.nan] * 10, "Int64", "Float64"), + ("sem", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"), + ("sem", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"), + ("sem", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Float64"), + ("sem", [np.nan] * 10, "float64", "float64"), + ("sem", [np.nan] * 10, "Float64", "Float64"), + ("sem", [np.nan] * 10, "Int64", "Float64"), + ("min", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"), + ("min", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"), + ("min", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Int64"), + ( + "min", + [0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], + "timedelta64[ns]", + "timedelta64[ns]", + ), + ( + "min", + pd.to_datetime( + [ + "2019-05-09", + pd.NaT, + "2019-05-11", + "2019-05-12", + "2019-05-13", + "2019-05-14", + "2019-05-15", + "2019-05-16", + "2019-05-17", + "2019-05-18", + ] + ), + "datetime64[ns]", + "datetime64[ns]", + ), + ("min", [np.nan] * 10, "float64", "float64"), + ("min", [np.nan] * 10, "Float64", "Float64"), + ("min", [np.nan] * 10, "Int64", "Int64"), + ("max", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"), + ("max", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"), + ("max", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Int64"), + ( + "max", + [0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], + "timedelta64[ns]", + "timedelta64[ns]", + ), + ( + "max", + pd.to_datetime( + [ + "2019-05-09", + pd.NaT, + "2019-05-11", + "2019-05-12", + "2019-05-13", + "2019-05-14", + "2019-05-15", + "2019-05-16", + "2019-05-17", + "2019-05-18", + ] + ), + "datetime64[ns]", + "datetime64[ns]", + ), + ("max", [np.nan] * 10, "float64", "float64"), + ("max", [np.nan] * 10, "Float64", "Float64"), + ("max", [np.nan] * 10, "Int64", "Int64"), + ("median", [0, -1, 3, 4, 5, -6, 7, np.nan, 8, 9], "float64", "float64"), + ("median", [0, 1, 3, -4, 5, 6, 7, -8, np.nan, 9], "Float64", "Float64"), + ("median", [0, -1, 3, 4, 5, -6, 7, 8, 9, np.nan], "Int64", "Float64"), + ( + "median", + [0, 1, np.nan, 3, 4, 5, 6, 7, 8, 9], + "timedelta64[ns]", + "timedelta64[ns]", + ), + ( + "median", + pd.to_datetime( + [ + "2019-05-09", + pd.NaT, + "2019-05-11", + "2019-05-12", + "2019-05-13", + "2019-05-14", + "2019-05-15", + "2019-05-16", + "2019-05-17", + "2019-05-18", + ] + ), + "datetime64[ns]", + "datetime64[ns]", + ), + ("median", [np.nan] * 10, "float64", "float64"), + ("median", [np.nan] * 10, "Float64", "Float64"), + ("median", [np.nan] * 10, "Int64", "Float64"), + ], +) +def test_multifunc_skipna(func, values, dtype, result_dtype, skipna): + # GH#15675 + df = DataFrame( + { + "val": values, + "cat": ["A", "B"] * 5, + } + ).astype({"val": dtype}) + # We need to recast the expected values to the result_dtype as some operations + # change the dtype + expected = ( + df.groupby("cat")["val"] + .apply(lambda x: getattr(x, func)(skipna=skipna)) + .astype(result_dtype) + ) + result = getattr(df.groupby("cat")["val"], func)(skipna=skipna) + tm.assert_series_equal(result, expected) + + def test_cython_median(): arr = np.random.default_rng(2).standard_normal(1000) arr[::2] = np.nan diff --git a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py index 49eb79da616e7..25232075a07d9 100644 --- a/pandas/tests/indexes/categorical/test_indexing.py +++ b/pandas/tests/indexes/categorical/test_indexing.py @@ -64,8 +64,7 @@ def 
test_take_fill_value(self): tm.assert_categorical_equal(result.values, expected.values) msg = ( - "When allow_fill=True and fill_value is not None, " - "all indices must be >= -1" + "When allow_fill=True and fill_value is not None, all indices must be >= -1" ) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) @@ -103,8 +102,7 @@ def test_take_fill_value_datetime(self): tm.assert_index_equal(result, expected) msg = ( - "When allow_fill=True and fill_value is not None, " - "all indices must be >= -1" + "When allow_fill=True and fill_value is not None, all indices must be >= -1" ) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) diff --git a/pandas/tests/indexes/datetimes/methods/test_round.py b/pandas/tests/indexes/datetimes/methods/test_round.py index cde4a3a65804d..b023542ba0a4c 100644 --- a/pandas/tests/indexes/datetimes/methods/test_round.py +++ b/pandas/tests/indexes/datetimes/methods/test_round.py @@ -216,6 +216,6 @@ def test_round_int64(self, start, index_freq, periods, round_freq): assert (mod == 0).all(), f"round not a {round_freq} multiple" assert (diff <= unit // 2).all(), "round error" if unit % 2 == 0: - assert ( - result.asi8[diff == unit // 2] % 2 == 0 - ).all(), "round half to even error" + assert (result.asi8[diff == unit // 2] % 2 == 0).all(), ( + "round half to even error" + ) diff --git a/pandas/tests/indexes/datetimes/test_formats.py b/pandas/tests/indexes/datetimes/test_formats.py index 4551fdf073193..f4e0a63043335 100644 --- a/pandas/tests/indexes/datetimes/test_formats.py +++ b/pandas/tests/indexes/datetimes/test_formats.py @@ -205,12 +205,7 @@ def test_dti_representation_to_series(self, unit): exp3 = "0 2011-01-01\n1 2011-01-02\ndtype: datetime64[ns]" - exp4 = ( - "0 2011-01-01\n" - "1 2011-01-02\n" - "2 2011-01-03\n" - "dtype: datetime64[ns]" - ) + exp4 = "0 2011-01-01\n1 2011-01-02\n2 2011-01-03\ndtype: datetime64[ns]" exp5 = ( "0 2011-01-01 09:00:00+09:00\n" @@ -226,11 +221,7 @@ def test_dti_representation_to_series(self, unit): "dtype: datetime64[ns, US/Eastern]" ) - exp7 = ( - "0 2011-01-01 09:00:00\n" - "1 2011-01-02 10:15:00\n" - "dtype: datetime64[ns]" - ) + exp7 = "0 2011-01-01 09:00:00\n1 2011-01-02 10:15:00\ndtype: datetime64[ns]" with pd.option_context("display.width", 300): for idx, expected in zip( diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index bfbcdcff51ee6..c44345273466c 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -338,8 +338,7 @@ def test_take_fill_value(self): tm.assert_index_equal(result, expected) msg = ( - "When allow_fill=True and fill_value is not None, " - "all indices must be >= -1" + "When allow_fill=True and fill_value is not None, all indices must be >= -1" ) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) @@ -375,8 +374,7 @@ def test_take_fill_value_with_timezone(self): tm.assert_index_equal(result, expected) msg = ( - "When allow_fill=True and fill_value is not None, " - "all indices must be >= -1" + "When allow_fill=True and fill_value is not None, all indices must be >= -1" ) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index 8db483751438c..90423149658ab 100644 --- 
a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -154,8 +154,7 @@ def test_constructor_empty(self, constructor, breaks, closed): def test_constructor_string(self, constructor, breaks): # GH 19016 msg = ( - "category, object, and string subtypes are not supported " - "for IntervalIndex" + "category, object, and string subtypes are not supported for IntervalIndex" ) with pytest.raises(TypeError, match=msg): constructor(**self.get_kwargs_from_breaks(breaks)) @@ -224,8 +223,7 @@ def test_constructor_errors(self): # GH 19016: categorical data data = Categorical(list("01234abcde"), ordered=True) msg = ( - "category, object, and string subtypes are not supported " - "for IntervalIndex" + "category, object, and string subtypes are not supported for IntervalIndex" ) with pytest.raises(TypeError, match=msg): IntervalIndex.from_arrays(data[:-1], data[1:]) @@ -297,8 +295,7 @@ def test_constructor_errors(self): # GH 19016: categorical data data = Categorical(list("01234abcde"), ordered=True) msg = ( - "category, object, and string subtypes are not supported " - "for IntervalIndex" + "category, object, and string subtypes are not supported for IntervalIndex" ) with pytest.raises(TypeError, match=msg): IntervalIndex.from_breaks(data) diff --git a/pandas/tests/indexes/interval/test_formats.py b/pandas/tests/indexes/interval/test_formats.py index 73bbfc91028b3..d45d894c485c9 100644 --- a/pandas/tests/indexes/interval/test_formats.py +++ b/pandas/tests/indexes/interval/test_formats.py @@ -21,12 +21,7 @@ class TestIntervalIndexRendering: [ ( Series, - ( - "(0.0, 1.0] a\n" - "NaN b\n" - "(2.0, 3.0] c\n" - "dtype: object" - ), + ("(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c\ndtype: object"), ), (DataFrame, (" 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c")), ], diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index d82203a53a60f..f098690be2afa 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -259,8 +259,7 @@ def test_get_indexer(self): def test_get_indexer_nearest(self): midx = MultiIndex.from_tuples([("a", 1), ("b", 2)]) msg = ( - "method='nearest' not implemented yet for MultiIndex; " - "see GitHub issue 9365" + "method='nearest' not implemented yet for MultiIndex; see GitHub issue 9365" ) with pytest.raises(NotImplementedError, match=msg): midx.get_indexer(["a"], method="nearest") diff --git a/pandas/tests/indexes/numeric/test_indexing.py b/pandas/tests/indexes/numeric/test_indexing.py index 43adc09774914..3c1b98d57b2a0 100644 --- a/pandas/tests/indexes/numeric/test_indexing.py +++ b/pandas/tests/indexes/numeric/test_indexing.py @@ -479,8 +479,7 @@ def test_take_fill_value_float64(self): tm.assert_index_equal(result, expected) msg = ( - "When allow_fill=True and fill_value is not None, " - "all indices must be >= -1" + "When allow_fill=True and fill_value is not None, all indices must be >= -1" ) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) diff --git a/pandas/tests/indexes/period/test_formats.py b/pandas/tests/indexes/period/test_formats.py index 9f36eb1e7a1d1..dc95e19523842 100644 --- a/pandas/tests/indexes/period/test_formats.py +++ b/pandas/tests/indexes/period/test_formats.py @@ -63,8 +63,7 @@ def test_representation(self, method): exp3 = "PeriodIndex(['2011-01-01', '2011-01-02'], dtype='period[D]')" exp4 = ( - "PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " - "dtype='period[D]')" + 
"PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], dtype='period[D]')" ) exp5 = "PeriodIndex(['2011', '2012', '2013'], dtype='period[Y-DEC]')" diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 2683e25eda618..00e8262ddfa4c 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -700,8 +700,7 @@ def test_take_fill_value(self): tm.assert_index_equal(result, expected) msg = ( - "When allow_fill=True and fill_value is not None, " - "all indices must be >= -1" + "When allow_fill=True and fill_value is not None, all indices must be >= -1" ) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 608158d40cf23..5b75bd9afd6df 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1112,8 +1112,7 @@ def test_take_fill_value(self): def test_take_fill_value_none_raises(self): index = Index(list("ABC"), name="xxx") msg = ( - "When allow_fill=True and fill_value is not None, " - "all indices must be >= -1" + "When allow_fill=True and fill_value is not None, all indices must be >= -1" ) with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index 4a31ae88a757a..dd228e6b713b5 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -419,8 +419,7 @@ class TestIndexConstructionErrors: def test_constructor_overflow_int64(self): # see GH#15832 msg = ( - "The elements provided in the data cannot " - "all be casted to the dtype int64" + "The elements provided in the data cannot all be casted to the dtype int64" ) with pytest.raises(OverflowError, match=msg): Index([np.iinfo(np.uint64).max - 1], dtype="int64") diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index e411555c65bea..426083cb6b67c 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -262,8 +262,7 @@ def test_take_fill_value(self): tm.assert_index_equal(result, expected) msg = ( - "When allow_fill=True and fill_value is not None, " - "all indices must be >= -1" + "When allow_fill=True and fill_value is not None, all indices must be >= -1" ) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index dc95e1bb1b8a0..2f6998a85c80b 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -763,8 +763,7 @@ def test_iloc_mask(self): "(index of the boolean Series and of the " "indexed object do not match).", ("locs", ".iloc"): ( - "iLocation based boolean indexing on an " - "integer type is not available" + "iLocation based boolean indexing on an integer type is not available" ), } diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 34824f0a67985..140cf39b26556 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -910,8 +910,7 @@ def test_corrupt_bytes_raises(self, engine): error = XLRDError msg = ( - "Unsupported format, or corrupt file: Expected BOF " - "record; found b'foo'" + "Unsupported format, or corrupt file: Expected BOF record; found b'foo'" ) elif engine == "calamine": from 
python_calamine import CalamineError diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py index 71ef1201e523f..0e13b2f94ed58 100644 --- a/pandas/tests/io/excel/test_style.py +++ b/pandas/tests/io/excel/test_style.py @@ -356,6 +356,6 @@ def test_format_hierarchical_rows_periodindex(merge_cells): for cell in formatted_cells: if cell.row != 0 and cell.col == 0: - assert isinstance( - cell.val, Timestamp - ), "Period should be converted to Timestamp" + assert isinstance(cell.val, Timestamp), ( + "Period should be converted to Timestamp" + ) diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index ff8a1b9f570ab..b7dcfde327b83 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -933,7 +933,7 @@ def test_trim(self, df): def test_export(self, df, styler): f = lambda x: "color: red" if x > 0 else "color: blue" - g = lambda x, z: f"color: {z}" if x > 0 else f"color: {z}" + g = lambda x, z: f"color: {z}" style1 = styler style1.map(f).map(g, z="b").highlight_max()._compute() # = render result = style1.export() diff --git a/pandas/tests/io/formats/test_css.py b/pandas/tests/io/formats/test_css.py index c4ecb48006cb1..642a562704344 100644 --- a/pandas/tests/io/formats/test_css.py +++ b/pandas/tests/io/formats/test_css.py @@ -193,8 +193,7 @@ def test_css_border_shorthands(prop, expected): ( "margin: 1px; margin-top: 2px", "", - "margin-left: 1px; margin-right: 1px; " - "margin-bottom: 1px; margin-top: 2px", + "margin-left: 1px; margin-right: 1px; margin-bottom: 1px; margin-top: 2px", ), ("margin-top: 2px", "margin: 1px", "margin: 1px; margin-top: 2px"), ("margin: 1px", "margin-top: 2px", "margin: 1px"), diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 7bf041a50b745..6d762fdeb8d79 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -482,10 +482,7 @@ def test_to_csv_string_with_crlf(self): # case 3: CRLF as line terminator # 'lineterminator' should not change inner element expected_crlf = ( - b"int,str_crlf\r\n" - b"1,abc\r\n" - b'2,"d\r\nef"\r\n' - b'3,"g\r\nh\r\n\r\ni"\r\n' + b'int,str_crlf\r\n1,abc\r\n2,"d\r\nef"\r\n3,"g\r\nh\r\n\r\ni"\r\n' ) df.to_csv(path, lineterminator="\r\n", index=False) with open(path, "rb") as f: diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index b1a437bfdbd8a..9c75314b66fa2 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -94,8 +94,7 @@ def test_to_html_with_column_specific_col_space_raises(): ) msg = ( - "Col_space length\\(\\d+\\) should match " - "DataFrame number of columns\\(\\d+\\)" + "Col_space length\\(\\d+\\) should match DataFrame number of columns\\(\\d+\\)" ) with pytest.raises(ValueError, match=msg): df.to_html(col_space=[30, 40]) diff --git a/pandas/tests/io/formats/test_to_markdown.py b/pandas/tests/io/formats/test_to_markdown.py index 7aa7cebb5120f..f3d9b88cc91e2 100644 --- a/pandas/tests/io/formats/test_to_markdown.py +++ b/pandas/tests/io/formats/test_to_markdown.py @@ -35,8 +35,7 @@ def test_empty_frame(): df.to_markdown(buf=buf) result = buf.getvalue() assert result == ( - "| id | first_name | last_name |\n" - "|------|--------------|-------------|" + "| id | first_name | last_name |\n|------|--------------|-------------|" ) @@ -65,8 +64,7 @@ def test_series(): s.to_markdown(buf=buf) result = buf.getvalue() assert 
result == ( - "| | foo |\n|---:|------:|\n| 0 | 1 " - "|\n| 1 | 2 |\n| 2 | 3 |" + "| | foo |\n|---:|------:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" ) diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index af3cdf2d44af3..63c975fd831e7 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -132,20 +132,17 @@ def test_to_string_with_formatters_unicode(self): ) assert result == expected - def test_to_string_index_formatter(self): - df = DataFrame([range(5), range(5, 10), range(10, 15)]) - - rs = df.to_string(formatters={"__index__": lambda x: "abc"[x]}) - - xp = dedent( - """\ - 0 1 2 3 4 - a 0 1 2 3 4 - b 5 6 7 8 9 - c 10 11 12 13 14\ - """ - ) - assert rs == xp + def test_to_string_index_formatter(self): + df = DataFrame([range(5), range(5, 10), range(10, 15)]) + rs = df.to_string(formatters={"__index__": lambda x: "abc"[x]}) + xp = dedent( + """\ + 0 1 2 3 4 + a 0 1 2 3 4 + b 5 6 7 8 9 + c 10 11 12 13 14""" + ) + assert rs == xp def test_no_extra_space(self): # GH#52690: Check that no extra space is given @@ -380,17 +377,11 @@ def test_to_string_small_float_values(self): # sadness per above if _three_digit_exp(): expected = ( - " a\n" - "0 1.500000e+000\n" - "1 1.000000e-017\n" - "2 -5.500000e-007" + " a\n0 1.500000e+000\n1 1.000000e-017\n2 -5.500000e-007" ) else: expected = ( - " a\n" - "0 1.500000e+00\n" - "1 1.000000e-17\n" - "2 -5.500000e-07" + " a\n0 1.500000e+00\n1 1.000000e-17\n2 -5.500000e-07" ) assert result == expected @@ -1213,13 +1204,7 @@ def test_to_string_float_na_spacing(self): ser[::2] = np.nan result = ser.to_string() - expected = ( - "0 NaN\n" - "1 1.5678\n" - "2 NaN\n" - "3 -3.0000\n" - "4 NaN" - ) + expected = "0 NaN\n1 1.5678\n2 NaN\n3 -3.0000\n4 NaN" assert result == expected def test_to_string_with_datetimeindex(self): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 5dc1272880c9b..144b36166261b 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1267,9 +1267,7 @@ def test_default_handler_numpy_unsupported_dtype(self): columns=["a", "b"], ) expected = ( - '[["(1+0j)","(nan+0j)"],' - '["(2.3+0j)","(nan+0j)"],' - '["(4-5j)","(1.2+0j)"]]' + '[["(1+0j)","(nan+0j)"],["(2.3+0j)","(nan+0j)"],["(4-5j)","(1.2+0j)"]]' ) assert df.to_json(default_handler=str, orient="values") == expected @@ -1372,11 +1370,7 @@ def test_tz_is_naive(self): ) def test_tz_range_is_utc(self, tz_range): exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]' - dfexp = ( - '{"DT":{' - '"0":"2013-01-01T05:00:00.000Z",' - '"1":"2013-01-02T05:00:00.000Z"}}' - ) + dfexp = '{"DT":{"0":"2013-01-01T05:00:00.000Z","1":"2013-01-02T05:00:00.000Z"}}' assert ujson_dumps(tz_range, iso_dates=True) == exp dti = DatetimeIndex(tz_range) @@ -1775,7 +1769,7 @@ def test_read_json_with_url_value(self, url): ) def test_read_json_with_very_long_file_path(self, compression): # GH 46718 - long_json_path = f'{"a" * 1000}.json{compression}' + long_json_path = f"{'a' * 1000}.json{compression}" with pytest.raises( FileNotFoundError, match=f"File {long_json_path} does not exist" ): diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 3c843479b446a..d482eb5fa1a06 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -236,9 +236,9 @@ def test_readjson_chunks_closes(chunksize): ) with reader: reader.read() - assert ( - reader.handles.handle.closed - ), 
f"didn't close stream with chunksize = {chunksize}" + assert reader.handles.handle.closed, ( + f"didn't close stream with chunksize = {chunksize}" + ) @pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"]) @@ -435,8 +435,7 @@ def test_to_json_append_mode(mode_): # Test ValueError when mode is not supported option df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) msg = ( - f"mode={mode_} is not a valid option." - "Only 'w' and 'a' are currently supported." + f"mode={mode_} is not a valid option.Only 'w' and 'a' are currently supported." ) with pytest.raises(ValueError, match=msg): df.to_json(mode=mode_, lines=False, orient="records") diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index c5ccc3b3f7184..d2bf9bdb139bd 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -53,60 +53,24 @@ def orient(request): class TestUltraJSONTests: @pytest.mark.skipif(not IS64, reason="not compliant on 32-bit, xref #15865") - def test_encode_decimal(self): - sut = decimal.Decimal("1337.1337") - encoded = ujson.ujson_dumps(sut, double_precision=15) - decoded = ujson.ujson_loads(encoded) - assert decoded == "1337.1337" - - sut = decimal.Decimal("0.95") - encoded = ujson.ujson_dumps(sut, double_precision=1) - assert encoded == '"0.95"' - - decoded = ujson.ujson_loads(encoded) - assert decoded == "0.95" - - sut = decimal.Decimal("0.94") - encoded = ujson.ujson_dumps(sut, double_precision=1) - assert encoded == '"0.94"' - - decoded = ujson.ujson_loads(encoded) - assert decoded == "0.94" - - sut = decimal.Decimal("1.95") - encoded = ujson.ujson_dumps(sut, double_precision=1) - assert encoded == '"1.95"' - - decoded = ujson.ujson_loads(encoded) - assert decoded == "1.95" - - sut = decimal.Decimal("-1.95") - encoded = ujson.ujson_dumps(sut, double_precision=1) - assert encoded == '"-1.95"' - - decoded = ujson.ujson_loads(encoded) - assert decoded == "-1.95" - - sut = decimal.Decimal("0.995") - encoded = ujson.ujson_dumps(sut, double_precision=2) - assert encoded == '"0.995"' - - decoded = ujson.ujson_loads(encoded) - assert decoded == "0.995" - - sut = decimal.Decimal("0.9995") - encoded = ujson.ujson_dumps(sut, double_precision=3) - assert encoded == '"0.9995"' - - decoded = ujson.ujson_loads(encoded) - assert decoded == "0.9995" - - sut = decimal.Decimal("0.99999999999999944") - encoded = ujson.ujson_dumps(sut, double_precision=15) - assert encoded == '"0.99999999999999944"' - + @pytest.mark.parametrize( + "value, double_precision", + [ + ("1337.1337", 15), + ("0.95", 1), + ("0.94", 1), + ("1.95", 1), + ("-1.95", 1), + ("0.995", 2), + ("0.9995", 3), + ("0.99999999999999944", 15), + ], + ) + def test_encode_decimal(self, value, double_precision): + sut = decimal.Decimal(value) + encoded = ujson.ujson_dumps(sut, double_precision=double_precision) decoded = ujson.ujson_loads(encoded) - assert decoded == "0.99999999999999944" + assert decoded == value @pytest.mark.parametrize("ensure_ascii", [True, False]) def test_encode_string_conversion(self, ensure_ascii): @@ -991,7 +955,7 @@ def test_decode_array(self, arr): def test_decode_extreme_numbers(self, extreme_num): assert extreme_num == ujson.ujson_loads(str(extreme_num)) - @pytest.mark.parametrize("too_extreme_num", [f"{2**64}", f"{-2**63-1}"]) + @pytest.mark.parametrize("too_extreme_num", [f"{2**64}", f"{-(2**63) - 1}"]) def test_decode_too_extreme_numbers(self, too_extreme_num): with pytest.raises( ValueError, @@ -1006,7 +970,7 @@ def test_decode_with_trailing_non_whitespaces(self): 
with pytest.raises(ValueError, match="Trailing data"): ujson.ujson_loads("{}\n\t a") - @pytest.mark.parametrize("value", [f"{2**64}", f"{-2**63-1}"]) + @pytest.mark.parametrize("value", [f"{2**64}", f"{-(2**63) - 1}"]) def test_decode_array_with_big_int(self, value): with pytest.raises( ValueError, diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index ed2e729430b01..a73327beea8bb 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -131,8 +131,7 @@ def test_catch_too_many_names(all_parsers): msg = ( "Too many columns specified: expected 4 and found 3" if parser.engine == "c" - else "Number of passed names did not match " - "number of header fields in the file" + else "Number of passed names did not match number of header fields in the file" ) with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/io/parser/test_mangle_dupes.py b/pandas/tests/io/parser/test_mangle_dupes.py index d3789cd387c05..55c8bbc4bb9e1 100644 --- a/pandas/tests/io/parser/test_mangle_dupes.py +++ b/pandas/tests/io/parser/test_mangle_dupes.py @@ -136,7 +136,7 @@ def test_mangled_unnamed_placeholders(all_parsers): expected = DataFrame(columns=Index([], dtype="str")) for j in range(i + 1): - col_name = "Unnamed: 0" + f".{1*j}" * min(j, 1) + col_name = "Unnamed: 0" + f".{1 * j}" * min(j, 1) expected.insert(loc=0, column=col_name, value=[0, 1, 2]) expected[orig_key] = orig_value diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 1411ed5019766..9a15d9bc84a2e 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -228,7 +228,7 @@ def test_parse_tz_aware(all_parsers): def test_read_with_parse_dates_scalar_non_bool(all_parsers, kwargs): # see gh-5636 parser = all_parsers - msg = "Only booleans and lists " "are accepted for the 'parse_dates' parameter" + msg = "Only booleans and lists are accepted for the 'parse_dates' parameter" data = """A,B,C 1,2,2003-11-1""" @@ -239,7 +239,7 @@ def test_read_with_parse_dates_scalar_non_bool(all_parsers, kwargs): @pytest.mark.parametrize("parse_dates", [(1,), np.array([4, 5]), {1, 3}]) def test_read_with_parse_dates_invalid_type(all_parsers, parse_dates): parser = all_parsers - msg = "Only booleans and lists " "are accepted for the 'parse_dates' parameter" + msg = "Only booleans and lists are accepted for the 'parse_dates' parameter" data = """A,B,C 1,2,2003-11-1""" diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 5c07a56c9fb3f..d897d251909fe 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -1503,8 +1503,7 @@ def test_bad_xml(parser): with pytest.raises( SyntaxError, match=( - "Extra content at the end of the document|" - "junk after document element" + "Extra content at the end of the document|junk after document element" ), ): read_xml( diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 9675b936c171e..c3b0219971446 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -427,7 +427,7 @@ def test_pie_series_autopct_and_fontsize(self): ax = _check_plot_works( series.plot.pie, colors=color_args, autopct="%.2f", fontsize=7 ) - pcts = [f"{s*100:.2f}" for s in series.values / series.sum()] + pcts = [f"{s * 100:.2f}" for s in series.values / series.sum()] expected_texts = 
list(chain.from_iterable(zip(series.index, pcts))) _check_text_labels(ax.texts, expected_texts) for t in ax.texts: diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 30e2c9dfe3d30..3cc95922e7f2f 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -353,7 +353,7 @@ def test_groupby_resample_interpolate_raises(groupy_test_df): for df in dfs: with pytest.raises( NotImplementedError, - match="Direct interpolation of MultiIndex data frames is " "not supported", + match="Direct interpolation of MultiIndex data frames is not supported", ): df.groupby("volume").resample("1D").interpolate(method="linear") diff --git a/pandas/tests/reshape/merge/test_merge_cross.py b/pandas/tests/reshape/merge/test_merge_cross.py index 14f9036e43fce..6ab80cf0e0823 100644 --- a/pandas/tests/reshape/merge/test_merge_cross.py +++ b/pandas/tests/reshape/merge/test_merge_cross.py @@ -42,8 +42,7 @@ def test_merge_cross_error_reporting(kwargs): left = DataFrame({"a": [1, 3]}) right = DataFrame({"b": [3, 4]}) msg = ( - "Can not pass on, right_on, left_on or set right_index=True or " - "left_index=True" + "Can not pass on, right_on, left_on or set right_index=True or left_index=True" ) with pytest.raises(MergeError, match=msg): merge(left, right, how="cross", **kwargs) @@ -94,8 +93,7 @@ def test_join_cross_error_reporting(): left = DataFrame({"a": [1, 3]}) right = DataFrame({"a": [3, 4]}) msg = ( - "Can not pass on, right_on, left_on or set right_index=True or " - "left_index=True" + "Can not pass on, right_on, left_on or set right_index=True or left_index=True" ) with pytest.raises(MergeError, match=msg): left.join(right, how="cross", on="a") diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index e029dfc3b2703..45caeb1733590 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -353,8 +353,7 @@ def test_construction(): Timedelta("foo") msg = ( - "cannot construct a Timedelta from " - "the passed arguments, allowed keywords are " + "cannot construct a Timedelta from the passed arguments, allowed keywords are " ) with pytest.raises(ValueError, match=msg): Timedelta(day=10) diff --git a/pandas/tests/series/methods/test_between.py b/pandas/tests/series/methods/test_between.py index e67eafbd118ce..f035767e2ce0e 100644 --- a/pandas/tests/series/methods/test_between.py +++ b/pandas/tests/series/methods/test_between.py @@ -66,8 +66,7 @@ def test_between_error_args(self, inclusive): left, right = series[[2, 7]] value_error_msg = ( - "Inclusive has to be either string of 'both'," - "'left', 'right', or 'neither'." + "Inclusive has to be either string of 'both','left', 'right', or 'neither'." 
) series = Series(date_range("1/1/2000", periods=10)) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 74b051aec71a4..566fd8d901569 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1935,7 +1935,7 @@ def test_to_datetime_unit_na_values(self): @pytest.mark.parametrize("bad_val", ["foo", 111111111]) def test_to_datetime_unit_invalid(self, bad_val): if bad_val == "foo": - msg = "Unknown datetime string format, unable to parse: " f"{bad_val}" + msg = f"Unknown datetime string format, unable to parse: {bad_val}" else: msg = "cannot convert input 111111111 with the unit 'D'" with pytest.raises(ValueError, match=msg): @@ -2258,7 +2258,7 @@ def test_to_datetime_iso8601_exact_fails(self, input, format): [ '^unconverted data remains when parsing with format ".*": ".*". ' f"{PARSING_ERR_MSG}$", - f'^time data ".*" doesn\'t match format ".*". ' f"{PARSING_ERR_MSG}$", + f'^time data ".*" doesn\'t match format ".*". {PARSING_ERR_MSG}$', ] ) with pytest.raises( diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index f3645bf0649bd..893f526fb3eb0 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -192,7 +192,7 @@ def test_numeric_df_columns(columns): # see gh-14827 df = DataFrame( { - "a": [1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), "0.1"], + "a": [1.2, decimal.Decimal("3.14"), decimal.Decimal("infinity"), "0.1"], "b": [1.0, 2.0, 3.0, 4.0], } ) @@ -207,10 +207,10 @@ def test_numeric_df_columns(columns): "data,exp_data", [ ( - [[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1], + [[decimal.Decimal("3.14"), 1.0], decimal.Decimal("1.6"), 0.1], [[3.14, 1.0], 1.6, 0.1], ), - ([np.array([decimal.Decimal(3.14), 1.0]), 0.1], [[3.14, 1.0], 0.1]), + ([np.array([decimal.Decimal("3.14"), 1.0]), 0.1], [[3.14, 1.0], 0.1]), ], ) def test_numeric_embedded_arr_likes(data, exp_data): diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index d0192c12f9518..7480b99595066 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -798,9 +798,9 @@ def test_get_offset(): for name, expected in pairs: offset = _get_offset(name) - assert ( - offset == expected - ), f"Expected {name!r} to yield {expected!r} (actual: {offset!r})" + assert offset == expected, ( + f"Expected {name!r} to yield {expected!r} (actual: {offset!r})" + ) def test_get_offset_legacy(): diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py index f91230e1460c4..46b6846ad1ec2 100644 --- a/pandas/tests/tseries/offsets/test_ticks.py +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -289,8 +289,7 @@ def test_tick_rdiv(cls): td64 = delta.to_timedelta64() instance__type = ".".join([cls.__module__, cls.__name__]) msg = ( - "unsupported operand type\\(s\\) for \\/: 'int'|'float' and " - f"'{instance__type}'" + f"unsupported operand type\\(s\\) for \\/: 'int'|'float' and '{instance__type}'" ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 07425af8ed37a..bc5cd5fcccbf8 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -134,10 +134,7 @@ def test_does_not_convert_mixed_integer(date_string, expected): ( "2013Q1", {"freq": "INVLD-L-DEC-SAT"}, - ( - "Unable to retrieve month information " - 
"from given freq: INVLD-L-DEC-SAT" - ), + ("Unable to retrieve month information from given freq: INVLD-L-DEC-SAT"), ), ], ) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 9a01568971af8..88ea1bfa3c6ed 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -145,8 +145,7 @@ def infer_freq( pass elif isinstance(index.dtype, PeriodDtype): raise TypeError( - "PeriodIndex given. Check the `freq` attribute " - "instead of using infer_freq." + "PeriodIndex given. Check the `freq` attribute instead of using infer_freq." ) elif lib.is_np_dtype(index.dtype, "m"): # Allow TimedeltaIndex and TimedeltaArray diff --git a/pyproject.toml b/pyproject.toml index 7ab9cd2c17669..c6af69438f849 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -746,5 +746,5 @@ exclude_lines = [ directory = "coverage_html_report" [tool.codespell] -ignore-words-list = "blocs, coo, hist, nd, sav, ser, recuse, nin, timere, expec, expecs, indext, SME, NotIn, tructures, tru" +ignore-words-list = "blocs, coo, hist, nd, sav, ser, recuse, nin, timere, expec, expecs, indext, SME, NotIn, tructures, tru, indx, abd, ABD" ignore-regex = 'https://([\w/\.])+'