diff --git a/.github/actions/setup-backend/action.yml b/.github/actions/setup-backend/action.yml index 2d2f993ffb20e..73345481d945d 100644 --- a/.github/actions/setup-backend/action.yml +++ b/.github/actions/setup-backend/action.yml @@ -26,11 +26,12 @@ runs: shell: bash run: | if [ "${{ inputs.python-version }}" = "current" ]; then - echo "PYTHON_VERSION=3.10" >> $GITHUB_ENV - elif [ "${{ inputs.python-version }}" = "next" ]; then echo "PYTHON_VERSION=3.11" >> $GITHUB_ENV + elif [ "${{ inputs.python-version }}" = "next" ]; then + # currently disabled in GHA matrices because of library compatibility issues + echo "PYTHON_VERSION=3.12" >> $GITHUB_ENV elif [ "${{ inputs.python-version }}" = "previous" ]; then - echo "PYTHON_VERSION=3.9" >> $GITHUB_ENV + echo "PYTHON_VERSION=3.10" >> $GITHUB_ENV else echo "PYTHON_VERSION=${{ inputs.python-version }}" >> $GITHUB_ENV fi @@ -43,6 +44,7 @@ runs: run: | if [ "${{ inputs.install-superset }}" = "true" ]; then sudo apt-get update && sudo apt-get -y install libldap2-dev libsasl2-dev + pip install --upgrade pip setuptools wheel uv if [ "${{ inputs.requirements-type }}" = "dev" ]; then diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index e102e630c813b..640468d2a216f 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: ["current", "next", "previous"] + python-version: ["current", "previous"] steps: - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 diff --git a/.github/workflows/superset-python-integrationtest.yml b/.github/workflows/superset-python-integrationtest.yml index a511882e6563d..3a7488966b1f0 100644 --- a/.github/workflows/superset-python-integrationtest.yml +++ b/.github/workflows/superset-python-integrationtest.yml @@ -77,7 +77,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: ["current", "next", "previous"] + python-version: ["current", "previous"] env: PYTHONPATH: ${{ github.workspace }} SUPERSET_CONFIG: tests.integration_tests.superset_test_config diff --git a/.github/workflows/superset-python-unittest.yml b/.github/workflows/superset-python-unittest.yml index c7bb82a73363c..c4cef8de24c83 100644 --- a/.github/workflows/superset-python-unittest.yml +++ b/.github/workflows/superset-python-unittest.yml @@ -19,7 +19,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: ["current", "next"] + python-version: ["previous", "current"] env: PYTHONPATH: ${{ github.workspace }} steps: diff --git a/Dockerfile b/Dockerfile index 7297ad139337b..76983ae773cf7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,7 @@ ###################################################################### # Node stage to deal with static asset construction ###################################################################### -ARG PY_VER=3.10-slim-bookworm +ARG PY_VER=3.11-slim-bookworm # If BUILDPLATFORM is null, set it to 'amd64' (or leave as is otherwise). ARG BUILDPLATFORM=${BUILDPLATFORM:-amd64} diff --git a/UPDATING.md b/UPDATING.md index b680f2701c395..ee9ec1fd960c3 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -32,6 +32,7 @@ assists people when migrating to a new version. - [31262](https://github.com/apache/superset/pull/31262) NOTE: deprecated `pylint` in favor of `ruff` as our only python linter. Only affects development workflows positively (not the release itself). 
It should cover the most important rules and be much faster, but some linting rules that were enforced before may not be enforced in exactly the same way as before. - [31173](https://github.com/apache/superset/pull/31173) Modified `fetch_csrf_token` to align with HTTP standards, particularly regarding how cookies are handled. If you encounter any issues related to CSRF functionality, please report them as a new issue and reference this PR for context. - [31385](https://github.com/apache/superset/pull/31385) Significant docker refactor, reducing access levels for the `superset` user, streamlining layer building, ... +- [31503](https://github.com/apache/superset/pull/31503) Deprecating Python 3.9.x support. Python 3.11 is now the recommended version, and 3.10 remains supported throughout the Superset 5.0 lifecycle. ### Potential Downtime diff --git a/pyproject.toml b/pyproject.toml index 453134f9a3435..715e0f3db1985 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,13 +24,12 @@ name = "apache-superset" description = "A modern, enterprise-ready business intelligence web application" readme = "README.md" dynamic = ["version", "scripts", "entry-points"] -requires-python = ">=3.9" +requires-python = ">=3.10" license = { file="LICENSE.txt" } authors = [ { name = "Apache Software Foundation", email = "dev@superset.apache.org" }, ] classifiers = [ - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] @@ -67,7 +66,7 @@ dependencies = [ "markdown>=3.0", "msgpack>=1.0.0, <1.1", "nh3>=0.2.11, <0.3", - "numpy==1.23.5", + "numpy>1.23.5, <2", "packaging", # -------------------------- # pandas and related (wanting pandas[performance] without numba as it's 100+MB and not needed) @@ -275,8 +274,8 @@ exclude = [ line-length = 88 indent-width = 4 -# Assume Python 3.9 -target-version = "py39" +# Assume Python 3.10 +target-version = "py310" [tool.ruff.lint] # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. 
diff --git a/requirements/base.in b/requirements/base.in index 17f5379cc837f..3cf35921545a1 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -23,8 +23,3 @@ numexpr>=2.9.0 # 5.0.0 has a sensitive deprecation used in other libs # -> https://github.com/aio-libs/async-timeout/blob/master/CHANGES.rst#500-2024-10-31 async_timeout>=4.0.0,<5.0.0 - -# playwright requires greenlet==3.0.3 -# submitted a PR to relax deps in 11/2024 -# https://github.com/microsoft/playwright-python/pull/2669 -greenlet==3.0.3 diff --git a/requirements/base.txt b/requirements/base.txt index 821655c17eeee..5540942598769 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -153,7 +153,6 @@ google-auth==2.36.0 # via shillelagh greenlet==3.0.3 # via - # -r requirements/base.in # apache-superset (pyproject.toml) # shillelagh # sqlalchemy @@ -230,7 +229,7 @@ nh3==0.2.19 # via apache-superset (pyproject.toml) numexpr==2.10.2 # via -r requirements/base.in -numpy==1.23.5 +numpy==1.26.4 # via # apache-superset (pyproject.toml) # bottleneck diff --git a/requirements/development.txt b/requirements/development.txt index 3b2203f466da6..2343428738efc 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -463,7 +463,7 @@ nh3==0.2.19 # apache-superset nodeenv==1.8.0 # via pre-commit -numpy==1.23.5 +numpy==1.26.4 # via # -c requirements/base.txt # apache-superset diff --git a/superset/commands/dashboard/export.py b/superset/commands/dashboard/export.py index 93cc490ad73de..719aed6be5191 100644 --- a/superset/commands/dashboard/export.py +++ b/superset/commands/dashboard/export.py @@ -83,7 +83,7 @@ def append_charts(position: dict[str, Any], charts: set[Slice]) -> dict[str, Any "parents": ["ROOT_ID", "GRID_ID"], } - for chart_hash, chart in zip(chart_hashes, charts): + for chart_hash, chart in zip(chart_hashes, charts, strict=False): position[chart_hash] = { "children": [], "id": chart_hash, diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index bb27678717196..2acc7b12b6647 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -1907,6 +1907,7 @@ def query_datasources_by_permissions( # pylint: disable=invalid-name for method, perms in zip( (SqlaTable.perm, SqlaTable.schema_perm, SqlaTable.catalog_perm), (permissions, schema_perms, catalog_perms), + strict=False, ) if perms ] diff --git a/superset/db_engine_specs/hive.py b/superset/db_engine_specs/hive.py index 9491ff5882eca..55e1187402a36 100644 --- a/superset/db_engine_specs/hive.py +++ b/superset/db_engine_specs/hive.py @@ -440,7 +440,7 @@ def where_latest_partition( # table is not partitioned return None if values is not None and columns is not None: - for col_name, value in zip(col_names, values): + for col_name, value in zip(col_names, values, strict=False): for clm in columns: if clm.get("name") == col_name: query = query.where(Column(col_name) == value) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index e740ca938e254..a7b97ed699639 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -348,7 +348,9 @@ def identity(x: Any) -> Any: rows = [ tuple( sanitize_func(val) - for sanitize_func, val in zip(sanitization_functions, row) + for sanitize_func, val in zip( + sanitization_functions, row, strict=False + ) ) for row in rows ] diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py index 3a3dadbbd5842..6f27503a2f285 100644 --- 
a/superset/db_engine_specs/presto.py +++ b/superset/db_engine_specs/presto.py @@ -545,7 +545,7 @@ def where_latest_partition( column.get("column_name"): column.get("type") for column in columns or [] } - for col_name, value in zip(col_names, values): + for col_name, value in zip(col_names, values, strict=False): col_type = column_type_by_name.get(col_name) if isinstance(col_type, str): @@ -1240,7 +1240,7 @@ def expand_data( # pylint: disable=too-many-locals # noqa: C901 if isinstance(values, str): values = cast(Optional[list[Any]], destringify(values)) row[name] = values - for value, col in zip(values or [], expanded): + for value, col in zip(values or [], expanded, strict=False): row[col["column_name"]] = value data = [ @@ -1271,7 +1271,7 @@ def get_extra_table_metadata( metadata["partitions"] = { "cols": sorted(indexes[0].get("column_names", [])), - "latest": dict(zip(col_names, latest_parts)), + "latest": dict(zip(col_names, latest_parts, strict=False)), "partitionQuery": cls._partition_query( table=table, indexes=indexes, diff --git a/superset/db_engine_specs/redshift.py b/superset/db_engine_specs/redshift.py index 8b5a35759b0c9..d1a09cf78bb69 100644 --- a/superset/db_engine_specs/redshift.py +++ b/superset/db_engine_specs/redshift.py @@ -131,7 +131,7 @@ def df_to_sql( # uses the max size for redshift nvarchar(65335) # the default object and string types create a varchar(256) col_name: NVARCHAR(length=65535) - for col_name, type in zip(df.columns, df.dtypes) + for col_name, type in zip(df.columns, df.dtypes, strict=False) if isinstance(type, pd.StringDtype) } diff --git a/superset/db_engine_specs/trino.py b/superset/db_engine_specs/trino.py index e4567082e44f6..c7ae2b0f82ebd 100644 --- a/superset/db_engine_specs/trino.py +++ b/superset/db_engine_specs/trino.py @@ -111,7 +111,7 @@ def get_extra_table_metadata( } ) ), - "latest": dict(zip(col_names, latest_parts)), + "latest": dict(zip(col_names, latest_parts, strict=False)), "partitionQuery": cls._partition_query( table=table, indexes=indexes, diff --git a/superset/extensions/metadb.py b/superset/extensions/metadb.py index 3a95ab5d7b75d..8409aed241e2e 100644 --- a/superset/extensions/metadb.py +++ b/superset/extensions/metadb.py @@ -412,7 +412,7 @@ def get_data( connection = engine.connect() rows = connection.execute(query) for i, row in enumerate(rows): - data = dict(zip(self.columns, row)) + data = dict(zip(self.columns, row, strict=False)) data["rowid"] = data[self._rowid] if self._rowid else i yield data diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 3d8f109c95fc3..1cad46ca62aed 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -1976,7 +1976,7 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma self.make_orderby_compatible(select_exprs, orderby_exprs) - for col, (orig_col, ascending) in zip(orderby_exprs, orderby): # noqa: B007 + for col, (orig_col, ascending) in zip(orderby_exprs, orderby, strict=False): # noqa: B007 if not db_engine_spec.allows_alias_in_orderby and isinstance(col, Label): # if engine does not allow using SELECT alias in ORDER BY # revert to the underlying column diff --git a/superset/result_set.py b/superset/result_set.py index eca00de4faf5c..f6daa4b99ebb5 100644 --- a/superset/result_set.py +++ b/superset/result_set.py @@ -123,7 +123,9 @@ def __init__( # pylint: disable=too-many-locals # noqa: C901 # fix cursor descriptor with the deduped names deduped_cursor_desc = [ tuple([column_name, *list(description)[1:]]) # noqa: C409 - 
for column_name, description in zip(column_names, cursor_description) + for column_name, description in zip( + column_names, cursor_description, strict=False + ) ] # generate numpy structured array dtype diff --git a/superset/utils/excel.py b/superset/utils/excel.py index 602549975f113..d34446832a8cc 100644 --- a/superset/utils/excel.py +++ b/superset/utils/excel.py @@ -56,7 +56,7 @@ def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> Any: def apply_column_types( df: pd.DataFrame, column_types: list[GenericDataType] ) -> pd.DataFrame: - for column, column_type in zip(df.columns, column_types): + for column, column_type in zip(df.columns, column_types, strict=False): if column_type == GenericDataType.NUMERIC: try: df[column] = pd.to_numeric(df[column]) diff --git a/superset/utils/mock_data.py b/superset/utils/mock_data.py index 88c9d5a57e5f2..b156273dc0b7a 100644 --- a/superset/utils/mock_data.py +++ b/superset/utils/mock_data.py @@ -221,8 +221,11 @@ def get_column_objects(columns: list[ColumnInfo]) -> list[Column]: def generate_data(columns: list[ColumnInfo], num_rows: int) -> list[dict[str, Any]]: keys = [column["name"] for column in columns] return [ - dict(zip(keys, row)) - for row in zip(*[generate_column_data(column, num_rows) for column in columns]) + dict(zip(keys, row, strict=False)) + for row in zip( + *[generate_column_data(column, num_rows) for column in columns], + strict=False, + ) ] diff --git a/superset/utils/pandas_postprocessing/compare.py b/superset/utils/pandas_postprocessing/compare.py index 64442280b2af2..22b345bb31170 100644 --- a/superset/utils/pandas_postprocessing/compare.py +++ b/superset/utils/pandas_postprocessing/compare.py @@ -59,7 +59,7 @@ def compare( # pylint: disable=too-many-arguments if len(source_columns) == 0: return df - for s_col, c_col in zip(source_columns, compare_columns): + for s_col, c_col in zip(source_columns, compare_columns, strict=False): s_df = df.loc[:, [s_col]] s_df.rename(columns={s_col: "__intermediate"}, inplace=True) c_df = df.loc[:, [c_col]] diff --git a/superset/utils/pandas_postprocessing/geography.py b/superset/utils/pandas_postprocessing/geography.py index 79046cb71a1b2..c5f46cd490ccc 100644 --- a/superset/utils/pandas_postprocessing/geography.py +++ b/superset/utils/pandas_postprocessing/geography.py @@ -40,7 +40,7 @@ def geohash_decode( try: lonlat_df = DataFrame() lonlat_df["latitude"], lonlat_df["longitude"] = zip( - *df[geohash].apply(geohash_lib.decode) + *df[geohash].apply(geohash_lib.decode), strict=False ) return _append_columns( df, lonlat_df, {"latitude": latitude, "longitude": longitude} @@ -109,7 +109,7 @@ def _parse_location(location: str) -> tuple[float, float, float]: geodetic_df["latitude"], geodetic_df["longitude"], geodetic_df["altitude"], - ) = zip(*df[geodetic].apply(_parse_location)) + ) = zip(*df[geodetic].apply(_parse_location), strict=False) columns = {"latitude": latitude, "longitude": longitude} if altitude: columns["altitude"] = altitude diff --git a/superset/utils/pandas_postprocessing/histogram.py b/superset/utils/pandas_postprocessing/histogram.py index 74fc68e227807..f55f0d0762673 100644 --- a/superset/utils/pandas_postprocessing/histogram.py +++ b/superset/utils/pandas_postprocessing/histogram.py @@ -71,7 +71,7 @@ def hist_values(series: Series) -> np.ndarray: if len(groupby) == 0: # without grouping - hist_dict = dict(zip(bin_edges_str, hist_values(df[column]))) + hist_dict = dict(zip(bin_edges_str, hist_values(df[column]), strict=False)) histogram_df = DataFrame(hist_dict, index=[0]) else: # 
with grouping diff --git a/superset/viz.py b/superset/viz.py index 7e13402f6cfc0..a42ce94203091 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -1483,6 +1483,7 @@ def get_data(self, df: pd.DataFrame) -> VizData: df[self.form_data.get("all_columns_y")], metric_col, point_radius_col, + strict=False, ) ], } @@ -1606,6 +1607,7 @@ def process_spatial_data_obj(self, key: str, df: pd.DataFrame) -> pd.DataFrame: zip( pd.to_numeric(df[spatial.get("lonCol")], errors="coerce"), pd.to_numeric(df[spatial.get("latCol")], errors="coerce"), + strict=False, ) ) elif spatial.get("type") == "delimited": diff --git a/tests/integration_tests/core_tests.py b/tests/integration_tests/core_tests.py index ec2f0d6bdec93..af59118aaaace 100644 --- a/tests/integration_tests/core_tests.py +++ b/tests/integration_tests/core_tests.py @@ -679,7 +679,9 @@ def test_explore_json_dist_bar_order(self): count_ds = series["values"] if series["key"] == "COUNT(name)": count_name = series["values"] - for expected, actual_ds, actual_name in zip(resp["data"], count_ds, count_name): + for expected, actual_ds, actual_name in zip( + resp["data"], count_ds, count_name, strict=False + ): assert expected["count_name"] == actual_name["y"] assert expected["count_ds"] == actual_ds["y"] diff --git a/tests/integration_tests/db_engine_specs/presto_tests.py b/tests/integration_tests/db_engine_specs/presto_tests.py index 6ee639552ccf9..c57bec88008d0 100644 --- a/tests/integration_tests/db_engine_specs/presto_tests.py +++ b/tests/integration_tests/db_engine_specs/presto_tests.py @@ -87,7 +87,7 @@ def verify_presto_column(self, column, expected_results): inspector.bind.execute.return_value.fetchall = mock.Mock(return_value=[row]) results = PrestoEngineSpec.get_columns(inspector, Table("", "")) assert len(expected_results) == len(results) - for expected_result, result in zip(expected_results, results): + for expected_result, result in zip(expected_results, results, strict=False): assert expected_result[0] == result["column_name"] assert expected_result[1] == str(result["type"]) @@ -191,7 +191,9 @@ def test_presto_get_fields(self): "label": 'column."quoted.nested obj"', }, ] - for actual_result, expected_result in zip(actual_results, expected_results): + for actual_result, expected_result in zip( + actual_results, expected_results, strict=False + ): assert actual_result.element.name == expected_result["column_name"] assert actual_result.name == expected_result["label"] diff --git a/tests/integration_tests/dict_import_export_tests.py b/tests/integration_tests/dict_import_export_tests.py index bff144630f4cf..c9f1436f06680 100644 --- a/tests/integration_tests/dict_import_export_tests.py +++ b/tests/integration_tests/dict_import_export_tests.py @@ -80,7 +80,8 @@ def create_table( "id": id, "params": json.dumps(params), "columns": [ - {"column_name": c, "uuid": u} for c, u in zip(cols_names, cols_uuids) + {"column_name": c, "uuid": u} + for c, u in zip(cols_names, cols_uuids, strict=False) ], "metrics": [{"metric_name": c, "expression": ""} for c in metric_names], } @@ -88,7 +89,7 @@ def create_table( table = SqlaTable( id=id, schema=schema, table_name=name, params=json.dumps(params) ) - for col_name, uuid in zip(cols_names, cols_uuids): + for col_name, uuid in zip(cols_names, cols_uuids, strict=False): table.columns.append(TableColumn(column_name=col_name, uuid=uuid)) for metric_name in metric_names: table.metrics.append(SqlMetric(metric_name=metric_name, expression="")) diff --git a/tests/integration_tests/import_export_tests.py 
b/tests/integration_tests/import_export_tests.py index d4acc010347f3..89b29570884d3 100644 --- a/tests/integration_tests/import_export_tests.py +++ b/tests/integration_tests/import_export_tests.py @@ -153,7 +153,7 @@ def assert_dash_equals( assert len(expected_dash.slices) == len(actual_dash.slices) expected_slices = sorted(expected_dash.slices, key=lambda s: s.slice_name or "") actual_slices = sorted(actual_dash.slices, key=lambda s: s.slice_name or "") - for e_slc, a_slc in zip(expected_slices, actual_slices): + for e_slc, a_slc in zip(expected_slices, actual_slices, strict=False): self.assert_slice_equals(e_slc, a_slc) if check_position: assert expected_dash.position_json == actual_dash.position_json @@ -212,7 +212,7 @@ def assert_only_exported_slc_fields(self, expected_dash, actual_dash): """ expected_slices = sorted(expected_dash.slices, key=lambda s: s.slice_name or "") actual_slices = sorted(actual_dash.slices, key=lambda s: s.slice_name or "") - for e_slc, a_slc in zip(expected_slices, actual_slices): + for e_slc, a_slc in zip(expected_slices, actual_slices, strict=False): params = a_slc.params_dict assert e_slc.datasource.name == params["datasource_name"] assert e_slc.datasource.schema == params["schema"] diff --git a/tests/unit_tests/sql_parse_tests.py b/tests/unit_tests/sql_parse_tests.py index 3b44c1c2cf3c7..9c814a0f4221f 100644 --- a/tests/unit_tests/sql_parse_tests.py +++ b/tests/unit_tests/sql_parse_tests.py @@ -1507,7 +1507,7 @@ def get_rls_for_table( else candidate_table.table ) for left, right in zip( - candidate_table_name.split(".")[::-1], table.split(".")[::-1] + candidate_table_name.split(".")[::-1], table.split(".")[::-1], strict=False ): if left != right: return None @@ -1719,7 +1719,9 @@ def get_rls_for_table( Return the RLS ``condition`` if ``candidate`` matches ``table``. """ # compare ignoring schema - for left, right in zip(str(candidate).split(".")[::-1], table.split(".")[::-1]): + for left, right in zip( + str(candidate).split(".")[::-1], table.split(".")[::-1], strict=False + ): if left != right: return None return condition
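A note on the `zip(..., strict=False)` changes throughout this diff: Python 3.10 added an optional `strict` keyword to the built-in `zip()` (PEP 618), and ruff's B905 rule flags `zip()` calls that omit it once `target-version = "py310"` is set. The snippet below is a minimal illustrative sketch (the variable names are hypothetical, not taken from Superset) of the behavior the flag controls: `strict=False` keeps the long-standing truncating behavior, while `strict=True` turns a silent length mismatch into an error.

# Minimal sketch, Python >= 3.10 only; the names below are made up for illustration.
column_names = ["ds", "name"]
row = ("2024-01-01", "Alice", "extra")  # one more element than column_names

# strict=False (the option used in this diff) silently truncates to the shorter
# input, matching what zip() always did before Python 3.10.
print(dict(zip(column_names, row, strict=False)))  # {'ds': '2024-01-01', 'name': 'Alice'}

# strict=True raises instead, surfacing the mismatch early.
try:
    dict(zip(column_names, row, strict=True))
except ValueError as exc:
    print(exc)  # e.g. "zip() argument 2 is longer than argument 1"

Passing `strict=False` everywhere preserves the pre-3.10 semantics exactly; call sites where the iterables are guaranteed to be the same length could later be tightened to `strict=True`.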