From 313f16ebb0fa062318932ef45b52c539a7f2a1da Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Wed, 18 Aug 2021 14:27:28 -0700 Subject: [PATCH 1/4] feat: improve HTML table in text reports --- setup.cfg | 2 +- superset/charts/api.py | 15 ++--- superset/charts/post_processing.py | 38 ++++++------ superset/common/query_actions.py | 1 + superset/reports/commands/exceptions.py | 8 +++ superset/reports/commands/execute.py | 82 +++++++++++++++++-------- superset/reports/notifications/email.py | 44 +++++++++---- superset/reports/notifications/slack.py | 21 +++++-- superset/utils/csv.py | 19 ++++++ 9 files changed, 160 insertions(+), 70 deletions(-) diff --git a/setup.cfg b/setup.cfg index 15817a8039a38..9a108f76a480e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ combine_as_imports = true include_trailing_comma = true line_length = 88 known_first_party = superset -known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,pyhive,pyparsing,pytest,pytest_mock,pytz,redis,requests,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,tabulate,typing_extensions,urllib3,werkzeug,wtforms,wtforms_json,yaml +known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,pyhive,pyparsing,pytest,pytest_mock,pytz,redis,requests,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,urllib3,werkzeug,wtforms,wtforms_json,yaml multi_line_output = 3 order_by_type = false diff --git a/superset/charts/api.py b/superset/charts/api.py index db86615308715..3fa48c076b177 100644 --- a/superset/charts/api.py +++ b/superset/charts/api.py @@ -499,15 +499,6 @@ def send_chart_response( result_type = result["query_context"].result_type result_format = result["query_context"].result_format - # Post-process the data so it matches the data presented in the chart. - # This is needed for sending reports based on text charts that do the - # post-processing of data, eg, the pivot table. - if ( - result_type == ChartDataResultType.POST_PROCESSED - and result_format == ChartDataResultFormat.CSV - ): - result = apply_post_process(result, form_data) - if result_format == ChartDataResultFormat.CSV: # Verify user has permission to export CSV file if not security_manager.can_access("can_csv", "Superset"): @@ -518,6 +509,12 @@ def send_chart_response( return CsvResponse(data, headers=generate_download_headers("csv")) if result_format == ChartDataResultFormat.JSON: + # Post-process the data so it matches the data presented in the chart. 
+ # This is needed for sending reports based on text charts that do the + # post-processing of data, eg, the pivot table. + if result_type == ChartDataResultType.POST_PROCESSED: + result = apply_post_process(result, form_data) + response_data = simplejson.dumps( {"result": result["queries"]}, default=json_int_dttm_ser, diff --git a/superset/charts/post_processing.py b/superset/charts/post_processing.py index 4919907ded852..566c2d2bd02de 100644 --- a/superset/charts/post_processing.py +++ b/superset/charts/post_processing.py @@ -126,11 +126,13 @@ def pivot_df( # pylint: disable=too-many-locals, too-many-arguments, too-many-s total = df.sum(axis=axis["columns"]) df = df.astype(total.dtypes).div(total, axis=axis["rows"]) - if show_rows_total: - # convert to a MultiIndex to simplify logic - if not isinstance(df.columns, pd.MultiIndex): - df.columns = pd.MultiIndex.from_tuples([(str(i),) for i in df.columns]) + # convert to a MultiIndex to simplify logic + if not isinstance(df.index, pd.MultiIndex): + df.index = pd.MultiIndex.from_tuples([(str(i),) for i in df.index]) + if not isinstance(df.columns, pd.MultiIndex): + df.columns = pd.MultiIndex.from_tuples([(str(i),) for i in df.columns]) + if rows and show_rows_total: # add subtotal for each group and overall total; we start from the # overall group, and iterate deeper into subgroups groups = df.columns @@ -146,10 +148,6 @@ def pivot_df( # pylint: disable=too-many-locals, too-many-arguments, too-many-s df.insert(int(slice_.stop), subtotal_name, subtotal) if rows and show_columns_total: - # convert to a MultiIndex to simplify logic - if not isinstance(df.index, pd.MultiIndex): - df.index = pd.MultiIndex.from_tuples([(str(i),) for i in df.index]) - # add subtotal for each group and overall total; we start from the # overall group, and iterate deeper into subgroups groups = df.index @@ -279,24 +277,28 @@ def apply_post_process( post_processor = post_processors[viz_type] for query in result["queries"]: - df = pd.read_csv(StringIO(query["data"])) + df = pd.DataFrame.from_dict(query["data"]) processed_df = post_processor(df, form_data) - # flatten column names + query["colnames"] = list(processed_df.columns) + query["indexnames"] = list(processed_df.index) + query["coltypes"] = extract_dataframe_dtypes(processed_df) + query["rowcount"] = len(processed_df.index) + + # flatten columns/index so we can encode data as JSON processed_df.columns = [ " ".join(str(name) for name in column).strip() if isinstance(column, tuple) else column for column in processed_df.columns ] + processed_df.index = [ + " ".join(str(name) for name in index).strip() + if isinstance(index, tuple) + else index + for index in processed_df.index + ] - buf = StringIO() - processed_df.to_csv(buf) - buf.seek(0) - - query["data"] = buf.getvalue() - query["colnames"] = list(processed_df.columns) - query["coltypes"] = extract_dataframe_dtypes(processed_df) - query["rowcount"] = len(processed_df.index) + query["data"] = processed_df.to_dict() return result diff --git a/superset/common/query_actions.py b/superset/common/query_actions.py index 1987872470bfa..ea8610433993d 100644 --- a/superset/common/query_actions.py +++ b/superset/common/query_actions.py @@ -101,6 +101,7 @@ def _get_full( status = payload["status"] if status != QueryStatus.FAILED: payload["colnames"] = list(df.columns) + payload["indexnames"] = list(df.index) payload["coltypes"] = extract_dataframe_dtypes(df) payload["data"] = query_context.get_data(df) del payload["df"] diff --git a/superset/reports/commands/exceptions.py 
b/superset/reports/commands/exceptions.py index dfe2402da0449..c59ba3e8b4c95 100644 --- a/superset/reports/commands/exceptions.py +++ b/superset/reports/commands/exceptions.py @@ -116,6 +116,10 @@ class ReportScheduleCsvFailedError(CommandException): message = _("Report Schedule execution failed when generating a csv.") +class ReportScheduleDataFrameFailedError(CommandException): + message = _("Report Schedule execution failed when generating a dataframe.") + + class ReportScheduleExecuteUnexpectedError(CommandException): message = _("Report Schedule execution got an unexpected error.") @@ -171,6 +175,10 @@ class ReportScheduleCsvTimeout(CommandException): message = _("A timeout occurred while generating a csv.") +class ReportScheduleDataFrameTimeout(CommandException): + message = _("A timeout occurred while generating a dataframe.") + + class ReportScheduleAlertGracePeriodError(CommandException): message = _("Alert fired during grace period.") diff --git a/superset/reports/commands/execute.py b/superset/reports/commands/execute.py index 065e38bc75362..c52ffb3c8fa61 100644 --- a/superset/reports/commands/execute.py +++ b/superset/reports/commands/execute.py @@ -17,7 +17,6 @@ import json import logging from datetime import datetime, timedelta -from io import BytesIO from typing import Any, List, Optional from uuid import UUID @@ -45,6 +44,8 @@ ReportScheduleAlertGracePeriodError, ReportScheduleCsvFailedError, ReportScheduleCsvTimeout, + ReportScheduleDataFrameFailedError, + ReportScheduleDataFrameTimeout, ReportScheduleExecuteUnexpectedError, ReportScheduleNotFoundError, ReportScheduleNotificationError, @@ -65,7 +66,7 @@ from superset.reports.notifications.exceptions import NotificationError from superset.utils.celery import session_scope from superset.utils.core import ChartDataResultFormat, ChartDataResultType -from superset.utils.csv import get_chart_csv_data +from superset.utils.csv import get_chart_csv_data, get_chart_dataframe from superset.utils.screenshots import ( BaseScreenshot, ChartScreenshot, @@ -137,17 +138,23 @@ def create_log( # pylint: disable=too-many-arguments self._session.commit() def _get_url( - self, user_friendly: bool = False, csv: bool = False, **kwargs: Any + self, + user_friendly: bool = False, + result_format: Optional[ChartDataResultFormat] = None, + **kwargs: Any, ) -> str: """ Get the url for this report schedule: chart or dashboard """ if self._report_schedule.chart: - if csv: + if result_format in { + ChartDataResultFormat.CSV, + ChartDataResultFormat.JSON, + }: return get_url_path( "ChartRestApi.get_data", pk=self._report_schedule.chart_id, - format=ChartDataResultFormat.CSV.value, + format=result_format.value, type=ChartDataResultType.POST_PROCESSED.value, ) return get_url_path( @@ -213,28 +220,14 @@ def _get_screenshot(self) -> bytes: return image_data def _get_csv_data(self) -> bytes: - url = self._get_url(csv=True) + url = self._get_url(result_format=ChartDataResultFormat.CSV) auth_cookies = machine_auth_provider_factory.instance.get_auth_cookies( self._get_user() ) - # To load CSV data from the endpoint the chart must have been saved - # with its query context. For charts without saved query context we - # get a screenshot to force the chart to produce and save the query - # context. 
if self._report_schedule.chart.query_context is None: logger.warning("No query context found, taking a screenshot to generate it") - try: - self._get_screenshot() - except ( - ReportScheduleScreenshotFailedError, - ReportScheduleScreenshotTimeout, - ) as ex: - raise ReportScheduleCsvFailedError( - "Unable to fetch CSV data because the chart has no query context " - "saved, and an error occurred when fetching it via a screenshot. " - "Please try loading the chart and saving it again." - ) from ex + self._update_query_context() try: logger.info("Getting chart from %s", url) @@ -251,11 +244,50 @@ def _get_csv_data(self) -> bytes: def _get_embedded_data(self) -> pd.DataFrame: """ - Return data as an HTML table, to embed in the email. + Return data as a Pandas dataframe, to embed in notifications as a table. + """ + url = self._get_url(result_format=ChartDataResultFormat.JSON) + auth_cookies = machine_auth_provider_factory.instance.get_auth_cookies( + self._get_user() + ) + + if self._report_schedule.chart.query_context is None: + logger.warning("No query context found, taking a screenshot to generate it") + self._update_query_context() + + try: + logger.info("Getting chart from %s", url) + dataframe = get_chart_dataframe(url, auth_cookies) + except SoftTimeLimitExceeded as ex: + raise ReportScheduleDataFrameTimeout() from ex + except Exception as ex: + raise ReportScheduleDataFrameFailedError( + f"Failed generating dataframe {str(ex)}" + ) from ex + if dataframe is None: + raise ReportScheduleCsvFailedError() + return dataframe + + def _update_query_context(self) -> None: + """ + Update chart query context. + + To load CSV data from the endpoint the chart must have been saved + with its query context. For charts without saved query context we + get a screenshot to force the chart to produce and save the query + context. """ - buf = BytesIO(self._get_csv_data()) - df = pd.read_csv(buf) - return df + try: + self._get_screenshot() + except ( + ReportScheduleScreenshotFailedError, + ReportScheduleScreenshotTimeout, + ) as ex: + raise ReportScheduleCsvFailedError( + "Unable to fetch data because the chart has no query context " + "saved, and an error occurred when fetching it via a screenshot. " + "Please try loading the chart and saving it again." + ) from ex def _get_notification_content(self) -> NotificationContent: """ diff --git a/superset/reports/notifications/email.py b/superset/reports/notifications/email.py index a55824d4f8393..c1fce9939b3fe 100644 --- a/superset/reports/notifications/email.py +++ b/superset/reports/notifications/email.py @@ -17,6 +17,7 @@ # under the License. import json import logging +import textwrap from dataclasses import dataclass from email.utils import make_msgid, parseaddr from typing import Any, Dict, Optional @@ -33,6 +34,7 @@ logger = logging.getLogger(__name__) TABLE_TAGS = ["table", "th", "tr", "td", "thead", "tbody", "tfoot"] +TABLE_ATTRIBUTES = ["colspan", "rowspan", "halign", "border", "class"] @dataclass @@ -79,24 +81,40 @@ def _get_content(self) -> EmailContent: if self._content.embedded_data is not None: df = self._content.embedded_data html_table = bleach.clean( - df.to_html(na_rep="", index=False), tags=TABLE_TAGS + df.to_html(na_rep="", index=True), + tags=TABLE_TAGS, + attributes=TABLE_ATTRIBUTES, ) else: html_table = "" - body = __( - """ -
-            <p>%(description)s</p>
-            <b><a href="%(url)s">Explore in Superset</a></b><p></p>
-            %(html_table)s
-            %(img_tag)s
-            """,
-            description=description,
-            url=self._content.url,
-            html_table=html_table,
-            img_tag='<img width="1000px" src="cid:{}">'.format(msgid)
+        call_to_action = __("Explore in Superset")
+        img_tag = (
+            f'<img width="1000px" src="cid:{msgid}">'
             if self._content.screenshot
-            else "",
+            else ""
         )
+        body = textwrap.dedent(
+            f"""
+            <html>
+              <body>
+                <p>{description}</p>
+                <b><a href="{self._content.url}">{call_to_action}</a></b><p></p>
+                {html_table}
+                {img_tag}
+              </body>
+            </html>
+            """
+        )
 
         if self._content.screenshot:
             image = {msgid: self._content.screenshot}
diff --git a/superset/reports/notifications/slack.py b/superset/reports/notifications/slack.py
index 8d4078c92fb60..b5666a66f70da 100644
--- a/superset/reports/notifications/slack.py
+++ b/superset/reports/notifications/slack.py
@@ -24,7 +24,6 @@
 from flask_babel import gettext as __
 from slack import WebClient
 from slack.errors import SlackApiError, SlackClientError
-from tabulate import tabulate
 
 from superset import app
 from superset.models.reports import ReportRecipientType
@@ -89,6 +88,20 @@ def _get_body(self) -> str:
 
         # Embed data in the message
         df = self._content.embedded_data
 
+        # Flatten columns/index so they show up nicely in the table
+        df.columns = [
+            " ".join(str(name) for name in column).strip()
+            if isinstance(column, tuple)
+            else column
+            for column in df.columns
+        ]
+        df.index = [
+            " ".join(str(name) for name in index).strip()
+            if isinstance(index, tuple)
+            else index
+            for index in df.index
+        ]
+
         # Slack Markdown only works on messages shorter than 4k chars, so we might
         # need to truncate the data
         for i in range(len(df) - 1):
@@ -96,7 +109,7 @@ def _get_body(self) -> str:
             truncated_df = truncated_df.append(
                 {k: "..." for k in df.columns}, ignore_index=True
             )
-            tabulated = tabulate(truncated_df, headers="keys", showindex=False)
+            tabulated = truncated_df.to_markdown()
             table = f"```\n{tabulated}\n```\n\n(table was truncated)"
             message = self._message_template(table)
             if len(message) > MAXIMUM_MESSAGE_SIZE:
@@ -105,7 +118,7 @@ def _get_body(self) -> str:
             truncated_df = truncated_df.append(
                 {k: "..." for k in df.columns}, ignore_index=True
             )
-            tabulated = tabulate(truncated_df, headers="keys", showindex=False)
+            tabulated = truncated_df.to_markdown()
             table = (
                 f"```\n{tabulated}\n```\n\n(table was truncated)"
                 if len(truncated_df) > 0
@@ -115,7 +128,7 @@ def _get_body(self) -> str:
 
         # Send full data
         else:
-            tabulated = tabulate(df, headers="keys", showindex=False)
+            tabulated = df.to_markdown()
             table = f"```\n{tabulated}\n```"
 
         return self._message_template(table)
diff --git a/superset/utils/csv.py b/superset/utils/csv.py
index 3e5fe46212594..3c362f7fac432 100644
--- a/superset/utils/csv.py
+++ b/superset/utils/csv.py
@@ -20,6 +20,7 @@
 from urllib.error import URLError
 
 import pandas as pd
+import simplejson
 
 negative_number_re = re.compile(r"^-[0-9.]+$")
 
@@ -84,3 +85,21 @@ def get_chart_csv_data(
     if content:
         return content
     return None
+
+
+def get_chart_dataframe(
+    chart_url: str, auth_cookies: Optional[Dict[str, str]] = None
+) -> Optional[pd.DataFrame]:
+    content = get_chart_csv_data(chart_url, auth_cookies)
+    if content is None:
+        return None
+
+    result = simplejson.loads(content.decode("utf-8"))
+    df = pd.DataFrame.from_dict(result["result"][0]["data"])
+    df.columns = pd.MultiIndex.from_tuples(
+        tuple(colname) for colname in result["result"][0]["colnames"]
+    )
+    df.index = pd.MultiIndex.from_tuples(
+        tuple(indexname) for indexname in result["result"][0]["indexnames"]
+    )
+    return df

From 4697936b932c96e60f5a295d63946bde0b2a59ea Mon Sep 17 00:00:00 2001
From: Beto Dealmeida
Date: Wed, 18 Aug 2021 14:36:58 -0700
Subject: [PATCH 2/4] Remove unused import

---
 superset/charts/post_processing.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/superset/charts/post_processing.py b/superset/charts/post_processing.py
index 566c2d2bd02de..bb8c104e551e6 100644
--- a/superset/charts/post_processing.py
+++ b/superset/charts/post_processing.py
@@ -26,7 +26,6 @@ for these chart types.
""" -from io import StringIO from typing import Any, Dict, List, Optional, Tuple import pandas as pd From 33c1da2f3912de3d4ac879af33c2391cb10a5467 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Wed, 18 Aug 2021 15:08:59 -0700 Subject: [PATCH 3/4] Update tests --- superset/charts/post_processing.py | 2 +- .../unit_tests/charts/test_post_processing.py | 429 ++++-------------- 2 files changed, 88 insertions(+), 343 deletions(-) diff --git a/superset/charts/post_processing.py b/superset/charts/post_processing.py index bb8c104e551e6..96d1e7ca0ed44 100644 --- a/superset/charts/post_processing.py +++ b/superset/charts/post_processing.py @@ -131,7 +131,7 @@ def pivot_df( # pylint: disable=too-many-locals, too-many-arguments, too-many-s if not isinstance(df.columns, pd.MultiIndex): df.columns = pd.MultiIndex.from_tuples([(str(i),) for i in df.columns]) - if rows and show_rows_total: + if show_rows_total: # add subtotal for each group and overall total; we start from the # overall group, and iterate deeper into subgroups groups = df.columns diff --git a/tests/unit_tests/charts/test_post_processing.py b/tests/unit_tests/charts/test_post_processing.py index 94635779966e8..c82ae7ab715f2 100644 --- a/tests/unit_tests/charts/test_post_processing.py +++ b/tests/unit_tests/charts/test_post_processing.py @@ -15,265 +15,9 @@ # specific language governing permissions and limitations # under the License. -import copy -from typing import Any, Dict - import pandas as pd -from superset.charts.post_processing import apply_post_process, pivot_df -from superset.utils.core import GenericDataType, QueryStatus - -RESULT: Dict[str, Any] = { - "query_context": None, - "queries": [ - { - "cache_key": "1bd3ab8c01e98a0e349fb61bc76d9b90", - "cached_dttm": None, - "cache_timeout": 86400, - "annotation_data": {}, - "error": None, - "is_cached": None, - "query": """SELECT state AS state, - gender AS gender, - sum(num) AS \"Births\" -FROM birth_names -WHERE ds >= TO_TIMESTAMP('1921-07-28 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US') - AND ds < TO_TIMESTAMP('2021-07-28 10:39:44.000000', 'YYYY-MM-DD HH24:MI:SS.US') -GROUP BY state, - gender -LIMIT 50000; - -""", - "status": QueryStatus.SUCCESS, - "stacktrace": None, - "rowcount": 22, - "colnames": ["state", "gender", "Births"], - "coltypes": [ - GenericDataType.STRING, - GenericDataType.STRING, - GenericDataType.NUMERIC, - ], - "data": """state,gender,Births -OH,boy,2376385 -TX,girl,2313186 -MA,boy,1285126 -MA,girl,842146 -PA,boy,2390275 -NY,boy,3543961 -FL,boy,1968060 -TX,boy,3311985 -NJ,boy,1486126 -CA,girl,3567754 -CA,boy,5430796 -IL,girl,1614427 -FL,girl,1312593 -NY,girl,2280733 -NJ,girl,992702 -MI,girl,1326229 -other,girl,15058341 -other,boy,22044909 -MI,boy,1938321 -IL,boy,2357411 -PA,girl,1615383 -OH,girl,1622814 - """, - "applied_filters": [], - "rejected_filters": [], - } - ], -} - - -def test_pivot_table(): - form_data = { - "adhoc_filters": [], - "columns": ["state"], - "datasource": "3__table", - "date_format": "smart_date", - "extra_form_data": {}, - "granularity_sqla": "ds", - "groupby": ["gender"], - "metrics": [ - { - "aggregate": "SUM", - "column": {"column_name": "num", "type": "BIGINT"}, - "expressionType": "SIMPLE", - "label": "Births", - "optionName": "metric_11", - } - ], - "number_format": "SMART_NUMBER", - "order_desc": True, - "pandas_aggfunc": "sum", - "pivot_margins": True, - "row_limit": 50000, - "slice_id": 143, - "time_grain_sqla": "P1D", - "time_range": "100 years ago : now", - "time_range_endpoints": ["inclusive", "exclusive"], - "url_params": 
{}, - "viz_type": "pivot_table", - } - result = copy.deepcopy(RESULT) - assert apply_post_process(result, form_data) == { - "query_context": None, - "queries": [ - { - "cache_key": "1bd3ab8c01e98a0e349fb61bc76d9b90", - "cached_dttm": None, - "cache_timeout": 86400, - "annotation_data": {}, - "error": None, - "is_cached": None, - "query": """SELECT state AS state, - gender AS gender, - sum(num) AS \"Births\" -FROM birth_names -WHERE ds >= TO_TIMESTAMP('1921-07-28 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US') - AND ds < TO_TIMESTAMP('2021-07-28 10:39:44.000000', 'YYYY-MM-DD HH24:MI:SS.US') -GROUP BY state, - gender -LIMIT 50000; - -""", - "status": QueryStatus.SUCCESS, - "stacktrace": None, - "rowcount": 3, - "colnames": [ - "Births CA", - "Births FL", - "Births IL", - "Births MA", - "Births MI", - "Births NJ", - "Births NY", - "Births OH", - "Births PA", - "Births TX", - "Births other", - "Births Subtotal", - "Total (Sum)", - ], - "coltypes": [ - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - ], - "data": """,Births CA,Births FL,Births IL,Births MA,Births MI,Births NJ,Births NY,Births OH,Births PA,Births TX,Births other,Births Subtotal,Total (Sum) -boy,5430796,1968060,2357411,1285126,1938321,1486126,3543961,2376385,2390275,3311985,22044909,48133355,48133355 -girl,3567754,1312593,1614427,842146,1326229,992702,2280733,1622814,1615383,2313186,15058341,32546308,32546308 -Total (Sum),8998550,3280653,3971838,2127272,3264550,2478828,5824694,3999199,4005658,5625171,37103250,80679663,80679663 -""", - "applied_filters": [], - "rejected_filters": [], - } - ], - } - - -def test_pivot_table_v2(): - form_data = { - "adhoc_filters": [], - "aggregateFunction": "Sum as Fraction of Rows", - "colOrder": "key_a_to_z", - "colTotals": True, - "combineMetric": True, - "datasource": "3__table", - "date_format": "smart_date", - "extra_form_data": {}, - "granularity_sqla": "ds", - "groupbyColumns": ["state"], - "groupbyRows": ["gender"], - "metrics": [ - { - "aggregate": "SUM", - "column": {"column_name": "num", "type": "BIGINT"}, - "expressionType": "SIMPLE", - "label": "Births", - "optionName": "metric_11", - } - ], - "metricsLayout": "COLUMNS", - "rowOrder": "key_a_to_z", - "rowTotals": True, - "row_limit": 50000, - "slice_id": 72, - "time_grain_sqla": None, - "time_range": "100 years ago : now", - "time_range_endpoints": ["inclusive", "exclusive"], - "transposePivot": True, - "url_params": {}, - "valueFormat": "SMART_NUMBER", - "viz_type": "pivot_table_v2", - } - result = copy.deepcopy(RESULT) - assert apply_post_process(result, form_data) == { - "query_context": None, - "queries": [ - { - "cache_key": "1bd3ab8c01e98a0e349fb61bc76d9b90", - "cached_dttm": None, - "cache_timeout": 86400, - "annotation_data": {}, - "error": None, - "is_cached": None, - "query": """SELECT state AS state, - gender AS gender, - sum(num) AS \"Births\" -FROM birth_names -WHERE ds >= TO_TIMESTAMP('1921-07-28 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US') - AND ds < TO_TIMESTAMP('2021-07-28 10:39:44.000000', 'YYYY-MM-DD HH24:MI:SS.US') -GROUP BY state, - gender -LIMIT 50000; - -""", - "status": QueryStatus.SUCCESS, - "stacktrace": None, - "rowcount": 12, - "colnames": [ - "boy Births", - "boy Subtotal", - "girl Births", - "girl Subtotal", - 
"Total (Sum as Fraction of Rows)", - ], - "coltypes": [ - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - GenericDataType.NUMERIC, - ], - "data": """,boy Births,boy Subtotal,girl Births,girl Subtotal,Total (Sum as Fraction of Rows) -CA,0.6035190113962805,0.6035190113962805,0.3964809886037195,0.3964809886037195,1.0 -FL,0.5998988615985903,0.5998988615985903,0.4001011384014097,0.4001011384014097,1.0 -IL,0.5935315085862012,0.5935315085862012,0.40646849141379887,0.40646849141379887,1.0 -MA,0.6041192663655611,0.6041192663655611,0.3958807336344389,0.3958807336344389,1.0 -MI,0.5937482960898133,0.5937482960898133,0.4062517039101867,0.4062517039101867,1.0 -NJ,0.5995276800165239,0.5995276800165239,0.40047231998347604,0.40047231998347604,1.0 -NY,0.6084372844307357,0.6084372844307357,0.39156271556926425,0.39156271556926425,1.0 -OH,0.5942152416021308,0.5942152416021308,0.40578475839786915,0.40578475839786915,1.0 -PA,0.596724682935987,0.596724682935987,0.40327531706401293,0.40327531706401293,1.0 -TX,0.5887794344385264,0.5887794344385264,0.41122056556147357,0.41122056556147357,1.0 -other,0.5941503507105172,0.5941503507105172,0.40584964928948275,0.40584964928948275,1.0 -Total (Sum as Fraction of Rows),6.576651618170867,6.576651618170867,4.423348381829133,4.423348381829133,11.0 -""", - "applied_filters": [], - "rejected_filters": [], - } - ], - } +from superset.charts.post_processing import pivot_df def test_pivot_df_no_cols_no_rows_single_metric(): @@ -307,9 +51,9 @@ def test_pivot_df_no_cols_no_rows_single_metric(): assert ( pivoted.to_markdown() == """ -| metric | SUM(num) | -|:------------|------------:| -| Total (Sum) | 8.06797e+07 | +| | ('SUM(num)',) | +|:-----------------|----------------:| +| ('Total (Sum)',) | 8.06797e+07 | """.strip() ) @@ -329,9 +73,9 @@ def test_pivot_df_no_cols_no_rows_single_metric(): assert ( pivoted.to_markdown() == """ -| metric | SUM(num) | -|:------------|------------:| -| Total (Sum) | 8.06797e+07 | +| | ('SUM(num)',) | +|:-----------------|----------------:| +| ('Total (Sum)',) | 8.06797e+07 | """.strip() ) @@ -352,9 +96,9 @@ def test_pivot_df_no_cols_no_rows_single_metric(): assert ( pivoted.to_markdown() == """ -| | Total (Sum) | -|:---------|--------------:| -| SUM(num) | 8.06797e+07 | +| | ('Total (Sum)',) | +|:--------------|-------------------:| +| ('SUM(num)',) | 8.06797e+07 | """.strip() ) @@ -374,9 +118,9 @@ def test_pivot_df_no_cols_no_rows_single_metric(): assert ( pivoted.to_markdown() == """ -| metric | ('SUM(num)',) | ('Total (Sum)',) | -|:------------|----------------:|-------------------:| -| Total (Sum) | 8.06797e+07 | 8.06797e+07 | +| | ('SUM(num)',) | ('Total (Sum)',) | +|:-----------------|----------------:|-------------------:| +| ('Total (Sum)',) | 8.06797e+07 | 8.06797e+07 | """.strip() ) @@ -412,9 +156,9 @@ def test_pivot_df_no_cols_no_rows_two_metrics(): assert ( pivoted.to_markdown() == """ -| metric | SUM(num) | MAX(num) | -|:------------|------------:|-----------:| -| Total (Sum) | 8.06797e+07 | 37296 | +| | ('SUM(num)',) | ('MAX(num)',) | +|:-----------------|----------------:|----------------:| +| ('Total (Sum)',) | 8.06797e+07 | 37296 | """.strip() ) @@ -434,9 +178,9 @@ def test_pivot_df_no_cols_no_rows_two_metrics(): assert ( pivoted.to_markdown() == """ -| metric | SUM(num) | MAX(num) | -|:------------|------------:|-----------:| -| Total (Sum) | 8.06797e+07 | 37296 | +| | ('SUM(num)',) | ('MAX(num)',) | +|:-----------------|----------------:|----------------:| +| ('Total 
(Sum)',) | 8.06797e+07 | 37296 | """.strip() ) @@ -457,10 +201,10 @@ def test_pivot_df_no_cols_no_rows_two_metrics(): assert ( pivoted.to_markdown() == """ -| | Total (Sum) | -|:---------|----------------:| -| SUM(num) | 8.06797e+07 | -| MAX(num) | 37296 | +| | ('Total (Sum)',) | +|:--------------|-------------------:| +| ('SUM(num)',) | 8.06797e+07 | +| ('MAX(num)',) | 37296 | """.strip() ) @@ -481,9 +225,9 @@ def test_pivot_df_no_cols_no_rows_two_metrics(): assert ( pivoted.to_markdown() == """ -| metric | ('SUM(num)',) | ('MAX(num)',) | ('Total (Sum)',) | -|:------------|----------------:|----------------:|-------------------:| -| Total (Sum) | 8.06797e+07 | 37296 | 8.0717e+07 | +| | ('SUM(num)',) | ('MAX(num)',) | ('Total (Sum)',) | +|:-----------------|----------------:|----------------:|-------------------:| +| ('Total (Sum)',) | 8.06797e+07 | 37296 | 8.0717e+07 | """.strip() ) @@ -524,10 +268,10 @@ def test_pivot_df_single_row_two_metrics(): assert ( pivoted.to_markdown() == """ -| gender | SUM(num) | MAX(num) | -|:---------|-----------:|-----------:| -| boy | 47123 | 1280 | -| girl | 118065 | 2588 | +| | ('SUM(num)',) | ('MAX(num)',) | +|:----------|----------------:|----------------:| +| ('boy',) | 47123 | 1280 | +| ('girl',) | 118065 | 2588 | """.strip() ) @@ -547,9 +291,9 @@ def test_pivot_df_single_row_two_metrics(): assert ( pivoted.to_markdown() == """ -| metric | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') | -|:------------|----------------------:|-----------------------:|----------------------:|-----------------------:| -| Total (Sum) | 47123 | 118065 | 1280 | 2588 | +| | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') | +|:-----------------|----------------------:|-----------------------:|----------------------:|-----------------------:| +| ('Total (Sum)',) | 47123 | 118065 | 1280 | 2588 | """.strip() ) @@ -569,10 +313,10 @@ def test_pivot_df_single_row_two_metrics(): assert ( pivoted.to_markdown() == """ -| gender | SUM(num) | MAX(num) | -|:---------|-----------:|-----------:| -| boy | 47123 | 1280 | -| girl | 118065 | 2588 | +| | ('SUM(num)',) | ('MAX(num)',) | +|:----------|----------------:|----------------:| +| ('boy',) | 47123 | 1280 | +| ('girl',) | 118065 | 2588 | """.strip() ) @@ -616,15 +360,15 @@ def test_pivot_df_single_row_two_metrics(): assert ( pivoted.to_markdown() == """ -| | Total (Sum) | -|:-------------------------|--------------:| -| ('SUM(num)', 'boy') | 47123 | -| ('SUM(num)', 'girl') | 118065 | -| ('SUM(num)', 'Subtotal') | 165188 | -| ('MAX(num)', 'boy') | 1280 | -| ('MAX(num)', 'girl') | 2588 | -| ('MAX(num)', 'Subtotal') | 3868 | -| ('Total (Sum)', '') | 169056 | +| | ('Total (Sum)',) | +|:-------------------------|-------------------:| +| ('SUM(num)', 'boy') | 47123 | +| ('SUM(num)', 'girl') | 118065 | +| ('SUM(num)', 'Subtotal') | 165188 | +| ('MAX(num)', 'boy') | 1280 | +| ('MAX(num)', 'girl') | 2588 | +| ('MAX(num)', 'Subtotal') | 3868 | +| ('Total (Sum)', '') | 169056 | """.strip() ) @@ -644,15 +388,15 @@ def test_pivot_df_single_row_two_metrics(): assert ( pivoted.to_markdown() == """ -| | Total (Sum) | -|:---------------------|--------------:| -| ('boy', 'SUM(num)') | 47123 | -| ('boy', 'MAX(num)') | 1280 | -| ('boy', 'Subtotal') | 48403 | -| ('girl', 'SUM(num)') | 118065 | -| ('girl', 'MAX(num)') | 2588 | -| ('girl', 'Subtotal') | 120653 | -| ('Total (Sum)', '') | 169056 | +| | ('Total (Sum)',) | +|:---------------------|-------------------:| +| ('boy', 
'SUM(num)') | 47123 | +| ('boy', 'MAX(num)') | 1280 | +| ('boy', 'Subtotal') | 48403 | +| ('girl', 'SUM(num)') | 118065 | +| ('girl', 'MAX(num)') | 2588 | +| ('girl', 'Subtotal') | 120653 | +| ('Total (Sum)', '') | 169056 | """.strip() ) @@ -797,10 +541,10 @@ def test_pivot_df_complex(): assert ( pivoted.to_markdown() == """ -| state | ('SUM(num)', 'boy', 'Edward') | ('SUM(num)', 'boy', 'Tony') | ('SUM(num)', 'girl', 'Amy') | ('SUM(num)', 'girl', 'Cindy') | ('SUM(num)', 'girl', 'Dawn') | ('SUM(num)', 'girl', 'Sophia') | ('MAX(num)', 'boy', 'Edward') | ('MAX(num)', 'boy', 'Tony') | ('MAX(num)', 'girl', 'Amy') | ('MAX(num)', 'girl', 'Cindy') | ('MAX(num)', 'girl', 'Dawn') | ('MAX(num)', 'girl', 'Sophia') | +| | ('SUM(num)', 'boy', 'Edward') | ('SUM(num)', 'boy', 'Tony') | ('SUM(num)', 'girl', 'Amy') | ('SUM(num)', 'girl', 'Cindy') | ('SUM(num)', 'girl', 'Dawn') | ('SUM(num)', 'girl', 'Sophia') | ('MAX(num)', 'boy', 'Edward') | ('MAX(num)', 'boy', 'Tony') | ('MAX(num)', 'girl', 'Amy') | ('MAX(num)', 'girl', 'Cindy') | ('MAX(num)', 'girl', 'Dawn') | ('MAX(num)', 'girl', 'Sophia') | |:--------|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:| -| CA | 31290 | 3765 | 45426 | 14149 | 11403 | 18859 | 1280 | 598 | 2227 | 842 | 1157 | 2588 | -| FL | 9395 | 2673 | 14740 | 1218 | 5089 | 7181 | 389 | 247 | 854 | 217 | 461 | 1187 | +| ('CA',) | 31290 | 3765 | 45426 | 14149 | 11403 | 18859 | 1280 | 598 | 2227 | 842 | 1157 | 2588 | +| ('FL',) | 9395 | 2673 | 14740 | 1218 | 5089 | 7181 | 389 | 247 | 854 | 217 | 461 | 1187 | """.strip() ) @@ -877,20 +621,20 @@ def test_pivot_df_complex(): assert ( pivoted.to_markdown() == """ -| | CA | FL | -|:-------------------------------|------:|------:| -| ('SUM(num)', 'boy', 'Edward') | 31290 | 9395 | -| ('SUM(num)', 'boy', 'Tony') | 3765 | 2673 | -| ('SUM(num)', 'girl', 'Amy') | 45426 | 14740 | -| ('SUM(num)', 'girl', 'Cindy') | 14149 | 1218 | -| ('SUM(num)', 'girl', 'Dawn') | 11403 | 5089 | -| ('SUM(num)', 'girl', 'Sophia') | 18859 | 7181 | -| ('MAX(num)', 'boy', 'Edward') | 1280 | 389 | -| ('MAX(num)', 'boy', 'Tony') | 598 | 247 | -| ('MAX(num)', 'girl', 'Amy') | 2227 | 854 | -| ('MAX(num)', 'girl', 'Cindy') | 842 | 217 | -| ('MAX(num)', 'girl', 'Dawn') | 1157 | 461 | -| ('MAX(num)', 'girl', 'Sophia') | 2588 | 1187 | +| | ('CA',) | ('FL',) | +|:-------------------------------|----------:|----------:| +| ('SUM(num)', 'boy', 'Edward') | 31290 | 9395 | +| ('SUM(num)', 'boy', 'Tony') | 3765 | 2673 | +| ('SUM(num)', 'girl', 'Amy') | 45426 | 14740 | +| ('SUM(num)', 'girl', 'Cindy') | 14149 | 1218 | +| ('SUM(num)', 'girl', 'Dawn') | 11403 | 5089 | +| ('SUM(num)', 'girl', 'Sophia') | 18859 | 7181 | +| ('MAX(num)', 'boy', 'Edward') | 1280 | 389 | +| ('MAX(num)', 'boy', 'Tony') | 598 | 247 | +| ('MAX(num)', 'girl', 'Amy') | 2227 | 854 | +| ('MAX(num)', 'girl', 'Cindy') | 842 | 217 | +| ('MAX(num)', 'girl', 'Dawn') | 1157 | 461 | +| ('MAX(num)', 'girl', 'Sophia') | 2588 | 1187 | """.strip() ) @@ -910,20 +654,20 @@ def test_pivot_df_complex(): assert ( pivoted.to_markdown() == """ -| | CA | FL | -|:-------------------------------|------:|------:| -| ('boy', 'Edward', 'SUM(num)') | 31290 | 9395 | -| ('boy', 'Edward', 'MAX(num)') | 
1280 | 389 | -| ('boy', 'Tony', 'SUM(num)') | 3765 | 2673 | -| ('boy', 'Tony', 'MAX(num)') | 598 | 247 | -| ('girl', 'Amy', 'SUM(num)') | 45426 | 14740 | -| ('girl', 'Amy', 'MAX(num)') | 2227 | 854 | -| ('girl', 'Cindy', 'SUM(num)') | 14149 | 1218 | -| ('girl', 'Cindy', 'MAX(num)') | 842 | 217 | -| ('girl', 'Dawn', 'SUM(num)') | 11403 | 5089 | -| ('girl', 'Dawn', 'MAX(num)') | 1157 | 461 | -| ('girl', 'Sophia', 'SUM(num)') | 18859 | 7181 | -| ('girl', 'Sophia', 'MAX(num)') | 2588 | 1187 | +| | ('CA',) | ('FL',) | +|:-------------------------------|----------:|----------:| +| ('boy', 'Edward', 'SUM(num)') | 31290 | 9395 | +| ('boy', 'Edward', 'MAX(num)') | 1280 | 389 | +| ('boy', 'Tony', 'SUM(num)') | 3765 | 2673 | +| ('boy', 'Tony', 'MAX(num)') | 598 | 247 | +| ('girl', 'Amy', 'SUM(num)') | 45426 | 14740 | +| ('girl', 'Amy', 'MAX(num)') | 2227 | 854 | +| ('girl', 'Cindy', 'SUM(num)') | 14149 | 1218 | +| ('girl', 'Cindy', 'MAX(num)') | 842 | 217 | +| ('girl', 'Dawn', 'SUM(num)') | 11403 | 5089 | +| ('girl', 'Dawn', 'MAX(num)') | 1157 | 461 | +| ('girl', 'Sophia', 'SUM(num)') | 18859 | 7181 | +| ('girl', 'Sophia', 'MAX(num)') | 2588 | 1187 | """.strip() ) @@ -940,6 +684,7 @@ def test_pivot_df_complex(): show_columns_total=True, apply_metrics_on_rows=True, ) + print(pivoted.to_markdown()) assert ( pivoted.to_markdown() == """ From 09b9d01556e3ae9675ee4b3455631e34e6a764a8 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Wed, 18 Aug 2021 15:56:32 -0700 Subject: [PATCH 4/4] Fix test --- .../reports/commands_tests.py | 63 ++++++++++++++----- 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/tests/integration_tests/reports/commands_tests.py b/tests/integration_tests/reports/commands_tests.py index fef19e88e7f01..733b75a2e52ce 100644 --- a/tests/integration_tests/reports/commands_tests.py +++ b/tests/integration_tests/reports/commands_tests.py @@ -756,19 +756,37 @@ def test_email_chart_report_schedule_with_csv_no_query_context( @patch("superset.utils.csv.urllib.request.urlopen") @patch("superset.utils.csv.urllib.request.OpenerDirector.open") @patch("superset.reports.notifications.email.send_email_smtp") -@patch("superset.utils.csv.get_chart_csv_data") +@patch("superset.utils.csv.get_chart_dataframe") def test_email_chart_report_schedule_with_text( - csv_mock, email_mock, mock_open, mock_urlopen, create_report_email_chart_with_text, + dataframe_mock, + email_mock, + mock_open, + mock_urlopen, + create_report_email_chart_with_text, ): """ ExecuteReport Command: Test chart email report schedule with text """ - # setup csv mock + # setup dataframe mock response = Mock() mock_open.return_value = response mock_urlopen.return_value = response mock_urlopen.return_value.getcode.return_value = 200 - response.read.return_value = CSV_FILE + response.read.return_value = json.dumps( + { + "result": [ + { + "data": { + "t1": {0: "c11", 1: "c21"}, + "t2": {0: "c12", 1: "c22"}, + "t3__sum": {0: "c13", 1: "c23"}, + }, + "colnames": [("t1",), ("t2",), ("t3__sum",)], + "indexnames": [(0,), (1,)], + }, + ], + } + ).encode("utf-8") with freeze_time("2020-01-01T00:00:00Z"): AsyncExecuteReportScheduleCommand( @@ -776,9 +794,10 @@ def test_email_chart_report_schedule_with_text( ).run() # assert that the data is embedded correctly - table_html = """ + table_html = """
+    table_html = """<table border="1" class="dataframe">
+  <thead>
+    <tr>
+      <th></th>
+      <th>t1</th>
+      <th>t2</th>
+      <th>t3__sum</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <th>0</th>
+      <td>c11</td>
+      <td>c12</td>
+      <td>c13</td>
+    </tr>
+    <tr>
+      <th>1</th>
+      <td>c21</td>
+      <td>c22</td>
+      <td>c23</td>
+    </tr>
+  </tbody>
+</table>"""
 
     assert table_html in email_mock.call_args[0][2]
 
@@ -908,9 +929,9 @@ def test_slack_chart_report_schedule_with_csv(
 @patch("superset.reports.notifications.slack.WebClient.chat_postMessage")
 @patch("superset.utils.csv.urllib.request.urlopen")
 @patch("superset.utils.csv.urllib.request.OpenerDirector.open")
-@patch("superset.utils.csv.get_chart_csv_data")
+@patch("superset.utils.csv.get_chart_dataframe")
 def test_slack_chart_report_schedule_with_text(
-    csv_mock,
+    dataframe_mock,
     mock_open,
     mock_urlopen,
     post_message_mock,
@@ -919,24 +940,36 @@ def test_slack_chart_report_schedule_with_text(
     """
     ExecuteReport Command: Test chart slack report schedule with text
     """
-    # setup csv mock
+    # setup dataframe mock
     response = Mock()
     mock_open.return_value = response
     mock_urlopen.return_value = response
     mock_urlopen.return_value.getcode.return_value = 200
-    response.read.return_value = CSV_FILE
+    response.read.return_value = json.dumps(
+        {
+            "result": [
+                {
+                    "data": {
+                        "t1": {0: "c11", 1: "c21"},
+                        "t2": {0: "c12", 1: "c22"},
+                        "t3__sum": {0: "c13", 1: "c23"},
+                    },
+                    "colnames": [("t1",), ("t2",), ("t3__sum",)],
+                    "indexnames": [(0,), (1,)],
+                },
+            ],
+        }
+    ).encode("utf-8")
 
     with freeze_time("2020-01-01T00:00:00Z"):
         AsyncExecuteReportScheduleCommand(
             TEST_ID, create_report_slack_chart_with_text.id, datetime.utcnow()
         ).run()
 
-    table_markdown = """```
-t1    t2    t3__sum
-----  ----  ---------
-c11   c12   c13
-c21   c22   c23
-```"""
+    table_markdown = """|    | t1   | t2   | t3__sum   |
+|---:|:-----|:-----|:----------|
+|  0 | c11  | c12  | c13       |
+|  1 | c21  | c22  | c23       |"""
     assert table_markdown in post_message_mock.call_args[1]["text"]
 
     # Assert logs are correct
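--
A note on the data contract these patches introduce, with a minimal standalone
sketch (not part of the patch series): the payload values below are invented,
but the "data"/"colnames"/"indexnames" keys and the MultiIndex rebuild mirror
apply_post_process() and get_chart_dataframe() above.

import pandas as pd

# Shape of one entry in result["queries"] after apply_post_process(): the
# frame is serialized as a JSON-friendly dict keyed by flattened column
# names, with the original hierarchical labels preserved as tuples.
payload = {
    "data": {
        "SUM(num) boy": {"CA": 5430796, "FL": 1968060},
        "SUM(num) girl": {"CA": 3567754, "FL": 1312593},
    },
    "colnames": [("SUM(num)", "boy"), ("SUM(num)", "girl")],
    "indexnames": [("CA",), ("FL",)],
}

# What get_chart_dataframe() does on the notification side: rebuild the
# DataFrame, then restore the hierarchical axes that were flattened for JSON.
df = pd.DataFrame.from_dict(payload["data"])
df.columns = pd.MultiIndex.from_tuples(tuple(c) for c in payload["colnames"])
df.index = pd.MultiIndex.from_tuples(tuple(i) for i in payload["indexnames"])

# Slack embeds df.to_markdown() (which needs the optional tabulate package);
# email embeds df.to_html(index=True), sanitized with bleach.
print(df.to_markdown())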