-
Notifications
You must be signed in to change notification settings - Fork 14.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: Apply normalization to all dttm columns #25147
Changes from all commits
8c40e98
cb43efd
c545159
69079d4
a352a8b
1eec421
1a57cb8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -282,10 +282,11 @@ def _get_timestamp_format( | |
datasource = self._qc_datasource | ||
labels = tuple( | ||
label | ||
for label in [ | ||
for label in { | ||
*get_base_axis_labels(query_object.columns), | ||
*[col for col in query_object.columns or [] if isinstance(col, str)], | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there risk that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point, I don't know if there's such risk but can't hurt to make it bulletproof 👍 |
||
query_object.granularity, | ||
] | ||
} | ||
if datasource | ||
# Query datasource didn't support `get_column` | ||
and hasattr(datasource, "get_column") | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -16,17 +16,24 @@ | |||||
# under the License. | ||||||
from __future__ import annotations | ||||||
|
||||||
from datetime import datetime | ||||||
from typing import Any, TYPE_CHECKING | ||||||
|
||||||
from superset.common.chart_data import ChartDataResultType | ||||||
from superset.common.query_object import QueryObject | ||||||
from superset.common.utils.time_range_utils import get_since_until_from_time_range | ||||||
from superset.utils.core import apply_max_row_limit, DatasourceDict, DatasourceType | ||||||
from superset.utils.core import ( | ||||||
apply_max_row_limit, | ||||||
DatasourceDict, | ||||||
DatasourceType, | ||||||
FilterOperator, | ||||||
QueryObjectFilterClause, | ||||||
) | ||||||
|
||||||
if TYPE_CHECKING: | ||||||
from sqlalchemy.orm import sessionmaker | ||||||
|
||||||
from superset.connectors.base.models import BaseDatasource | ||||||
from superset.connectors.base.models import BaseColumn, BaseDatasource | ||||||
from superset.daos.datasource import DatasourceDAO | ||||||
|
||||||
|
||||||
|
@@ -66,6 +73,10 @@ def create( # pylint: disable=too-many-arguments | |||||
) | ||||||
kwargs["from_dttm"] = from_dttm | ||||||
kwargs["to_dttm"] = to_dttm | ||||||
if datasource_model_instance and kwargs.get("filters", []): | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
kwargs["filters"] = self._process_filters( | ||||||
datasource_model_instance, kwargs["filters"] | ||||||
) | ||||||
return QueryObject( | ||||||
datasource=datasource_model_instance, | ||||||
extras=extras, | ||||||
|
@@ -102,3 +113,54 @@ def _process_row_limit( | |||||
# light version of the view.utils.core | ||||||
# import view.utils require application context | ||||||
# Todo: move it and the view.utils.core to utils package | ||||||
|
||||||
def _process_filters( | ||||||
self, datasource: BaseDatasource, query_filters: list[QueryObjectFilterClause] | ||||||
) -> list[QueryObjectFilterClause]: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we returning the filters where in actuality we're mutating the |
||||||
def get_dttm_filter_value( | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A lot of this logic and the logic defined on lines 156-164 was already defined here. Wouldn't it be better (and potentially less error prone) if we adhered to the DRY principle and reused the same helper function. |
||||||
value: Any, col: BaseColumn, date_format: str | ||||||
) -> int | str: | ||||||
if not isinstance(value, int): | ||||||
return value | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If |
||||||
if date_format in {"epoch_ms", "epoch_s"}: | ||||||
if date_format == "epoch_s": | ||||||
value = str(value) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If |
||||||
else: | ||||||
value = str(value * 1000) | ||||||
else: | ||||||
dttm = datetime.utcfromtimestamp(value / 1000) | ||||||
value = dttm.strftime(date_format) | ||||||
|
||||||
if col.type in col.num_types: | ||||||
value = int(value) | ||||||
return value | ||||||
|
||||||
for query_filter in query_filters: | ||||||
if query_filter.get("op") == FilterOperator.TEMPORAL_RANGE: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rather than the continue I think one if statement (based on truthiness and not falseness) is likely more readable. i.e., if (
query_filter.get("op") != FilterOperator.TEMPORAL_RANGE
and (filter_col := query_filter.get("col"))
and isinstance(filter_col, str)
and (column := datasource.get_column(filter_col))
):
... |
||||||
continue | ||||||
filter_col = query_filter.get("col") | ||||||
if not isinstance(filter_col, str): | ||||||
continue | ||||||
column = datasource.get_column(filter_col) | ||||||
if not column: | ||||||
continue | ||||||
Comment on lines
+144
to
+146
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You mentioned during our call that you've added lines 145-146 to make the linter happy. A better way — more explicit — is to do: column = cast(BaseColumn, datasource.get_column(filter_col)) This informs the linter that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn’t it safer to check for None since There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, I thought |
||||||
filter_value = query_filter.get("val") | ||||||
|
||||||
date_format = column.python_date_format | ||||||
if not date_format and datasource.db_extra: | ||||||
date_format = datasource.db_extra.get( | ||||||
"python_date_format_by_column_name", {} | ||||||
).get(column.column_name) | ||||||
|
||||||
if column.is_dttm and date_format: | ||||||
if isinstance(filter_value, list): | ||||||
query_filter["val"] = [ | ||||||
get_dttm_filter_value(value, column, date_format) | ||||||
for value in filter_value | ||||||
] | ||||||
else: | ||||||
query_filter["val"] = get_dttm_filter_value( | ||||||
filter_value, column, date_format | ||||||
) | ||||||
|
||||||
return query_filters |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the reason for this change, i.e., why are we including a filter which was flagged for removal if it operation is not a temporal range?