Skip to content

Commit

Permalink
fix: Fixing pinot query generation for date format conversion from py…
Browse files Browse the repository at this point in the history
…thon datetime format to java simple date format (#13163)

* Fixing pinot query generation for date format conversion from python datetime format to java simple date format

* Address comments

* fix test

Co-authored-by: Ville Brofeldt <[email protected]>
  • Loading branch information
xiangfu0 and villebro authored Feb 20, 2021
1 parent eea5fe3 commit 786c12d
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 12 deletions.
26 changes: 16 additions & 10 deletions superset/db_engine_specs/pinot.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import datetime
from typing import Dict, List, Optional

from sqlalchemy.sql.expression import ColumnClause, ColumnElement
Expand Down Expand Up @@ -69,38 +68,45 @@ def get_timestamp_expr(
time_grain: Optional[str],
type_: Optional[str] = None,
) -> TimestampExpression:
if not pdf:
raise NotImplementedError(f"Empty date format for '{col}'")
is_epoch = pdf in ("epoch_s", "epoch_ms")

# The DATETIMECONVERT pinot udf is documented at
# Per https://github.com/apache/incubator-pinot/wiki/dateTimeConvert-UDF
# We are not really converting any time units, just bucketing them.
tf = ""
java_date_format = ""
if not is_epoch:
try:
today = datetime.datetime.today()
today.strftime(str(pdf))
except ValueError:
raise ValueError(f"Invalid column datetime format:{str(pdf)}")
java_date_format = str(pdf)
java_date_format = pdf
for (
python_pattern,
java_pattern,
) in cls._python_to_java_time_patterns.items():
java_date_format.replace(python_pattern, java_pattern)
java_date_format = java_date_format.replace(
python_pattern, java_pattern
)
tf = f"1:SECONDS:SIMPLE_DATE_FORMAT:{java_date_format}"
else:
seconds_or_ms = "MILLISECONDS" if pdf == "epoch_ms" else "SECONDS"
tf = f"1:{seconds_or_ms}:EPOCH"
if time_grain:
granularity = cls.get_time_grain_expressions().get(time_grain)
if not granularity:
raise NotImplementedError("No pinot grain spec for " + str(time_grain))
raise NotImplementedError(f"No pinot grain spec for '{time_grain}'")
else:
return TimestampExpression("{{col}}", col)

# In pinot the output is a string since there is no timestamp column like pg
if cls._use_date_trunc_function.get(time_grain):
time_expr = f"DATETRUNC('{granularity}', {{col}}, '{seconds_or_ms}')"
if is_epoch:
time_expr = f"DATETRUNC('{granularity}', {{col}}, '{seconds_or_ms}')"
else:
time_expr = (
f"ToDateTime(DATETRUNC('{granularity}', "
+ f"FromDateTime({{col}}, '{java_date_format}'), "
+ f"'MILLISECONDS'), '{java_date_format}')"
)
else:
time_expr = f"DATETIMECONVERT({{col}}, '{tf}', '{tf}', '{granularity}')"

Expand Down
38 changes: 36 additions & 2 deletions tests/db_engine_specs/pinot_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,46 @@ def test_pinot_time_expression_sec_one_1d_grain(self):
self.assertEqual(
result,
"DATETIMECONVERT(tstamp, '1:SECONDS:EPOCH', '1:SECONDS:EPOCH', '1:DAYS')",
) # noqa
)

def test_pinot_time_expression_simple_date_format_1d_grain(self):
col = column("tstamp")
expr = PinotEngineSpec.get_timestamp_expr(col, "%Y-%m-%d %H:%M:%S", "P1D")
result = str(expr.compile())
self.assertEqual(
result,
(
"DATETIMECONVERT(tstamp, "
+ "'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', "
+ "'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:DAYS')"
),
)

def test_pinot_time_expression_simple_date_format_1w_grain(self):
col = column("tstamp")
expr = PinotEngineSpec.get_timestamp_expr(col, "%Y-%m-%d %H:%M:%S", "P1W")
result = str(expr.compile())
self.assertEqual(
result,
(
"ToDateTime(DATETRUNC('week', FromDateTime(tstamp, "
+ "'yyyy-MM-dd HH:mm:ss'), 'MILLISECONDS'), 'yyyy-MM-dd HH:mm:ss')"
),
)

def test_pinot_time_expression_sec_one_1m_grain(self):
col = column("tstamp")
expr = PinotEngineSpec.get_timestamp_expr(col, "epoch_s", "P1M")
result = str(expr.compile())
self.assertEqual(
result, "DATETRUNC('month', tstamp, 'SECONDS')",
) # noqa
)

def test_invalid_get_time_expression_arguments(self):
with self.assertRaises(NotImplementedError):
PinotEngineSpec.get_timestamp_expr(column("tstamp"), None, "P1M")

with self.assertRaises(NotImplementedError):
PinotEngineSpec.get_timestamp_expr(
column("tstamp"), "epoch_s", "invalid_grain"
)

0 comments on commit 786c12d

Please sign in to comment.