Skip to content

Commit

Permalink
Fixing pinot query generation for date format conversion from python …
Browse files Browse the repository at this point in the history
…datetime format to java simple date format
  • Loading branch information
xiangfu0 committed Feb 17, 2021
1 parent bc4c837 commit 43732c3
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 4 deletions.
20 changes: 16 additions & 4 deletions superset/db_engine_specs/pinot.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,38 +69,50 @@ def get_timestamp_expr(
time_grain: Optional[str],
type_: Optional[str] = None,
) -> TimestampExpression:
if not pdf:
raise NotImplementedError(f"Empty date format for '{col}'")
is_epoch = pdf in ("epoch_s", "epoch_ms")

# The DATETIMECONVERT pinot udf is documented at
# https://github.com/apache/incubator-pinot/wiki/dateTimeConvert-UDF
# We are not really converting any time units, just bucketing them.
tf = ""
java_date_format = ""
if not is_epoch:
try:
today = datetime.datetime.today()
today.strftime(str(pdf))
except ValueError:
raise ValueError(f"Invalid column datetime format:{str(pdf)}")
java_date_format = str(pdf)
java_date_format = pdf
for (
python_pattern,
java_pattern,
) in cls._python_to_java_time_patterns.items():
java_date_format.replace(python_pattern, java_pattern)
java_date_format = java_date_format.replace(
python_pattern, java_pattern
)
tf = f"1:SECONDS:SIMPLE_DATE_FORMAT:{java_date_format}"
else:
seconds_or_ms = "MILLISECONDS" if pdf == "epoch_ms" else "SECONDS"
tf = f"1:{seconds_or_ms}:EPOCH"
if time_grain:
granularity = cls.get_time_grain_expressions().get(time_grain)
if not granularity:
raise NotImplementedError("No pinot grain spec for " + str(time_grain))
raise NotImplementedError(f"No pinot grain spec for '{time_grain}'")
else:
return TimestampExpression("{{col}}", col)

# In pinot the output is a string since there is no timestamp column like pg
if cls._use_date_trunc_function.get(time_grain):
time_expr = f"DATETRUNC('{granularity}', {{col}}, '{seconds_or_ms}')"
if is_epoch:
time_expr = f"DATETRUNC('{granularity}', {{col}}, '{seconds_or_ms}')"
else:
time_expr = (
f"ToDateTime(DATETRUNC('{granularity}', "
+ f"FromDateTime({{col}}, '{java_date_format}'), "
+ f"'MILLISECONDS'), '{java_date_format}')"
)
else:
time_expr = f"DATETIMECONVERT({{col}}, '{tf}', '{tf}', '{granularity}')"

Expand Down
25 changes: 25 additions & 0 deletions tests/db_engine_specs/pinot_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,31 @@ def test_pinot_time_expression_sec_one_1d_grain(self):
"DATETIMECONVERT(tstamp, '1:SECONDS:EPOCH', '1:SECONDS:EPOCH', '1:DAYS')",
) # noqa

def test_pinot_time_expression_simple_date_format_1d_grain(self):
    """A strftime-formatted column with a P1D grain compiles to DATETIMECONVERT.

    The Python format "%Y-%m-%d %H:%M:%S" is expected to appear in the
    generated expression as the Java pattern "yyyy-MM-dd HH:mm:ss", used
    for both the input and the output time format.
    """
    python_format = "%Y-%m-%d %H:%M:%S"
    # Same time-format spec is expected on the input and output side.
    time_format = "1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss"
    expected = f"DATETIMECONVERT(tstamp, '{time_format}', '{time_format}', '1:DAYS')"

    timestamp_expr = PinotEngineSpec.get_timestamp_expr(
        column("tstamp"), python_format, "P1D"
    )
    compiled = str(timestamp_expr.compile())

    self.assertEqual(compiled, expected)

def test_pinot_time_expression_simple_date_format_1w_grain(self):
    """A strftime-formatted column with a P1W grain compiles to DATETRUNC.

    The expected expression parses the column with FromDateTime, truncates
    it to the week in MILLISECONDS, and formats it back with ToDateTime,
    using the Java pattern "yyyy-MM-dd HH:mm:ss" on both sides.
    """
    python_format = "%Y-%m-%d %H:%M:%S"
    java_format = "yyyy-MM-dd HH:mm:ss"
    expected = (
        f"ToDateTime(DATETRUNC('week', FromDateTime(tstamp, '{java_format}'), "
        f"'MILLISECONDS'), '{java_format}')"
    )

    timestamp_expr = PinotEngineSpec.get_timestamp_expr(
        column("tstamp"), python_format, "P1W"
    )
    compiled = str(timestamp_expr.compile())

    self.assertEqual(compiled, expected)

def test_pinot_time_expression_sec_one_1m_grain(self):
col = column("tstamp")
expr = PinotEngineSpec.get_timestamp_expr(col, "epoch_s", "P1M")
Expand Down

0 comments on commit 43732c3

Please sign in to comment.