Skip to content

Commit

Permalink
Add a simpler test case reproducing the aggregation-conversion issue behind TPC-H query #14.
Browse files Browse the repository at this point in the history
  • Loading branch information
EpsilonPrime committed Jul 30, 2024
1 parent 048b099 commit 92bac4d
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 3 deletions.
6 changes: 4 additions & 2 deletions src/gateway/converter/spark_to_substrait.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,8 +561,6 @@ def convert_expression(self, expr: spark_exprs_pb2.Expression) -> algebra_pb2.Ex
"""Convert a SparkConnect expression to a Substrait expression."""
parent_processing_mode = self._expression_processing_mode
try:
if parent_processing_mode == ExpressionProcessingMode.AGGR_NOT_TOP_LEVEL:
self._expression_processing_mode = ExpressionProcessingMode.AGGR_UNDER_AGGREGATE
match expr.WhichOneof('expr_type'):
case 'literal':
result = self.convert_literal_expression(expr.literal)
Expand All @@ -572,7 +570,11 @@ def convert_expression(self, expr: spark_exprs_pb2.Expression) -> algebra_pb2.Ex
case 'unresolved_attribute':
result = self.convert_unresolved_attribute(expr.unresolved_attribute)
case 'unresolved_function':
if parent_processing_mode == ExpressionProcessingMode.AGGR_NOT_TOP_LEVEL:
self._expression_processing_mode = ExpressionProcessingMode.AGGR_UNDER_AGGREGATE
result = self.convert_unresolved_function(expr.unresolved_function)
if parent_processing_mode == ExpressionProcessingMode.AGGR_NOT_TOP_LEVEL:
self._expression_processing_mode = parent_processing_mode
if isinstance(result, algebra_pb2.AggregateFunction):
match parent_processing_mode:
case ExpressionProcessingMode.AGGR_TOP_LEVEL:
Expand Down
20 changes: 19 additions & 1 deletion src/gateway/tests/test_dataframe_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2398,7 +2398,6 @@ def test_try_sum(self, numbers_dataframe):
assertDataFrameEqual(outcome, expected)


@pytest.mark.interesting
class TestDataFrameAggregateBehavior:
"""Tests aggregation behavior using the dataframe side of SparkConnect."""

Expand Down Expand Up @@ -2467,6 +2466,25 @@ def test_computation_with_two_aggregations(self, register_tpch_dataset, spark_se

assertDataFrameEqual(outcome, expected)

@pytest.mark.interesting
def test_computation_with_two_aggregations_and_internal_calculation(
        self, register_tpch_dataset, spark_session):
    """Multiply two aggregations where one aggregate's input is itself a computation.

    Regression test for converting nested expressions under an aggregate:
    ``try_sum(col * 10)`` forces the converter to handle an arithmetic
    expression *inside* an aggregate function, and multiplying the two
    ``try_sum`` results exercises combining aggregates at the top level
    (the AGGR_NOT_TOP_LEVEL / AGGR_UNDER_AGGREGATE path in
    spark_to_substrait.convert_expression).
    """
    # Expected values come from the TPC-H lineitem data loaded by the
    # register_tpch_dataset fixture, limited to the first three suppkeys.
    expected = [
        Row(l_suppkey=1, a=3903113211864.30),
        Row(l_suppkey=2, a=2883714563527.20),
        Row(l_suppkey=3, a=3065404357629.60),
    ]

    # utilizes_valid_plans asserts the generated Substrait plan validates,
    # not just that the collected rows match.
    with utilizes_valid_plans(spark_session):
        lineitem = spark_session.table('lineitem')

        outcome = lineitem.groupBy('l_suppkey').agg(
            try_sum(col('l_extendedprice') * 10) *
            try_sum(col('l_quantity'))
        ).orderBy('l_suppkey').limit(3).collect()

    assertDataFrameEqual(outcome, expected)

def test_multiple_measures(self, register_tpch_dataset, spark_session):
expected = [
Row(i=1, a=229583290946.9698, b=229589292160.9698, c=229595293374.96982),
Expand Down

0 comments on commit 92bac4d

Please sign in to comment.