Skip to content

Commit

Permalink
Add a simpler test case reproducing the aggregation-conversion issue behind TPC-H query #14.
Browse files Browse the repository at this point in the history
  • Loading branch information
EpsilonPrime committed Jul 30, 2024
1 parent 048b099 commit 92bac4d
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 3 deletions.
6 changes: 4 additions & 2 deletions src/gateway/converter/spark_to_substrait.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,8 +561,6 @@ def convert_expression(self, expr: spark_exprs_pb2.Expression) -> algebra_pb2.Ex
"""Convert a SparkConnect expression to a Substrait expression."""
parent_processing_mode = self._expression_processing_mode
try:
if parent_processing_mode == ExpressionProcessingMode.AGGR_NOT_TOP_LEVEL:
self._expression_processing_mode = ExpressionProcessingMode.AGGR_UNDER_AGGREGATE
match expr.WhichOneof('expr_type'):
case 'literal':
result = self.convert_literal_expression(expr.literal)
Expand All @@ -572,7 +570,11 @@ def convert_expression(self, expr: spark_exprs_pb2.Expression) -> algebra_pb2.Ex
case 'unresolved_attribute':
result = self.convert_unresolved_attribute(expr.unresolved_attribute)
case 'unresolved_function':
if parent_processing_mode == ExpressionProcessingMode.AGGR_NOT_TOP_LEVEL:
self._expression_processing_mode = ExpressionProcessingMode.AGGR_UNDER_AGGREGATE
result = self.convert_unresolved_function(expr.unresolved_function)
if parent_processing_mode == ExpressionProcessingMode.AGGR_NOT_TOP_LEVEL:
self._expression_processing_mode = parent_processing_mode
if isinstance(result, algebra_pb2.AggregateFunction):
match parent_processing_mode:
case ExpressionProcessingMode.AGGR_TOP_LEVEL:
Expand Down
20 changes: 19 additions & 1 deletion src/gateway/tests/test_dataframe_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2398,7 +2398,6 @@ def test_try_sum(self, numbers_dataframe):
assertDataFrameEqual(outcome, expected)


@pytest.mark.interesting
class TestDataFrameAggregateBehavior:
"""Tests aggregation behavior using the dataframe side of SparkConnect."""

Expand Down Expand Up @@ -2467,6 +2466,25 @@ def test_computation_with_two_aggregations(self, register_tpch_dataset, spark_se

assertDataFrameEqual(outcome, expected)

@pytest.mark.interesting
def test_computation_with_two_aggregations_and_internal_calculation(
        self, register_tpch_dataset, spark_session):
    """Multiply two aggregations where one aggregate's input is itself a computation.

    Regression test for converting nested expressions under an aggregate:
    ``try_sum(col * 10)`` forces the converter to handle an arithmetic
    expression *inside* an aggregate function, and multiplying the two
    ``try_sum`` results exercises combining aggregates at the top level
    (the AGGR_NOT_TOP_LEVEL / AGGR_UNDER_AGGREGATE path in
    spark_to_substrait.convert_expression).
    """
    # Expected values come from the TPC-H lineitem data loaded by the
    # register_tpch_dataset fixture, limited to the first three suppkeys.
    expected = [
        Row(l_suppkey=1, a=3903113211864.30),
        Row(l_suppkey=2, a=2883714563527.20),
        Row(l_suppkey=3, a=3065404357629.60),
    ]

    # utilizes_valid_plans asserts the generated Substrait plan validates,
    # not just that the collected rows match.
    with utilizes_valid_plans(spark_session):
        lineitem = spark_session.table('lineitem')

        outcome = lineitem.groupBy('l_suppkey').agg(
            try_sum(col('l_extendedprice') * 10) *
            try_sum(col('l_quantity'))
        ).orderBy('l_suppkey').limit(3).collect()

    assertDataFrameEqual(outcome, expected)

def test_multiple_measures(self, register_tpch_dataset, spark_session):
expected = [
Row(i=1, a=229583290946.9698, b=229589292160.9698, c=229595293374.96982),
Expand Down

0 comments on commit 92bac4d

Please sign in to comment.