[Data] support batch_format for Sort and Aggregate #48287

Merged
merged 8 commits on Nov 13, 2024
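For context, a hedged sketch of the user-facing behavior this change targets, based on the tests added to test_execution_optimizer.py below (the sample data is illustrative): once an upstream map_groups/map_batches stage runs with batch_format="pandas", the downstream Sort and Aggregate operators inherit that format instead of assuming the default block type.

```python
import ray

ds = ray.data.from_items(
    [
        {"col1": 1, "col2": 2},
        {"col1": 1, "col2": 4},
        {"col1": 5, "col2": 6},
        {"col1": 7, "col2": 8},
    ]
)

# map_groups produces pandas blocks; with this change, the downstream Sort
# inherits batch_format="pandas" rather than assuming the default block type.
df = (
    ds.groupby("col1")
    .map_groups(lambda g: g, batch_format="pandas")
    .sort("col2", descending=True)
    .to_pandas()
)
print(df)
```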
@@ -120,6 +120,7 @@ def __init__(
self,
input_op: LogicalOperator,
sort_key: SortKey,
batch_format: Optional[str] = "default",
):
super().__init__(
"Sort",
@@ -131,6 +132,7 @@ def __init__(
],
)
self._sort_key = sort_key
self._batch_format = batch_format

def aggregate_output_metadata(self) -> BlockMetadata:
assert len(self._input_dependencies) == 1, len(self._input_dependencies)
@@ -145,6 +147,7 @@ def __init__(
input_op: LogicalOperator,
key: Optional[str],
aggs: List[AggregateFn],
batch_format: Optional[str] = "default",
):
super().__init__(
"Aggregate",
@@ -157,3 +160,4 @@ def __init__(
)
self._key = key
self._aggs = aggs
self._batch_format = batch_format
2 changes: 2 additions & 0 deletions python/ray/data/_internal/logical/optimizers.py
@@ -6,6 +6,7 @@
PhysicalPlan,
Rule,
)
from ray.data._internal.logical.rules.inherit_batch_format import InheritBatchFormatRule
from ray.data._internal.logical.rules.inherit_target_max_block_size import (
InheritTargetMaxBlockSizeRule,
)
@@ -20,6 +21,7 @@

_LOGICAL_RULES = [
ReorderRandomizeBlocksRule,
InheritBatchFormatRule,
]

_PHYSICAL_RULES = [
42 changes: 42 additions & 0 deletions python/ray/data/_internal/logical/rules/inherit_batch_format.py
@@ -0,0 +1,42 @@
from collections import deque
from typing import Iterable

from ray.data._internal.logical.interfaces import LogicalOperator, LogicalPlan, Rule
from ray.data._internal.logical.operators.all_to_all_operator import AbstractAllToAll
from ray.data._internal.logical.operators.map_operator import MapBatches


class InheritBatchFormatRule(Rule):
"""For AbstractAllToAll based operator, apply this rule
to inherit batch_format from upstream operator by traversing
the entire DAG."""

def apply(self, plan: LogicalPlan) -> LogicalPlan:
optimized_dag: LogicalOperator = self._apply(plan.dag)
new_plan = LogicalPlan(dag=optimized_dag, context=plan.context)
return new_plan

def _apply(self, op: LogicalOperator):
# Post-order traversal.
nodes: Iterable[LogicalOperator] = deque()
for node in op.post_order_iter():
nodes.appendleft(node)

while len(nodes) > 0:
Comment on lines +22 to +25

Contributor: Let's combine these 2 loops.

Contributor Author: Looking through the codebase, I was trying to use the same coding style used elsewhere for this; it doesn't seem worth combining. Thanks!

Contributor: We can't just blindly follow the patterns, these need to make sense, right? What's the motivation for first collecting the nodes into a queue, instead of traversing the iterator directly?

current_op = nodes.pop()

if isinstance(current_op, AbstractAllToAll):
# Traverse up the DAG until we find a MapBatches with batch_format,
# or reach the source op and do nothing.
upstream_op = current_op.input_dependencies[0]
while upstream_op.input_dependencies:
if (
isinstance(upstream_op, MapBatches)
and upstream_op._batch_format
):
current_op._batch_format = upstream_op._batch_format
break
upstream_op = upstream_op.input_dependencies[0]

# Operators are mutated in place, so return the original DAG root.
return op
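As a reference for the review thread above, a minimal sketch of the combined-loop variant the reviewer suggests, iterating post_order_iter() directly instead of buffering nodes in a deque; the intent is equivalent behavior, and this is not the merged implementation.

```python
def _apply(self, op: LogicalOperator):
    # Post-order traversal: upstream operators are visited before downstream ones.
    for current_op in op.post_order_iter():
        if isinstance(current_op, AbstractAllToAll):
            # Traverse up the DAG until we find a MapBatches with batch_format,
            # or reach the source op and do nothing.
            upstream_op = current_op.input_dependencies[0]
            while upstream_op.input_dependencies:
                if isinstance(upstream_op, MapBatches) and upstream_op._batch_format:
                    current_op._batch_format = upstream_op._batch_format
                    break
                upstream_op = upstream_op.input_dependencies[0]
    # Operators are mutated in place, so return the original DAG root.
    return op
```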
2 changes: 2 additions & 0 deletions python/ray/data/_internal/planner/aggregate.py
@@ -24,6 +24,7 @@
def generate_aggregate_fn(
key: Optional[str],
aggs: List[AggregateFn],
batch_format: str,
_debug_limit_shuffle_execution_to_num_blocks: Optional[int] = None,
) -> AllToAllTransformFn:
"""Generate function to aggregate blocks by the specified key column or key
@@ -67,6 +68,7 @@ def fn(
boundaries=boundaries,
key=key,
aggs=aggs,
batch_format=batch_format,
)
if DataContext.get_current().use_push_based_shuffle:
scheduler = PushBasedShuffleTaskScheduler(agg_spec)
@@ -29,10 +29,11 @@ def __init__(
boundaries: List[KeyType],
key: Optional[str],
aggs: List[AggregateFn],
batch_format: str,
):
super().__init__(
map_args=[boundaries, key, aggs],
reduce_args=[key, aggs],
reduce_args=[key, aggs, batch_format],
)

@staticmethod
@@ -62,11 +63,15 @@ def map(
def reduce(
key: Optional[str],
aggs: List[AggregateFn],
batch_format: str,
*mapper_outputs: List[Block],
partial_reduce: bool = False,
) -> Tuple[Block, BlockMetadata]:
return BlockAccessor.for_block(mapper_outputs[0]).aggregate_combined_blocks(
list(mapper_outputs), key, aggs, finalize=not partial_reduce
normalized_blocks = TableBlockAccessor.normalize_block_types(
mapper_outputs, normalize_type=batch_format
)
return BlockAccessor.for_block(normalized_blocks[0]).aggregate_combined_blocks(
list(normalized_blocks), key, aggs, finalize=not partial_reduce
)

@staticmethod
12 changes: 9 additions & 3 deletions python/ray/data/_internal/planner/exchange/sort_task_spec.py
@@ -6,6 +6,7 @@
from ray.data._internal.planner.exchange.interfaces import ExchangeTaskSpec
from ray.data._internal.progress_bar import ProgressBar
from ray.data._internal.remote_fn import cached_remote_fn
from ray.data._internal.table_block import TableBlockAccessor
from ray.data.block import Block, BlockAccessor, BlockExecStats, BlockMetadata
from ray.types import ObjectRef

@@ -116,10 +117,11 @@ def __init__(
self,
boundaries: List[T],
sort_key: SortKey,
batch_format: str,
):
super().__init__(
map_args=[boundaries, sort_key],
reduce_args=[sort_key],
reduce_args=[sort_key, batch_format],
)

@staticmethod
@@ -138,11 +140,15 @@ def map(
@staticmethod
def reduce(
sort_key: SortKey,
batch_format: str,
*mapper_outputs: List[Block],
partial_reduce: bool = False,
) -> Tuple[Block, BlockMetadata]:
return BlockAccessor.for_block(mapper_outputs[0]).merge_sorted_blocks(
mapper_outputs, sort_key
normalized_blocks = TableBlockAccessor.normalize_block_types(
mapper_outputs, normalize_type=batch_format
)
return BlockAccessor.for_block(normalized_blocks[0]).merge_sorted_blocks(
normalized_blocks, sort_key
)

@staticmethod
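Why both reduce paths normalize first: once an upstream stage emits pandas blocks, the mapper outputs reaching a reduce task can be a mix of Arrow and pandas blocks, while aggregate_combined_blocks and merge_sorted_blocks use a single BlockAccessor chosen from the first block. A hedged sketch of the normalization step, using the normalize_block_types call as it appears in this diff (the sample blocks are illustrative, not from the PR):

```python
import pandas as pd
import pyarrow as pa

from ray.data._internal.table_block import TableBlockAccessor

# Mixed block types, as might arrive at a reduce task (illustrative).
mapper_outputs = [
    pa.table({"col1": [1, 5], "col2": [2, 6]}),
    pd.DataFrame({"col1": [7], "col2": [8]}),
]

# Convert every block to the inherited batch_format so that the BlockAccessor
# picked from the first normalized block can merge all of them.
normalized = TableBlockAccessor.normalize_block_types(
    mapper_outputs, normalize_type="pandas"
)
assert all(isinstance(block, pd.DataFrame) for block in normalized)
```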
9 changes: 7 additions & 2 deletions python/ray/data/_internal/planner/plan_all_to_all_op.py
@@ -71,7 +71,9 @@ def plan_all_to_all_op(
"debug_limit_shuffle_execution_to_num_blocks", None
)
)
fn = generate_sort_fn(op._sort_key, debug_limit_shuffle_execution_to_num_blocks)
fn = generate_sort_fn(
op._sort_key, op._batch_format, debug_limit_shuffle_execution_to_num_blocks
)
target_max_block_size = DataContext.get_current().target_shuffle_max_block_size
elif isinstance(op, Aggregate):
debug_limit_shuffle_execution_to_num_blocks = (
@@ -80,7 +82,10 @@
)
)
fn = generate_aggregate_fn(
op._key, op._aggs, debug_limit_shuffle_execution_to_num_blocks
op._key,
op._aggs,
op._batch_format,
debug_limit_shuffle_execution_to_num_blocks,
)
target_max_block_size = DataContext.get_current().target_shuffle_max_block_size
else:
5 changes: 4 additions & 1 deletion python/ray/data/_internal/planner/sort.py
@@ -20,6 +20,7 @@

def generate_sort_fn(
sort_key: SortKey,
batch_format: str,
_debug_limit_shuffle_execution_to_num_blocks: Optional[int] = None,
) -> AllToAllTransformFn:
"""Generate function to sort blocks by the specified key column or key function."""
@@ -56,7 +57,9 @@ def fn(
_, ascending = sort_key.to_pandas_sort_args()
if not ascending:
boundaries.reverse()
sort_spec = SortTaskSpec(boundaries=boundaries, sort_key=sort_key)
sort_spec = SortTaskSpec(
boundaries=boundaries, sort_key=sort_key, batch_format=batch_format
)

if DataContext.get_current().use_push_based_shuffle:
scheduler = PushBasedShuffleTaskScheduler(sort_spec)
69 changes: 69 additions & 0 deletions python/ray/data/tests/test_execution_optimizer.py
@@ -1172,6 +1172,75 @@ def test_sort_validate_keys(ray_start_regular_shared):
ds_named.sort(invalid_col_name).take_all()


def test_inherit_batch_format_rule():
from ray.data._internal.logical.rules.inherit_batch_format import (
InheritBatchFormatRule,
)

ctx = DataContext.get_current()

operator1 = get_parquet_read_logical_op()
operator2 = MapBatches(operator1, fn=lambda g: g, batch_format="pandas")
sort_key = SortKey("number", descending=True)
operator3 = Sort(operator2, sort_key)
original_plan = LogicalPlan(dag=operator3, context=ctx)

rule = InheritBatchFormatRule()
optimized_plan = rule.apply(original_plan)
assert optimized_plan.dag._batch_format == "pandas"


def test_batch_format_on_sort(ray_start_regular_shared):
"""Checks that the Sort op can inherit batch_format from upstream ops correctly."""
ds = ray.data.from_items(
[
{"col1": 1, "col2": 2},
{"col1": 1, "col2": 4},
{"col1": 5, "col2": 6},
{"col1": 7, "col2": 8},
]
)
df_expected = pd.DataFrame(
{
"col1": [7, 5, 1, 1],
"col2": [8, 6, 4, 2],
}
)
df_actual = (
ds.groupby("col1")
.map_groups(lambda g: g, batch_format="pandas")
.sort("col2", descending=True)
.to_pandas()
)
pd.testing.assert_frame_equal(df_actual, df_expected)


def test_batch_format_on_aggregate(ray_start_regular_shared):
"""Checks that the Aggregate op can inherit batch_format
from upstream ops correctly."""
from ray.data.aggregate import AggregateFn

ds = ray.data.from_items(
[
{"col1": 1, "col2": 2},
{"col1": 1, "col2": 4},
{"col1": 5, "col2": 6},
{"col1": 7, "col2": 8},
]
)
aggregation = AggregateFn(
init=lambda column: 1,
accumulate_row=lambda a, row: a * row["col2"],
merge=lambda a1, a2: a1 * a2,
name="prod",
)
assert (
ds.groupby("col1")
.map_groups(lambda g: g, batch_format="pandas")
.aggregate(aggregation)
) == {"prod": 384}


def test_aggregate_operator(ray_start_regular_shared):
ctx = DataContext.get_current()
