@@ -134,7 +134,7 @@ def __repr__(self):
 ops = OpNamespace()
 
 
-_graph_placeholders = ["inputs", "sizes", "scalars", "hooks", "packed_data"]
+_graph_placeholders = ["inputs", "sizes", "scalars", "hooks"]
 _impure_targets = OrderedSet(
     [
         call_hook,
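Note (not part of the patch): after this change the four remaining entries of `_graph_placeholders` map one-to-one onto the placeholder nodes of the captured FX graph, as the `begin_capture` hunk below shows. A minimal, self-contained sketch of that mapping using only public `torch.fx` APIs; the placeholder names come from the list above, everything else is illustrative:

```python
# Illustrative sketch only: builds a bare FX graph with the same four
# placeholder names that compiled autograd's tracer creates via create_proxy.
import torch.fx as fx

graph = fx.Graph()
for name in ["inputs", "sizes", "scalars", "hooks"]:
    graph.placeholder(name)

print([node.name for node in graph.nodes])
# expected: ['inputs', 'sizes', 'scalars', 'hooks']
```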
@@ -206,13 +206,7 @@ def begin_capture(
         self.fx_tracer.graph = torch.fx.Graph(tracer_cls=PythonKeyTracer)
         self.fx_tracer.tensor_attrs = {}
         self.symnode_proxy_lookup = {}
-        (
-            args_proxy,
-            self.sizes_proxy,
-            self.scalars_proxy,
-            self.hooks_proxy,
-            self.packed_data_proxy,
-        ) = (
+        args_proxy, self.sizes_proxy, self.scalars_proxy, self.hooks_proxy = (
             self.fx_tracer.create_proxy("placeholder", name, (), {})
             for name in _graph_placeholders
         )
@@ -274,12 +268,7 @@ def begin_capture(
         self.stack.enter_context(
             torch.fx.experimental.symbolic_shapes._suppress_guards(env)
         )
-        return (
-            str(CompileContext.current_compile_id()),
-            inputs,
-            sizes,
-            scalars,
-        )
+        return str(CompileContext.current_compile_id()), inputs, sizes, scalars
 
     def log_compile_reasons(
         self,
@@ -578,19 +567,6 @@ def proxy_call_hook(self, hook, *args, **kwargs):
             kwargs,
         )
 
-    def unpack_hook(self, hook_id, data_id):
-        assert self.hooks_proxy is not None
-        hook = self.hooks_proxy[hook_id]  # type: ignore[index]
-        data = self.packed_data_proxy[data_id]  # type: ignore[index]
-        proxy = self.proxy_call_hook(
-            hook,
-            data,
-            hook_type="unpack_hook",
-        )
-        out = self.allocate_dummy()
-        self.bind_objects_to_proxies([out], [proxy])
-        return out
-
     def tensor_pre_hook(self, inputs, hook_id, i: int):
         assert self.hooks_proxy is not None
         hook = self.hooks_proxy[hook_id]  # type: ignore[index]
@@ -730,9 +706,6 @@ def is_impure(node):
         after = len(self.fx_tracer.graph.nodes)
         verbose_log.debug("DCE removed %d nodes", before - after)
 
-    def create_graph_module(self, id):
-        return GraphModule(self.fx_tracer.root, self.fx_tracer.graph, id)
-
     def end_capture(self, outputs):
         self.fx_tracer.create_proxy(
             "call_function",
@@ -772,7 +745,6 @@ def end_capture(self, outputs):
             ).print_readable(print_output=False),
         )
         self.rename_aot_dispatcher_nodes()
-        self.delay_unpack_hook_nodes()
         self.reorder_tensor_pre_hook_nodes()
         self.reorder_pre_hook_nodes_to_schedule_asap()
         self.reorder_accumulate_grad_nodes()
@@ -791,7 +763,9 @@ def end_capture(self, outputs):
         # should prevent these ops from going into the CA graph.
         self.dce()
 
-        graph = self.create_graph_module(f"CompiledAutograd{self.id}")
+        graph = GraphModule(
+            self.fx_tracer.root, self.fx_tracer.graph, f"CompiledAutograd{self.id}"
+        )
         set_locals_to_steal(graph, ["inputs"])
         lazy_graph_code = lazy_format_graph_code(
             "Compiled autograd graph",
@@ -807,15 +781,15 @@ def end_capture(self, outputs):
             payload_fn=lambda: graph.print_readable(print_output=False),
         )
 
-        def runtime_wrapper(compiled_fn, inputs, sizes, scalars, hooks, packed_inputs):
+        def runtime_wrapper(compiled_fn, inputs, sizes, scalars, hooks):
             global in_compiled_autograd_region
             try:
                 in_compiled_autograd_region = True
                 for i in runtime_inputs_to_move:
                     inputs[i] = inputs[i].pin_memory().cuda(non_blocking=True)
 
                 with _disable(), make_compile_context(self.id):
-                    return compiled_fn(inputs, sizes, scalars, hooks, packed_inputs)
+                    return compiled_fn(inputs, sizes, scalars, hooks)
             finally:
                 in_compiled_autograd_region = False
 
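Note (not part of the patch): after dropping `packed_inputs`, the runtime wrapper forwards exactly four argument lists to the compiled function. A self-contained sketch of the new call shape; `example_wrapper` and the lambda standing in for the compiled function are assumptions for illustration, not the PR's code:

```python
# Illustrative only: mirrors the reduced (inputs, sizes, scalars, hooks)
# calling convention; no packed_inputs argument is threaded through anymore.
import torch

def example_wrapper(compiled_fn, inputs, sizes, scalars, hooks):
    return compiled_fn(inputs, sizes, scalars, hooks)

out = example_wrapper(
    lambda inputs, sizes, scalars, hooks: [t + 1 for t in inputs],
    [torch.zeros(3)], [], [], [],
)
print(out)  # [tensor([1., 1., 1.])]
```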
@@ -964,19 +938,6 @@ def reorder_accumulate_grad_nodes(self):
                 if getitem_node is not None:
                     arg.append(getitem_node)
 
-    def delay_unpack_hook_nodes(self):
-        """
-        We can delay unpack hooks until they are needed, even later than in the eager autograd engine.
-        """
-        for node in self.fx_tracer.graph.find_nodes(
-            op="call_function", target=call_hook
-        ):
-            if node.kwargs.get("hook_type", None) != "unpack_hook":
-                continue
-
-            first_user = min(node.users)
-            first_user.prepend(node)
-
     def reorder_tensor_pre_hook_nodes(self):
         """
         Usage of AOTAutograd causes all the tensor_pre_hook nodes to get pushed