
Commit b97a786

ezyang authored and pytorchmergebot committed
Inline compile_to_fn at its only call site (pytorch#141691)
Stacked on pytorch#141689

Signed-off-by: Edward Z. Yang <[email protected]>
Pull Request resolved: pytorch#141691
Approved by: https://github.com/jansel
ghstack dependencies: pytorch#141681, pytorch#141683, pytorch#141685, pytorch#141688, pytorch#141689
1 parent 9e4723c commit b97a786
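
The commit folds a single-use method into the one place that calls it. The sketch below is not taken from the commit; it is a minimal illustration of the inline-at-the-only-call-site pattern, using hypothetical names (Before, After, make_adder) that do not appear in the PyTorch sources.

# Hypothetical sketch of the "inline at the only call site" refactor; the
# classes and helper here are invented for illustration only.

class Before:
    def make_adder(self, n):
        # Helper with exactly one caller.
        return lambda x: x + n

    def run(self, x):
        add_one = self.make_adder(1)  # the only call site
        return add_one(x)


class After:
    def run(self, x):
        # The helper body is moved to its call site and the method is deleted,
        # mirroring how the commit moves compile_to_fn's body into
        # compile_fx.py and removes it from GraphLowering.
        add_one = lambda x: x + 1
        return add_one(x)


assert Before().run(41) == After().run(41) == 42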

File tree

test/inductor/test_codecache.py
torch/_inductor/compile_fx.py
torch/_inductor/graph.py

3 files changed: +53 -72 lines changed

test/inductor/test_codecache.py (+13 -34)

@@ -673,45 +673,24 @@ def test_inductor_counters(self):
         """
         Test that we bump the inductor counters on a cache hit.
         """
-        compile_to_fn = GraphLowering.compile_to_fn

-        counter_name = "a_test_counter"
-        counter_incr = 7
-
-        def bump_counter(self):
-            # Mock that bumps some arbitrary test counter by a set amount, then calls
-            # the original GraphLowering.compile_to_fn.
-            counters["inductor"][counter_name] += counter_incr
-            return compile_to_fn(self)
-
-        with mock.patch.object(GraphLowering, "compile_to_fn", bump_counter):
-
-            def fn(a, b):
-                return torch.mm(a, b)
+        def fn(a, b):
+            return torch.mm(a, b)

-            a = torch.rand(8, 32, device="cpu")
-            b = torch.rand(32, 8, device="cpu")
+        a = torch.rand(8, 32, device="cpu")
+        b = torch.rand(32, 8, device="cpu")

-            compiled_fn = torch.compile(fn)
+        compiled_fn = torch.compile(fn)

-            # Verify the "miss" case.
-            counter_val = 2
-            counters["inductor"][counter_name] = counter_val
-            self.assertEqual(fn(a, b), compiled_fn(a, b))
-            self.assertEqual(counters["inductor"]["fxgraph_cache_hit"], 0)
-            self.assertEqual(
-                counters["inductor"][counter_name], counter_val + counter_incr
-            )
+        # Verify the "miss" case.
+        self.assertEqual(fn(a, b), compiled_fn(a, b))
+        self.assertEqual(counters["inductor"]["fxgraph_cache_hit"], 0)

-            # Verify the "hit" case.
-            self.reset()
-            counter_val = 5
-            counters["inductor"][counter_name] = counter_val
-            self.assertEqual(fn(a, b), compiled_fn(a, b))
-            self.assertEqual(counters["inductor"]["fxgraph_cache_hit"], 1)
-            self.assertEqual(
-                counters["inductor"][counter_name], counter_val + counter_incr
-            )
+        # Verify the "hit" case.
+        self.reset()
+        counter_val = 5
+        self.assertEqual(fn(a, b), compiled_fn(a, b))
+        self.assertEqual(counters["inductor"]["fxgraph_cache_hit"], 1)

     @config.patch({"fx_graph_cache": True})
     @config.patch({"fx_graph_remote_cache": False})

torch/_inductor/compile_fx.py (+40 -2)

@@ -52,7 +52,7 @@
 )
 from torch._functorch import config as functorch_config
 from torch._functorch.aot_autograd import aot_export_module, make_boxed_func
-from torch._inductor.codecache import code_hash, FxGraphCache
+from torch._inductor.codecache import code_hash, FxGraphCache, output_code_log
 from torch._inductor.cudagraph_utils import BoxedDeviceIndex, PlaceholderInfo
 from torch._inductor.debug import save_args_for_compile_fx_inner
 from torch._inductor.output_code import (

@@ -978,7 +978,45 @@ def log_graph_runnable() -> str:
                 output_strides.append(None)

         _check_triton_bf16_support(graph)
-        compiled_fn = graph.compile_to_fn()
+
+        compiled_fn: Any
+
+        with dynamo_timed(
+            "GraphLowering.compile_to_fn", log_pt2_compile_event=True
+        ):
+            if graph.aot_mode:
+                from .codecache import AotCodeCompiler
+
+                assert graph.cpp_wrapper, "AOT mode only supports C++ wrapper"
+                code, linemap = graph.codegen_with_cpp_wrapper()
+                output_code_log.debug("Output code: \n%s", code)
+
+                serialized_extern_kernel_nodes = None
+                if graph.extern_kernel_nodes:
+                    serialized_extern_kernel_nodes = (
+                        graph.extern_node_serializer(graph.extern_kernel_nodes)
+                    )
+                    output_code_log.debug(
+                        "Serialized Extern Kernel Nodes: \n%s",
+                        serialized_extern_kernel_nodes,
+                    )
+
+                additional_files = graph.wrapper_code.additional_files
+
+                with dynamo_timed(
+                    "AotCodeCompiler.compile", log_pt2_compile_event=True
+                ):
+                    # Directly return the file path with the compiled code
+                    compiled_fn = AotCodeCompiler.compile(
+                        graph,
+                        code,
+                        serialized_extern_kernel_nodes,
+                        device_type=graph.device_type,
+                        additional_files=additional_files,
+                    )
+            else:
+                compiled_fn = graph.compile_to_module().call
+
         num_bytes, nodes_num_elem, node_runtimes = graph.count_bytes()
         metrics.num_bytes_accessed += num_bytes
         metrics.node_runtimes += node_runtimes
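
The inlined block keeps the original dynamo_timed("GraphLowering.compile_to_fn", ...) span, so compile-time profiling still attributes this phase under the same name. As a hedged sketch of that instrumentation pattern (assuming dynamo_timed is importable from torch._dynamo.utils and usable as a context manager, as the diff above does), wrapping a custom phase looks roughly like this; the phase name and function are invented for illustration.

# Minimal sketch of the timing pattern used around the inlined code; not part
# of the commit. "my_custom_phase" is a made-up phase name.
import time

from torch._dynamo.utils import dynamo_timed


def timed_phase():
    # Wrapping a region in dynamo_timed attributes its wall time to the given
    # phase in PT2 compile-time reporting, mirroring how compile_fx.py wraps
    # the inlined compile_to_fn body (and, nested inside it, AotCodeCompiler.compile).
    with dynamo_timed("my_custom_phase", log_pt2_compile_event=True):
        time.sleep(0.01)  # stand-in for real lowering/compilation work


timed_phase()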

torch/_inductor/graph.py (-36)

@@ -2059,42 +2059,6 @@ def _compile_to_module(self) -> ModuleType:
         V.debug.copy(os.path.splitext(mod.__file__)[0] + ".debug")
         return mod

-    def compile_to_fn(self) -> Any:
-        with dynamo_timed("GraphLowering.compile_to_fn", log_pt2_compile_event=True):
-            return self._compile_to_fn()
-
-    def _compile_to_fn(self) -> Any:
-        if self.aot_mode:
-            from .codecache import AotCodeCompiler
-
-            assert self.cpp_wrapper, "AOT mode only supports C++ wrapper"
-            code, linemap = self.codegen_with_cpp_wrapper()
-            output_code_log.debug("Output code: \n%s", code)
-
-            serialized_extern_kernel_nodes = None
-            if self.extern_kernel_nodes:
-                serialized_extern_kernel_nodes = self.extern_node_serializer(
-                    self.extern_kernel_nodes
-                )
-                output_code_log.debug(
-                    "Serialized Extern Kernel Nodes: \n%s",
-                    serialized_extern_kernel_nodes,
-                )
-
-            additional_files = self.wrapper_code.additional_files
-
-            with dynamo_timed("AotCodeCompiler.compile", log_pt2_compile_event=True):
-                # Directly return the file path with the compiled code
-                return AotCodeCompiler.compile(
-                    self,
-                    code,
-                    serialized_extern_kernel_nodes,
-                    device_type=self.device_type,
-                    additional_files=additional_files,
-                )
-        else:
-            return self.compile_to_module().call
-
     def get_output_names(self) -> List[str]:
         return [
             node.get_name()
