From 581cfbdece72ac1c0a14d5e5beed541ebb92f70b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Thu, 4 Jul 2024 13:42:17 +0100 Subject: [PATCH 01/15] [SYCL][Graph] Add implicit queue recording mechanism --- .../experimental/sycl_ext_oneapi_graph.asciidoc | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index a8cab9bdb1be6..264b753491f8c 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1442,7 +1442,7 @@ The alternative `queue_state::recording` state is used for graph construction. Instead of being scheduled for execution, command-groups submitted to the queue are recorded to a graph object as new nodes for each submission. After recording has finished and the queue returns to the executing state, the recorded commands are -not then executed, they are transparent to any following queue operations. The state +not executed, they are transparent to any following queue operations. The state of a queue can be queried with `queue::ext_oneapi_get_state()`. .Queue State Diagram @@ -1453,7 +1453,18 @@ graph LR Recording -->|End Recording| Executing .... -==== Queue Properties +==== Implicit Queue Recording + +Submitting a command-group to a queue can implicitly change its state +to `queue_state::recording`. This will occur when the command-group depends on +an event that has been returned by a queue in the recording state. + +A queue whose state has been set to `queue_state::recording` using this +mechanism, will behave as if it had been passed as an argument to +`command_graph::begin_recording()`. In particular, its state will not +change again until `command_graph::end_recording()` is called. + +==== Queue Properties; :queue-properties: https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:queue-properties From 3ea3cac4945d4284ff17fcdc38a4141d7053af03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Thu, 4 Jul 2024 16:22:53 +0100 Subject: [PATCH 02/15] Add example and address review comments --- .../sycl_ext_oneapi_graph.asciidoc | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index 264b753491f8c..29ad1234b05d3 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1453,17 +1453,38 @@ graph LR Recording -->|End Recording| Executing .... -==== Implicit Queue Recording +==== Transitive Queue Recording -Submitting a command-group to a queue can implicitly change its state -to `queue_state::recording`. This will occur when the command-group depends on -an event that has been returned by a queue in the recording state. +Submitting a command-group to a queue in the executable state can implicitly +change its state to `queue_state::recording`. This will occur when the +command-group depends on an event that has been returned by a queue in the +recording state. The change of state happens before the command-group is +submitted (i.e. a new graph node will be created for that command-group). A queue whose state has been set to `queue_state::recording` using this mechanism, will behave as if it had been passed as an argument to `command_graph::begin_recording()`. In particular, its state will not change again until `command_graph::end_recording()` is called. +===== Example + +[source,c++] +---- +// q1 state is set to recording. +graph.begin_recording(q1); + +// Node is added to the graph by submitting to a recording queue. +auto e1 = q1.single_task(...); + +// Since there is a dependency on e1 which was created by a queue being +// recorded, q2 immediately enters record mode, and a new node is created +// with an edge between e1 and e2. +auto e2 = q2.single_task(e1, ...); + +// Ends recording on q1 and q2. +graph.end_recording(); +---- + ==== Queue Properties; :queue-properties: https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:queue-properties From 0ce6e0e9d7f4a0533df3a6b301c76e14126bdcac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Thu, 4 Jul 2024 16:39:22 +0100 Subject: [PATCH 03/15] Fix typo --- sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index 29ad1234b05d3..c71eb00b3834c 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1485,7 +1485,7 @@ auto e2 = q2.single_task(e1, ...); graph.end_recording(); ---- -==== Queue Properties; +==== Queue Properties :queue-properties: https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:queue-properties From 99c5bb3c2d92dfe9f2096e22ffc490eb84e0af39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Fri, 12 Jul 2024 18:36:31 +0100 Subject: [PATCH 04/15] [SYCL][Graph] Implement transitive queue recording --- sycl/source/detail/graph_impl.cpp | 18 +- sycl/source/detail/graph_impl.hpp | 7 +- sycl/source/handler.cpp | 9 + .../Graph/RecordReplay/transitive_queue.cpp | 154 ++++++++++++++++++ 4 files changed, 181 insertions(+), 7 deletions(-) create mode 100644 sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 786a8b09932ec..5040f01d7ab33 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -620,6 +620,15 @@ std::vector graph_impl::getExitNodesEvents( return Events; } +void graph_impl::beginRecording( + std::shared_ptr Queue) { + graph_impl::WriteLock Lock(MMutex); + if (Queue->getCommandGraph() == nullptr) { + Queue->setCommandGraph(shared_from_this()); + addQueue(Queue); + } +} + // Check if nodes are empty and if so loop back through predecessors until we // find the real dependency. void exec_graph_impl::findRealDeps( @@ -1601,16 +1610,13 @@ void modifiable_command_graph::begin_recording( "differs from the graph device."); } - if (QueueImpl->getCommandGraph() == nullptr) { - QueueImpl->setCommandGraph(impl); - graph_impl::WriteLock Lock(impl->MMutex); - impl->addQueue(QueueImpl); - } - if (QueueImpl->getCommandGraph() != impl) { + if (QueueImpl->getCommandGraph() != nullptr) { throw sycl::exception(sycl::make_error_code(errc::invalid), "begin_recording called for a queue which is already " "recording to a different graph."); } + + impl->beginRecording(QueueImpl); } void modifiable_command_graph::begin_recording( diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index fe8fc14842d6e..09e2d3e4c3623 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -848,7 +848,7 @@ class partition { }; /// Implementation details of command_graph. -class graph_impl { +class graph_impl : public std::enable_shared_from_this { public: using ReadLock = std::shared_lock; using WriteLock = std::unique_lock; @@ -1192,6 +1192,11 @@ class graph_impl { std::vector getExitNodesEvents(std::weak_ptr Queue); + /// Sets the Queue state to queue_state::recording. Adds the queue to the list + /// of recording queues associated with this graph. + /// \param Queue[in] The queue to be recorded from. + void beginRecording(std::shared_ptr Queue); + /// Store the last barrier node that was submitted to the queue. /// @param[in] Queue The queue the barrier was recorded from. /// @param[in] BarrierNodeImpl The created barrier node. diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index d2fc0b00b60be..4e40bdecc98b4 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1499,6 +1499,15 @@ void handler::depends_on(const std::vector &Events) { } void handler::depends_on(const detail::EventImplPtr &EventImpl) { + + /* If the event dependency has a graph, that means that the queue that created + * it was in recording mode. If the current queue is not recording, we need to + * set it to recording (implements the transitive queue recording feature).*/ + auto GraphFromDep = EventImpl->getCommandGraph(); + if (GraphFromDep && MQueue && !MQueue->getCommandGraph()) { + GraphFromDep->beginRecording(MQueue); + } + if (!EventImpl) return; if (EventImpl->isDiscarded()) { diff --git a/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp b/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp new file mode 100644 index 0000000000000..2e75cfe7997d6 --- /dev/null +++ b/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp @@ -0,0 +1,154 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Checks that the transitive queue recording feature is working as expected. +// i.e. submitting a command group function to a queue that has a dependency +// from a graph, should change the state of the queue to recording mode. + +#include "../graph_common.hpp" +#include + +std::optional> getGraphFromQueue(queue &Q) { + try { + return {Q.ext_oneapi_get_graph()}; + } catch (exception &E) { + if (E.code() == sycl::errc::invalid) { + return {}; + } + } + assert(false && "Unexpected exception from ext_oneapi_get_graph()"); +} + +int main() { + queue Q1; + queue Q2; + queue Q3; + + using T = int; + + std::vector DataA(Size), DataB(Size), DataC(Size); + + std::iota(DataA.begin(), DataA.end(), 1); + std::iota(DataB.begin(), DataB.end(), 10); + std::iota(DataC.begin(), DataC.end(), 1000); + + std::vector ReferenceA(DataA), ReferenceB(DataB), ReferenceC(DataC); + + T *PtrA = malloc_device(Size, Q1); + T *PtrB = malloc_device(Size, Q1); + T *PtrC = malloc_device(Size, Q1); + + Q1.copy(DataA.data(), PtrA, Size); + Q1.copy(DataB.data(), PtrB, Size); + Q1.copy(DataC.data(), PtrC, Size); + Q1.wait_and_throw(); + + exp_ext::command_graph Graph{ + Q1.get_context(), Q1.get_device(), + exp_ext::property::graph::assume_buffer_outlives_graph{}}; + + Graph.begin_recording(Q1); + + auto GraphEventA = Q1.submit([&](handler &CGH) { + CGH.parallel_for(range<1>(Size), [=](item<1> Id) { PtrA[Id]++; }); + }); + + // Since there is a dependency on GraphEventA which is part of a graph, + // this will change Q2 to the recording state. + auto GraphEventB = Q2.submit([&](handler &CGH) { + CGH.depends_on(GraphEventA); + CGH.parallel_for(range<1>(Size), [=](item<1> Id) { PtrA[Id]++; }); + }); + + // Has no dependencies but should still be recorded to the graph because + // the queue was implicitly changed to recording mode previously. + auto GraphEventC = Q2.submit([&](handler &CGH) { + CGH.parallel_for(range<1>(Size), [=](item<1> Id) { PtrB[Id]++; }); + }); + + // Q2 is now in recording mode. Submitting a command group to Q3 with a + // dependency on an event from Q2 should change it to recording mode as well. + auto GraphEventD = Q3.submit([&](handler &CGH) { + CGH.depends_on(GraphEventB); + CGH.parallel_for(range<1>(Size), [=](item<1> Id) { PtrC[Id]++; }); + }); + + assert(Q1.ext_oneapi_get_state() == exp_ext::queue_state::recording); + assert(Q2.ext_oneapi_get_state() == exp_ext::queue_state::recording); + assert(Q3.ext_oneapi_get_state() == exp_ext::queue_state::recording); + + Graph.end_recording(Q1); + Graph.end_recording(Q2); + + assert(Q1.ext_oneapi_get_state() == exp_ext::queue_state::executing); + assert(Q2.ext_oneapi_get_state() == exp_ext::queue_state::executing); + assert(Q3.ext_oneapi_get_state() == exp_ext::queue_state::recording); + + auto GraphEventE = Q1.submit([&](handler &CGH) { + CGH.depends_on(GraphEventD); + CGH.parallel_for(range<1>(Size), [=](item<1> Id) { PtrC[Id]++; }); + }); + + assert(Q1.ext_oneapi_get_state() == exp_ext::queue_state::recording); + assert(Q2.ext_oneapi_get_state() == exp_ext::queue_state::executing); + assert(Q3.ext_oneapi_get_state() == exp_ext::queue_state::recording); + + Graph.end_recording(Q1); + + // Q2 is not recording anymore. So this will be submitted outside the graph. + auto OutsideEventA = Q2.submit([&](handler &CGH) { + CGH.parallel_for(range<1>(Size), [=](item<1> Id) { PtrC[Id] /= 2; }); + }); + + try { + // Q3 should still be recording. Adding a dependency from an event outside + // the graph should fail. + auto EventF = Q3.submit([&](handler &CGH) { + CGH.depends_on(OutsideEventA); + CGH.parallel_for(range<1>(Size), [=](item<1> Id) { PtrC[Id]++; }); + }); + } catch (exception &E) { + assert(E.code() == sycl::errc::invalid); + } + + Q2.wait_and_throw(); + + Q1.copy(PtrA, DataA.data(), Size); + Q1.copy(PtrB, DataB.data(), Size); + Q1.copy(PtrC, DataC.data(), Size); + Q1.wait_and_throw(); + + // Check that only DataC was changed before running the graph + for (size_t i = 0; i < Size; i++) { + assert(check_value(i, ReferenceA[i], DataA[i], "DataA")); + assert(check_value(i, ReferenceB[i], DataB[i], "DataB")); + assert(check_value(i, ReferenceC[i] / 2, DataC[i], "DataC")); + } + + Graph.end_recording(); + assert(Q1.ext_oneapi_get_state() == exp_ext::queue_state::executing); + assert(Q2.ext_oneapi_get_state() == exp_ext::queue_state::executing); + assert(Q3.ext_oneapi_get_state() == exp_ext::queue_state::executing); + + auto GraphExec = Graph.finalize(); + + Q1.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); + Q1.wait_and_throw(); + + Q1.copy(PtrA, DataA.data(), Size); + Q1.copy(PtrB, DataB.data(), Size); + Q1.copy(PtrC, DataC.data(), Size); + Q1.wait_and_throw(); + + for (size_t i = 0; i < Size; i++) { + assert(check_value(i, ReferenceA[i] + 2, DataA[i], "DataA")); + assert(check_value(i, ReferenceB[i] + 1, DataB[i], "DataB")); + assert(check_value(i, ReferenceC[i] / 2 + 2, DataC[i], "DataC")); + } + + return 0; +} From 44d6a98554499ddb65005338e1d87b96dde9adaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Fri, 12 Jul 2024 18:39:37 +0100 Subject: [PATCH 05/15] Fix documentation typo --- sycl/source/detail/graph_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 09e2d3e4c3623..fb5ad69608c96 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -1194,7 +1194,7 @@ class graph_impl : public std::enable_shared_from_this { /// Sets the Queue state to queue_state::recording. Adds the queue to the list /// of recording queues associated with this graph. - /// \param Queue[in] The queue to be recorded from. + /// @param[in] Queue The queue to be recorded from. void beginRecording(std::shared_ptr Queue); /// Store the last barrier node that was submitted to the queue. From 502d6f4f8808c21f9166f3ae200754d97718f411 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Fri, 12 Jul 2024 18:42:16 +0100 Subject: [PATCH 06/15] Remove unused function from test --- .../test-e2e/Graph/RecordReplay/transitive_queue.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp b/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp index 2e75cfe7997d6..f5bbb9abecb27 100644 --- a/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp @@ -10,18 +10,6 @@ // from a graph, should change the state of the queue to recording mode. #include "../graph_common.hpp" -#include - -std::optional> getGraphFromQueue(queue &Q) { - try { - return {Q.ext_oneapi_get_graph()}; - } catch (exception &E) { - if (E.code() == sycl::errc::invalid) { - return {}; - } - } - assert(false && "Unexpected exception from ext_oneapi_get_graph()"); -} int main() { queue Q1; From 26ec10b9d3dde82f56eba22fdf664687995487e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Fri, 12 Jul 2024 18:43:14 +0100 Subject: [PATCH 07/15] Remove buffer property from test --- sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp b/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp index f5bbb9abecb27..f8c3675204aa6 100644 --- a/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp @@ -35,9 +35,7 @@ int main() { Q1.copy(DataC.data(), PtrC, Size); Q1.wait_and_throw(); - exp_ext::command_graph Graph{ - Q1.get_context(), Q1.get_device(), - exp_ext::property::graph::assume_buffer_outlives_graph{}}; + exp_ext::command_graph Graph{Q1.get_context(), Q1.get_device()}; Graph.begin_recording(Q1); From 83bb0a8288b66b822f83ed7f2fc76e1eb017095a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 15 Jul 2024 14:29:35 +0100 Subject: [PATCH 08/15] Fix bugs --- sycl/source/detail/graph_impl.cpp | 3 ++- sycl/source/handler.cpp | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 5040f01d7ab33..9324b96277230 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -1610,7 +1610,8 @@ void modifiable_command_graph::begin_recording( "differs from the graph device."); } - if (QueueImpl->getCommandGraph() != nullptr) { + auto QueueGraph = QueueImpl->getCommandGraph(); + if (QueueGraph != nullptr && QueueGraph != impl) { throw sycl::exception(sycl::make_error_code(errc::invalid), "begin_recording called for a queue which is already " "recording to a different graph."); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 4e40bdecc98b4..ade3804022187 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1500,6 +1500,13 @@ void handler::depends_on(const std::vector &Events) { void handler::depends_on(const detail::EventImplPtr &EventImpl) { + if (!EventImpl) + return; + if (EventImpl->isDiscarded()) { + throw sycl::exception(make_error_code(errc::invalid), + "Queue operation cannot depend on discarded event."); + } + /* If the event dependency has a graph, that means that the queue that created * it was in recording mode. If the current queue is not recording, we need to * set it to recording (implements the transitive queue recording feature).*/ @@ -1508,12 +1515,6 @@ void handler::depends_on(const detail::EventImplPtr &EventImpl) { GraphFromDep->beginRecording(MQueue); } - if (!EventImpl) - return; - if (EventImpl->isDiscarded()) { - throw sycl::exception(make_error_code(errc::invalid), - "Queue operation cannot depend on discarded event."); - } if (auto Graph = getCommandGraph(); Graph) { auto EventGraph = EventImpl->getCommandGraph(); if (EventGraph == nullptr) { From fde9d959cea36f86fa2ca28c5a653130ce3cfa69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 15 Jul 2024 14:48:15 +0100 Subject: [PATCH 09/15] Add device and context checks] --- sycl/source/handler.cpp | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index ade3804022187..1bb493fff2723 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1507,16 +1507,31 @@ void handler::depends_on(const detail::EventImplPtr &EventImpl) { "Queue operation cannot depend on discarded event."); } - /* If the event dependency has a graph, that means that the queue that created - * it was in recording mode. If the current queue is not recording, we need to - * set it to recording (implements the transitive queue recording feature).*/ - auto GraphFromDep = EventImpl->getCommandGraph(); - if (GraphFromDep && MQueue && !MQueue->getCommandGraph()) { - GraphFromDep->beginRecording(MQueue); + auto EventGraph = EventImpl->getCommandGraph(); + if (EventGraph) { + if (EventGraph->getContext() != MQueue->get_context()) { + throw sycl::exception( + make_error_code(errc::invalid), + "Cannot submit to a queue with a dependency from a graph that is " + "associated with a different context."); + if (EventGraph->getDevice() != MQueue->get_device()) { + throw sycl::exception( + make_error_code(errc::invalid), + "Cannot submit to a queue with a dependency from a graph that is " + "associated with a different device."); + } + } + + // If the event dependency has a graph, that means that the queue that + // created it was in recording mode. If the current queue is not recording, + // we need to set it to recording (implements the transitive queue recording + // feature). + if (MQueue && !MQueue->getCommandGraph()) { + EventGraph->beginRecording(MQueue); + } } if (auto Graph = getCommandGraph(); Graph) { - auto EventGraph = EventImpl->getCommandGraph(); if (EventGraph == nullptr) { throw sycl::exception( make_error_code(errc::invalid), From 69f82fd99ce60859c26d179acc173b424ed17db1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 15 Jul 2024 17:50:59 +0100 Subject: [PATCH 10/15] Fix bug --- sycl/source/handler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 1bb493fff2723..3bd6593208e7a 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1508,7 +1508,7 @@ void handler::depends_on(const detail::EventImplPtr &EventImpl) { } auto EventGraph = EventImpl->getCommandGraph(); - if (EventGraph) { + if (MQueue && EventGraph) { if (EventGraph->getContext() != MQueue->get_context()) { throw sycl::exception( make_error_code(errc::invalid), @@ -1526,7 +1526,7 @@ void handler::depends_on(const detail::EventImplPtr &EventImpl) { // created it was in recording mode. If the current queue is not recording, // we need to set it to recording (implements the transitive queue recording // feature). - if (MQueue && !MQueue->getCommandGraph()) { + if (!MQueue->getCommandGraph()) { EventGraph->beginRecording(MQueue); } } From 5d5c7c818e2da215bad0bebb3225d8dd46f78abc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Fri, 26 Jul 2024 18:46:28 +0100 Subject: [PATCH 11/15] Fix transitive_queue test on windows# --- sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp b/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp index f8c3675204aa6..f7b1e6b65b7e5 100644 --- a/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp @@ -12,9 +12,12 @@ #include "../graph_common.hpp" int main() { - queue Q1; - queue Q2; - queue Q3; + + device Dev; + context Ctx{Dev}; + queue Q1{Ctx, Dev}; + queue Q2{Ctx, Dev}; + queue Q3{Ctx, Dev}; using T = int; From d721bb810cb59d52aefa774e291b06802f834e3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 29 Jul 2024 19:35:56 +0100 Subject: [PATCH 12/15] Address review comments; Add unit tests --- sycl/source/detail/graph_impl.cpp | 11 --- sycl/source/handler.cpp | 30 +++++-- .../Graph/RecordReplay/transitive_queue.cpp | 38 +++++---- .../Extensions/CommandGraph/CommandGraph.cpp | 47 +++++++++++ .../Extensions/CommandGraph/Exceptions.cpp | 82 +++++++++++++++++++ 5 files changed, 173 insertions(+), 35 deletions(-) diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 9324b96277230..4472a17e8a7c0 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -1599,17 +1599,6 @@ void modifiable_command_graph::begin_recording( "can NOT be recorded."); } - if (QueueImpl->get_context() != impl->getContext()) { - throw sycl::exception(sycl::make_error_code(errc::invalid), - "begin_recording called for a queue whose context " - "differs from the graph context."); - } - if (QueueImpl->get_device() != impl->getDevice()) { - throw sycl::exception(sycl::make_error_code(errc::invalid), - "begin_recording called for a queue whose device " - "differs from the graph device."); - } - auto QueueGraph = QueueImpl->getCommandGraph(); if (QueueGraph != nullptr && QueueGraph != impl) { throw sycl::exception(sycl::make_error_code(errc::invalid), diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 3bd6593208e7a..19de18f210ce2 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -1499,7 +1499,6 @@ void handler::depends_on(const std::vector &Events) { } void handler::depends_on(const detail::EventImplPtr &EventImpl) { - if (!EventImpl) return; if (EventImpl->isDiscarded()) { @@ -1509,24 +1508,39 @@ void handler::depends_on(const detail::EventImplPtr &EventImpl) { auto EventGraph = EventImpl->getCommandGraph(); if (MQueue && EventGraph) { + auto QueueGraph = MQueue->getCommandGraph(); + if (EventGraph->getContext() != MQueue->get_context()) { throw sycl::exception( make_error_code(errc::invalid), "Cannot submit to a queue with a dependency from a graph that is " "associated with a different context."); - if (EventGraph->getDevice() != MQueue->get_device()) { - throw sycl::exception( - make_error_code(errc::invalid), - "Cannot submit to a queue with a dependency from a graph that is " - "associated with a different device."); - } + } + + if (EventGraph->getDevice() != MQueue->get_device()) { + throw sycl::exception( + make_error_code(errc::invalid), + "Cannot submit to a queue with a dependency from a graph that is " + "associated with a different device."); + } + + if (MQueue->is_in_fusion_mode()) { + throw sycl::exception( + sycl::make_error_code(errc::invalid), + "Queue in fusion mode cannot have a dependency from a graph"); + } + + if (QueueGraph && QueueGraph != EventGraph) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Cannot submit to a recording queue with a " + "dependency from a different graph."); } // If the event dependency has a graph, that means that the queue that // created it was in recording mode. If the current queue is not recording, // we need to set it to recording (implements the transitive queue recording // feature). - if (!MQueue->getCommandGraph()) { + if (!QueueGraph) { EventGraph->beginRecording(MQueue); } } diff --git a/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp b/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp index f7b1e6b65b7e5..c3db65ed13b33 100644 --- a/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp @@ -12,6 +12,7 @@ #include "../graph_common.hpp" int main() { + using T = int; device Dev; context Ctx{Dev}; @@ -19,7 +20,16 @@ int main() { queue Q2{Ctx, Dev}; queue Q3{Ctx, Dev}; - using T = int; + exp_ext::queue_state Recording = exp_ext::queue_state::recording; + exp_ext::queue_state Executing = exp_ext::queue_state::executing; + + auto assertQueueState = [&](exp_ext::queue_state ExpectedQ1, + exp_ext::queue_state ExpectedQ2, + exp_ext::queue_state ExpectedQ3) { + assert(Q1.ext_oneapi_get_state() == ExpectedQ1); + assert(Q2.ext_oneapi_get_state() == ExpectedQ2); + assert(Q3.ext_oneapi_get_state() == ExpectedQ3); + }; std::vector DataA(Size), DataB(Size), DataC(Size); @@ -41,10 +51,12 @@ int main() { exp_ext::command_graph Graph{Q1.get_context(), Q1.get_device()}; Graph.begin_recording(Q1); + assertQueueState(Recording, Executing, Executing); auto GraphEventA = Q1.submit([&](handler &CGH) { CGH.parallel_for(range<1>(Size), [=](item<1> Id) { PtrA[Id]++; }); }); + assertQueueState(Recording, Executing, Executing); // Since there is a dependency on GraphEventA which is part of a graph, // this will change Q2 to the recording state. @@ -58,6 +70,7 @@ int main() { auto GraphEventC = Q2.submit([&](handler &CGH) { CGH.parallel_for(range<1>(Size), [=](item<1> Id) { PtrB[Id]++; }); }); + assertQueueState(Recording, Recording, Executing); // Q2 is now in recording mode. Submitting a command group to Q3 with a // dependency on an event from Q2 should change it to recording mode as well. @@ -65,33 +78,27 @@ int main() { CGH.depends_on(GraphEventB); CGH.parallel_for(range<1>(Size), [=](item<1> Id) { PtrC[Id]++; }); }); - - assert(Q1.ext_oneapi_get_state() == exp_ext::queue_state::recording); - assert(Q2.ext_oneapi_get_state() == exp_ext::queue_state::recording); - assert(Q3.ext_oneapi_get_state() == exp_ext::queue_state::recording); + assertQueueState(Recording, Recording, Recording); Graph.end_recording(Q1); + assertQueueState(Executing, Recording, Recording); Graph.end_recording(Q2); - - assert(Q1.ext_oneapi_get_state() == exp_ext::queue_state::executing); - assert(Q2.ext_oneapi_get_state() == exp_ext::queue_state::executing); - assert(Q3.ext_oneapi_get_state() == exp_ext::queue_state::recording); + assertQueueState(Executing, Executing, Recording); auto GraphEventE = Q1.submit([&](handler &CGH) { CGH.depends_on(GraphEventD); CGH.parallel_for(range<1>(Size), [=](item<1> Id) { PtrC[Id]++; }); }); - - assert(Q1.ext_oneapi_get_state() == exp_ext::queue_state::recording); - assert(Q2.ext_oneapi_get_state() == exp_ext::queue_state::executing); - assert(Q3.ext_oneapi_get_state() == exp_ext::queue_state::recording); + assertQueueState(Recording, Executing, Recording); Graph.end_recording(Q1); + assertQueueState(Executing, Executing, Recording); // Q2 is not recording anymore. So this will be submitted outside the graph. auto OutsideEventA = Q2.submit([&](handler &CGH) { CGH.parallel_for(range<1>(Size), [=](item<1> Id) { PtrC[Id] /= 2; }); }); + assertQueueState(Executing, Executing, Recording); try { // Q3 should still be recording. Adding a dependency from an event outside @@ -102,6 +109,7 @@ int main() { }); } catch (exception &E) { assert(E.code() == sycl::errc::invalid); + assertQueueState(Executing, Executing, Recording); } Q2.wait_and_throw(); @@ -119,9 +127,7 @@ int main() { } Graph.end_recording(); - assert(Q1.ext_oneapi_get_state() == exp_ext::queue_state::executing); - assert(Q2.ext_oneapi_get_state() == exp_ext::queue_state::executing); - assert(Q3.ext_oneapi_get_state() == exp_ext::queue_state::executing); + assertQueueState(Executing, Executing, Executing); auto GraphExec = Graph.finalize(); diff --git a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp index 20b95f99a2d14..6b5c8f8d6178b 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp @@ -586,3 +586,50 @@ TEST_F(CommandGraphTest, AccessorModeEdges) { Queue); testAccessorModeCombo(Queue); } + +// Tests the transitive queue recording behaviour with queue shortcuts. +TEST_F(CommandGraphTest, TransitiveRecordingShortcuts) { + device Dev; + context Ctx{{Dev}}; + queue Q1{Ctx, Dev}; + queue Q2{Ctx, Dev}; + queue Q3{Ctx, Dev}; + + ext::oneapi::experimental::command_graph Graph1{Q1.get_context(), + Q1.get_device()}; + + Graph1.begin_recording(Q1); + + auto GraphEvent1 = Q1.single_task([=] {}); + ASSERT_EQ(Q1.ext_oneapi_get_state(), + ext::oneapi::experimental::queue_state::recording); + ASSERT_EQ(Q2.ext_oneapi_get_state(), + ext::oneapi::experimental::queue_state::executing); + ASSERT_EQ(Q3.ext_oneapi_get_state(), + ext::oneapi::experimental::queue_state::executing); + + auto GraphEvent2 = Q2.single_task(GraphEvent1, [=] {}); + ASSERT_EQ(Q1.ext_oneapi_get_state(), + ext::oneapi::experimental::queue_state::recording); + ASSERT_EQ(Q2.ext_oneapi_get_state(), + ext::oneapi::experimental::queue_state::recording); + ASSERT_EQ(Q3.ext_oneapi_get_state(), + ext::oneapi::experimental::queue_state::executing); + + auto GraphEvent3 = Q3.parallel_for(range<1>{1024}, GraphEvent1, + [=](item<1> Id) {}); + ASSERT_EQ(Q1.ext_oneapi_get_state(), + ext::oneapi::experimental::queue_state::recording); + ASSERT_EQ(Q2.ext_oneapi_get_state(), + ext::oneapi::experimental::queue_state::recording); + ASSERT_EQ(Q3.ext_oneapi_get_state(), + ext::oneapi::experimental::queue_state::recording); + + Graph1.end_recording(); + ASSERT_EQ(Q1.ext_oneapi_get_state(), + ext::oneapi::experimental::queue_state::executing); + ASSERT_EQ(Q2.ext_oneapi_get_state(), + ext::oneapi::experimental::queue_state::executing); + ASSERT_EQ(Q3.ext_oneapi_get_state(), + ext::oneapi::experimental::queue_state::executing); +} diff --git a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp index 1b76c23fe64c5..89116c7161642 100644 --- a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp @@ -628,3 +628,85 @@ TEST_F(CommandGraphTest, ClusterLaunchException) { } ASSERT_EQ(ExceptionCode, sycl::errc::invalid); } + +// Submits a command to a queue that has a dependency to a graph event +// associated with a different context. +TEST_F(CommandGraphTest, TransitiveRecordingWrongContext) { + + device Dev; + context Ctx{Dev}; + context Ctx2{Dev}; + queue Q1{Ctx, Dev}; + queue Q2{Ctx2, Dev}; + + ext::oneapi::experimental::command_graph Graph{Q1.get_context(), + Q1.get_device()}; + Graph.begin_recording(Q1); + + auto GraphEvent1 = + Q1.submit([&](handler &CGH) { CGH.single_task([=] {}); }); + + ASSERT_THROW(Q2.submit([&](handler &CGH) { + CGH.depends_on(GraphEvent1); + CGH.single_task([=] {}); + }), + sycl::exception); +} + +// Submits a command to a queue that has a dependency to a graph event +// associated with a different device. +TEST_F(CommandGraphTest, TransitiveRecordingWrongDevice) { + + auto devices = device::get_devices(); + + // Test needs at least 2 devices available. + if (devices.size() < 2) { + GTEST_SKIP(); + } + + device &Dev1 = devices[0]; + device &Dev2 = devices[1]; + context Ctx{{Dev1, Dev2}}; + queue Q1{Ctx, Dev1}; + queue Q2{Ctx, Dev2}; + + ASSERT_EQ(Dev1, Dev2); + ext::oneapi::experimental::command_graph Graph{Q1.get_context(), + Q1.get_device()}; + Graph.begin_recording(Q1); + + auto GraphEvent1 = + Q1.submit([&](handler &CGH) { CGH.single_task([=] {}); }); + + ASSERT_THROW(Q2.submit([&](handler &CGH) { + CGH.depends_on(GraphEvent1); + CGH.single_task([=] {}); + }), + sycl::exception); +} + +// Submits a command to a queue that has a dependency to a different graph. +TEST_F(CommandGraphTest, RecordingWrongGraphDep) { + device Dev; + context Ctx{{Dev}}; + queue Q1{Ctx, Dev}; + queue Q2{Ctx, Dev}; + + ext::oneapi::experimental::command_graph Graph1{Q1.get_context(), + Q1.get_device()}; + + ext::oneapi::experimental::command_graph Graph2{Q1.get_context(), + Q1.get_device()}; + + Graph1.begin_recording(Q1); + Graph2.begin_recording(Q2); + + auto GraphEvent1 = + Q1.submit([&](handler &CGH) { CGH.single_task([=] {}); }); + + ASSERT_THROW(Q2.submit([&](handler &CGH) { + CGH.depends_on(GraphEvent1); + CGH.single_task([=] {}); + }), + sycl::exception); +} From d1e2daea74d36fcd6341b2c02975525001ee64c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio?= Date: Mon, 29 Jul 2024 19:36:38 +0100 Subject: [PATCH 13/15] Apply review suggestion Co-authored-by: Ben Tracy --- sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index c71eb00b3834c..9418bc3d4d6e0 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1459,7 +1459,7 @@ Submitting a command-group to a queue in the executable state can implicitly change its state to `queue_state::recording`. This will occur when the command-group depends on an event that has been returned by a queue in the recording state. The change of state happens before the command-group is -submitted (i.e. a new graph node will be created for that command-group). +submitted to the device (i.e. a new graph node will be created for that command-group). A queue whose state has been set to `queue_state::recording` using this mechanism, will behave as if it had been passed as an argument to From c7df52cb07b9304b7a90fe6963e451532f102694 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Tue, 30 Jul 2024 11:00:53 +0100 Subject: [PATCH 14/15] Address review comments --- sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp | 4 ++-- sycl/unittests/Extensions/CommandGraph/Exceptions.cpp | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp b/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp index c3db65ed13b33..f4c253dcd2bc8 100644 --- a/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/transitive_queue.cpp @@ -20,8 +20,8 @@ int main() { queue Q2{Ctx, Dev}; queue Q3{Ctx, Dev}; - exp_ext::queue_state Recording = exp_ext::queue_state::recording; - exp_ext::queue_state Executing = exp_ext::queue_state::executing; + const exp_ext::queue_state Recording = exp_ext::queue_state::recording; + const exp_ext::queue_state Executing = exp_ext::queue_state::executing; auto assertQueueState = [&](exp_ext::queue_state ExpectedQ1, exp_ext::queue_state ExpectedQ2, diff --git a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp index 89116c7161642..8a025dc4fba79 100644 --- a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp @@ -670,7 +670,6 @@ TEST_F(CommandGraphTest, TransitiveRecordingWrongDevice) { queue Q1{Ctx, Dev1}; queue Q2{Ctx, Dev2}; - ASSERT_EQ(Dev1, Dev2); ext::oneapi::experimental::command_graph Graph{Q1.get_context(), Q1.get_device()}; Graph.begin_recording(Q1); From 51b9d266c41b50e2b1ea6c304c99a8081c47bc9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Thu, 1 Aug 2024 16:18:15 +0100 Subject: [PATCH 15/15] Clarify property inheritance behaviour --- .../experimental/sycl_ext_oneapi_graph.asciidoc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index abcde3238a6ab..a841a7c6e7f4f 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1463,8 +1463,13 @@ submitted to the device (i.e. a new graph node will be created for that command- A queue whose state has been set to `queue_state::recording` using this mechanism, will behave as if it had been passed as an argument to -`command_graph::begin_recording()`. In particular, its state will not -change again until `command_graph::end_recording()` is called. +`command_graph::begin_recording()`. In particular, its state will not change +again until `command_graph::end_recording()` is called. + +The recording properties of the queue whose event triggered the state change +will also be inherited (i.e. any properties passed to the original call of +`command_graph::begin_recording()` will be inherited by the queue whose state +is being transitioned). ===== Example