Skip to content

Commit

Permalink
Add spawn_datadeps for OMP-like task model
Browse files Browse the repository at this point in the history
  • Loading branch information
jpsamaroo committed Jan 26, 2024
1 parent 7754329 commit f2aafea
Show file tree
Hide file tree
Showing 10 changed files with 876 additions and 12 deletions.
58 changes: 46 additions & 12 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@

julia_version = "1.8.5"
manifest_format = "2.0"
project_hash = "8da7911e4788068aaea8c0ef8589d674bce0fb39"
project_hash = "63ad89f514e49fbb0061c336a95c9098f89440c9"

[[deps.ArnoldiMethod]]
deps = ["LinearAlgebra", "Random", "StaticArrays"]
git-tree-sha1 = "62e51b39331de8911e4a7ff6f5aaf38a5f4cc0ae"
uuid = "ec485272-7323-5ecc-a04f-4719b315124d"
version = "0.2.0"

[[deps.Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
Expand All @@ -12,9 +18,9 @@ uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"

[[deps.ChainRulesCore]]
deps = ["Compat", "LinearAlgebra", "SparseArrays"]
git-tree-sha1 = "2118cb2765f8197b08e5958cdd17c165427425ee"
git-tree-sha1 = "0d12ee16b3f62e4e33c3277773730a5b21a74152"
uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
version = "1.19.0"
version = "1.20.0"

[[deps.ChangesOfVariables]]
deps = ["InverseFunctions", "LinearAlgebra", "Test"]
Expand All @@ -23,26 +29,26 @@ uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0"
version = "0.1.8"

[[deps.Compat]]
deps = ["Dates", "LinearAlgebra", "UUIDs"]
git-tree-sha1 = "886826d76ea9e72b35fcd000e535588f7b60f21d"
deps = ["Dates", "LinearAlgebra", "TOML", "UUIDs"]
git-tree-sha1 = "75bd5b6fc5089df449b5d35fa501c846c9b6549b"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "4.10.1"
version = "4.12.0"

[[deps.CompilerSupportLibraries_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
version = "1.0.1+0"

[[deps.DataAPI]]
git-tree-sha1 = "8da84edb865b0b5b0100c0666a9bc9a0b71c553c"
git-tree-sha1 = "abe83f3a2f1b857aac70ef8b269080af17764bbe"
uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
version = "1.15.0"
version = "1.16.0"

[[deps.DataStructures]]
deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "3dbd312d370723b6bb43ba9d02fc36abade4518d"
git-tree-sha1 = "ac67408d9ddf207de5cfa9a97e114352430f01ed"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.18.15"
version = "0.18.16"

[[deps.Dates]]
deps = ["Printf"]
Expand All @@ -58,11 +64,22 @@ git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d"
uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
version = "0.9.3"

[[deps.Graphs]]
deps = ["ArnoldiMethod", "Compat", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"]
git-tree-sha1 = "899050ace26649433ef1af25bc17a815b3db52b7"
uuid = "86223c79-3864-5bf0-83f7-82e725a168b6"
version = "1.9.0"

[[deps.HashArrayMappedTries]]
git-tree-sha1 = "2eaa69a7cab70a52b9687c8bf950a5a93ec895ae"
uuid = "076d061b-32b6-4027-95e0-9a2c6f6d7e74"
version = "0.2.0"

[[deps.Inflate]]
git-tree-sha1 = "ea8031dea4aff6bd41f1df8f2fdfb25b33626381"
uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9"
version = "0.1.4"

[[deps.InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
Expand Down Expand Up @@ -100,9 +117,9 @@ uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"

[[deps.MacroTools]]
deps = ["Markdown", "Random"]
git-tree-sha1 = "b211c553c199c111d998ecdaf7623d1b89b69f93"
git-tree-sha1 = "2fa9ee3e63fd3a4f7a9a4f4744a52f4856de82df"
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
version = "0.5.12"
version = "0.5.13"

[[deps.Markdown]]
deps = ["Base64"]
Expand Down Expand Up @@ -184,6 +201,12 @@ uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
deps = ["Distributed", "Mmap", "Random", "Serialization"]
uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"

[[deps.SimpleTraits]]
deps = ["InteractiveUtils", "MacroTools"]
git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231"
uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d"
version = "0.9.4"

[[deps.Sockets]]
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"

Expand All @@ -197,6 +220,17 @@ version = "1.2.1"
deps = ["LinearAlgebra", "Random"]
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[[deps.StaticArrays]]
deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore", "Statistics"]
git-tree-sha1 = "f68dd04d131d9a8a8eb836173ee8f105c360b0c5"
uuid = "90137ffa-7385-5640-81b9-e52037218182"
version = "1.9.1"

[[deps.StaticArraysCore]]
git-tree-sha1 = "36b3d696ce6366023a0ea192b4cd442268995a0d"
uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
version = "1.4.2"

[[deps.Statistics]]
deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Expand Down
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ version = "0.18.6"
[deps]
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
MemPool = "f9f48841-c794-520a-933b-121f7ba6ed94"
Expand All @@ -23,6 +24,7 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[compat]
DataStructures = "0.18"
Graphs = "1"
MacroTools = "0.5"
MemPool = "0.4.6"
PrecompileTools = "1.2"
Expand Down
1 change: 1 addition & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ makedocs(;
"Scopes" => "scopes.md",
"Processors" => "processors.md",
"Task Queues" => "task-queues.md",
"Datadeps" => "datadeps.md",
"Option Propagation" => "propagation.md",
"Logging and Graphing" => "logging.md",
"Checkpointing" => "checkpointing.md",
Expand Down
96 changes: 96 additions & 0 deletions docs/src/datadeps.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Datadeps (Data Dependencies)

For many programs, the restriction that tasks cannot write to their arguments
feels overly restrictive and makes certain kinds of programs (such as in-place
linear algebra) hard to express efficiently in Dagger. Thankfully, there is a
solution: `spawn_datadeps`. This function constructs a "datadeps region",
within which tasks are allowed to write to their arguments, with parallelism
controlled via dependencies specified via argument annotations. Let's look at
a simple example to make things concrete:

```julia
A = rand(1000)
B = rand(1000)
C = zeros(1000)
add!(X, Y) = X .+= Y
Dagger.spawn_datadeps() do
Dagger.@spawn add!(InOut(B), In(A))
Dagger.@spawn copyto!(Out(C), In(B))
end
```

In this example, we have two Dagger tasks being launched, one adding `A` into
`B`, and the other copying `B` into `C`. The `add!` task specifies that `A` is
only read from (`In` for "input"), and that `B` is both read from and written
to (`InOut` for "input and output"). The `copyto!` task, similarly, specifies
that `B` is only read from, and that `C` is only written to (`Out` for
"output").

Without `spawn_datadeps` and `In`, `Out`, and `InOut`, the result of these
tasks would be undefined; the two tasks could execute in parallel, or the
`copyto!` could occur before the `add!`, resulting in all kinds of mayhem.
However, `spawn_datadeps` changes things: because we have told Dagger how our
tasks access their arguments, Dagger knows to control the parallelism and
ordering, and ensure that `add!` executes and finishes before `copyto!`
begins, ensuring that `copyto!` "sees" the changes to `B` before executing.

There is another important aspect of `spawn_datadeps` that makes the above
code work: if all of the `Dagger.@spawn` macros are removed, along with the
dependency specifiers, the program would still produce the same results,
without using Dagger. In other words, the parallel (Dagger) version of the
program produces identical results to the serial (non-Dagger) version of the
program. This is similar to using Dagger with purely functional tasks and
without `spawn_datadeps` - removing `Dagger.@spawn` will still result in a
correct (sequential and possibly slower) version of the program. Basically,
`spawn_datadeps` will ensure that Dagger respects the ordering and
dependencies of a program, while still providing parallelism, where possible.

But where is the parallelism? The above example doesn't actually have any
parallelism to exploit! Let's take a look at another example to see the
datadeps model truly shine:

```julia
# Tree reduction of multiple arrays into the first array.
# Combines every array in `As` into `As[1]` with `op` and returns `As[1]`.
function tree_reduce!(op::Base.Callable, As::Vector{<:Array})
    Dagger.spawn_datadeps() do
        # Stack of groups of arrays that still need to be reduced.
        to_reduce = Vector[]
        push!(to_reduce, As)
        while !isempty(to_reduce)
            # Use a separate local for the popped group: assigning to `As`
            # here would rebind the captured function argument, so the final
            # `return As[1]` would depend on whichever group was popped last.
            group = pop!(to_reduce)
            n = length(group)
            if n == 2
                Dagger.@spawn Base.mapreducedim!(identity, op, InOut(group[1]), In(group[2]))
            elseif n > 2
                # `pop!` removes the most recently pushed item, so these are
                # pushed in reverse order of processing: both halves are
                # handled first, and only then is the second half's result
                # combined into the first half's result.
                push!(to_reduce, [group[1], group[div(n,2)+1]])
                push!(to_reduce, group[1:div(n,2)])
                push!(to_reduce, group[div(n,2)+1:end])
            end
            # Groups of length 1 are already fully reduced; nothing to do.
        end
    end
    return As[1]
end

As = [rand(1000) for _ in 1:1000]
Bs = copy.(As)
tree_reduce!(+, As)
@assert isapprox(As[1], reduce((x,y)->x .+ y, Bs))
```

In the above implementation of `tree_reduce!` (which is designed to perform an
elementwise reduction across a vector of arrays), we have a tree reduction
operation where pairs of arrays are reduced, starting with neighboring pairs,
and then reducing pairs of reduction results, etc. until the final result is in
`As[1]`. We can see that the application of Dagger to this algorithm is simple -
only the single `Base.mapreducedim!` call is passed to Dagger - yet due to the
data dependencies and the algorithm's structure, there should be plenty of
parallelism to be exploited across each of the parallel reductions at each
"level" of the reduction tree. Specifically, any two `Dagger.@spawn` calls
which access completely different pairs of arrays can execute in parallel,
while any call which has an `In` or `InOut` on an array will wait for any
previous call which has an `InOut` on that same array.

Additionally, we can notice a powerful feature of this model - if the
`Dagger.@spawn` macro is removed, the code still remains correct, but simply
runs sequentially. This means that the structure of the program doesn't have to
change in order to use Dagger for parallelization, which can make applying
Dagger to existing algorithms quite effortless.
4 changes: 4 additions & 0 deletions src/Dagger.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ include("queue.jl")
include("thunk.jl")
include("submission.jl")
include("chunks.jl")
include("memory-spaces.jl")

# Task scheduling
include("compute.jl")
Expand All @@ -42,6 +43,9 @@ include("utils/system_uuid.jl")
include("utils/caching.jl")
include("sch/Sch.jl"); using .Sch

# Data dependency task queue
include("datadeps.jl")

# Array computations
include("array/darray.jl")
include("array/alloc.jl")
Expand Down
Loading

0 comments on commit f2aafea

Please sign in to comment.