Skip to content

Commit

Permalink
Add initial distributed support
Browse files Browse the repository at this point in the history
  • Loading branch information
jpsamaroo committed Dec 7, 2023
1 parent b169984 commit fcef8ac
Show file tree
Hide file tree
Showing 4 changed files with 284 additions and 95 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ version = "0.18.4"
[deps]
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
MemPool = "f9f48841-c794-520a-933b-121f7ba6ed94"
Expand Down
2 changes: 1 addition & 1 deletion src/Dagger.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ include("utils/caching.jl")
include("sch/Sch.jl"); using .Sch

# Data dependency task queue
include("datadep.jl")
include("datadeps.jl")

# Array computations
include("array/darray.jl")
Expand Down
94 changes: 0 additions & 94 deletions src/datadep.jl

This file was deleted.

282 changes: 282 additions & 0 deletions src/datadeps.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,282 @@
using Graphs

export In, Out, InOut, spawn_datadeps

# Argument wrappers used to declare a task's access mode for an argument
# when spawned under `spawn_datadeps`.

"""
    In(x)

Declare that a task only reads its argument `x`.
"""
struct In{T}
    x::T
end
"""
    Out(x)

Declare that a task only writes its argument `x`.
"""
struct Out{T}
    x::T
end
"""
    InOut(x)

Declare that a task both reads and writes its argument `x`.
"""
struct InOut{T}
    x::T
end
# Union of all three access-mode wrappers.
const AnyInOut = Union{In,Out,InOut}

"""
A task queue that tracks read/write dependencies between tasks based on their
`In`/`Out`/`InOut`-wrapped arguments, injecting `syncdeps` so that conflicting
accesses to the same object execute in order. With `static=true`, tasks are
collected (rather than executed immediately) together with a data-dependency
graph, for later scheduling by `distribute_tasks!`.
"""
struct DataDepsTaskQueue <: AbstractTaskQueue
    # The queue above us
    upper_queue::AbstractTaskQueue
    # The mapping of unique objects to previously-launched tasks,
    # and their data dependency on the object (read, write)
    deps::IdDict{Any, Vector{Pair{Tuple{Bool,Bool}, EagerThunk}}}
    # Whether to analyze the DAG statically or eagerly
    static::Bool
    # If static=true, the set of tasks that have already been seen
    seen_tasks::Union{Vector{Pair{EagerTaskSpec,EagerThunk}},Nothing}
    # If static=true, the data-dependency graph of all tasks
    g::Union{SimpleDiGraph{Int},Nothing}
    # If static=true, the mapping from task to graph ID
    task_to_id::Union{Dict{EagerThunk,Int},Nothing}
    function DataDepsTaskQueue(upper_queue; static::Bool=false)
        dep_map = IdDict{Any, Vector{Pair{Tuple{Bool,Bool}, EagerThunk}}}()
        # Eager mode carries no static-analysis state at all.
        if !static
            return new(upper_queue, dep_map, static, nothing, nothing, nothing)
        end
        return new(upper_queue, dep_map, static,
                   Pair{EagerTaskSpec,EagerThunk}[],
                   SimpleDiGraph(),
                   Dict{EagerThunk,Int}())
    end
end

"""
    enqueue!(queue::DataDepsTaskQueue, fullspec::Pair{EagerTaskSpec,EagerThunk})

Register one task with the data-dependency queue: unwrap each argument's
`In`/`Out`/`InOut` wrapper to determine read/write intent, add `syncdeps`
entries against previously-enqueued tasks that conflict on the same argument
object (write-after-read, read-after-write, write-after-write), and then either
record the task for later static scheduling (`queue.static == true`) or forward
it immediately to the upper queue.
"""
function enqueue!(queue::DataDepsTaskQueue, fullspec::Pair{EagerTaskSpec,EagerThunk})
    # If static, record this task and its edges in the graph
    if queue.static
        g = queue.g
        task_to_id = queue.task_to_id
    end

    spec, task = fullspec
    if queue.static
        # Each task becomes a graph vertex; conflict edges are added below.
        add_vertex!(g)
        task_to_id[task] = our_task_id = nv(g)
    end
    opts = spec.options
    # Reuse any caller-provided syncdeps set, or start with an empty one.
    syncdeps = get(Set{Any}, opts, :syncdeps)
    # Dependencies are collected here and only registered into queue.deps after
    # the full argument scan, so this task never appears to conflict with itself.
    deps_to_add = Vector{Pair{Any, Tuple{Bool,Bool}}}()
    for (idx, (pos, arg)) in enumerate(spec.args)
        readdep = false
        writedep = false
        if arg isa In
            readdep = true
            arg = arg.x
        elseif arg isa Out
            writedep = true
            arg = arg.x
        elseif arg isa InOut
            readdep = true
            writedep = true
            arg = arg.x
        else
            # Unwrapped arguments are conservatively treated as read-only.
            readdep = true
        end
        # Strip the access-mode wrapper so the task receives the raw value.
        spec.args[idx] = pos => arg

        push!(deps_to_add, arg => (readdep, writedep))

        if !haskey(queue.deps, arg)
            continue
        end
        argdeps = queue.deps[arg]::Vector{Pair{Tuple{Bool,Bool}, EagerThunk}}
        if readdep
            # When you have an in dependency, sync with the previous out
            for ((other_readdep::Bool, other_writedep::Bool),
                 other_task::EagerThunk) in argdeps
                if other_writedep
                    push!(syncdeps, other_task)
                    if queue.static
                        other_task_id = task_to_id[other_task]
                        add_edge!(g, other_task_id, our_task_id)
                    end
                end
            end
        end
        if writedep
            # When you have an out dependency, sync with the previous in or out
            for ((other_readdep::Bool, other_writedep::Bool),
                 other_task::EagerThunk) in argdeps
                if other_readdep || other_writedep
                    push!(syncdeps, other_task)
                    if queue.static
                        other_task_id = task_to_id[other_task]
                        add_edge!(g, other_task_id, our_task_id)
                    end
                end
            end
        end
    end
    # Now record this task as the latest reader/writer of each argument.
    for (arg, (readdep, writedep)) in deps_to_add
        argdeps = get!(queue.deps, arg) do
            Vector{Pair{Tuple{Bool,Bool}, EagerThunk}}()
        end
        push!(argdeps, (readdep, writedep) => task)
    end

    spec.options = merge(opts, (;syncdeps,))

    if queue.static
        # Defer execution until distribute_tasks! processes the whole DAG.
        push!(queue.seen_tasks, fullspec)
    else
        enqueue!(queue.upper_queue, fullspec)
    end
end
"""
    enqueue!(queue::DataDepsTaskQueue, specs::Vector{Pair{EagerTaskSpec,EagerThunk}})

Batch enqueue is not yet supported for data-dependency queues; always throws.
The loop below is the naive intended implementation, deliberately left
unreachable behind the `error` (it would incorrectly register earlier tasks in
the batch as dependencies of later ones — see the FIXME).
"""
function enqueue!(queue::DataDepsTaskQueue, specs::Vector{Pair{EagerTaskSpec,EagerThunk}})
    # FIXME: Don't register as previous tasks until very end
    error("Not yet implemented")
    for spec in specs
        enqueue!(queue, spec)
    end
end

"""
    distribute_tasks!(queue::DataDepsTaskQueue)

Launch the tasks recorded by a static `DataDepsTaskQueue`, assigning them
round-robin across all processors of all workers. For each `Array` argument of
a task assigned to a remote worker, a `copyto!` task is spawned beforehand to
move the data to that worker, and another afterwards to copy the result back to
the original (local) copy. Non-`Array` arguments are passed through untouched.
"""
function distribute_tasks!(queue::DataDepsTaskQueue)
    # "Distributes" the graph by making cuts
    #= TODO: We currently assume:
    # - All data is local to this worker
    # - All data is the same size
    # - All tasks take the same amount of time to execute
    # - Tasks executing on other workers will have data moved for them
    # - All data will be updated locally at the end of the computation
    =#
    # FIXME: Don't do round-robin
    # FIXME: Skip this if only one proc
    all_procs = Processor[]
    for w in procs()
        append!(all_procs, get_processors(OSProc(w)))
    end
    # NOTE(review): currently unused below (see the data_worker TODOs); seeded
    # with the assumption that all tracked data starts on this worker.
    data_locality = IdDict{Any,Int}(data=>myid() for data in keys(queue.deps))

    # Make a copy of each piece of data on each worker
    remote_args = Dict{Int,IdDict{Any,Any}}(w=>IdDict{Any,Any}() for w in procs())
    # FIXME: Owner can repeat (same arg twice to one task)
    # Maps each argument to the task (user task or copy-back task) that most
    # recently wrote it; `nothing` until first use.
    args_owner = IdDict{Any,Any}(arg=>nothing for arg in keys(queue.deps))
    for w in procs()
        for data in keys(queue.deps)
            # Only Array data participates in copy-to/copy-from movement.
            data isa Array || continue
            if w == myid()
                remote_args[w][data] = data
            else
                # TODO: Can't use @mutable with custom Chunk scope
                #remote_args[w][data] = Dagger.@mutable worker=w copy(data)
                remote_args[w][data] = remotecall_fetch(Dagger.tochunk, w, data)
            end
        end
    end

    # Round-robin assign tasks to processors
    proc_idx = 1
    for (spec, task) in queue.seen_tasks
        our_proc = all_procs[proc_idx]
        our_proc_worker = root_worker_id(our_proc)

        # Spawn copies before and after user's task, as necessary
        @dagdebug nothing :spawn_datadeps "Scheduling $(spec.f)"
        task_queue = get_options(:task_queue)
        task_syncdeps = Set()
        task_args = copy(spec.args)

        # Copy args from local to remote
        for (idx, (pos, arg)) in enumerate(task_args)
            arg isa Array || continue
            data_worker = 1
            # TODO: Track initial data locality:
            #data_worker = data_locality[arg]
            if our_proc_worker != data_worker
                # Add copy-to operation (depends on latest owner of arg)
                @dagdebug nothing :spawn_datadeps "Enqueueing copy-to: $data_worker => $our_proc_worker"
                arg_local = remote_args[data_worker][arg]
                @assert arg_local === spec.args[idx][2]
                arg_remote = remote_args[our_proc_worker][arg]
                copy_to_scope = scope(worker=our_proc_worker)
                copy_to_syncdeps = Set()
                if (owner = args_owner[arg]) !== nothing
                    @dagdebug nothing :spawn_datadeps "(copy-to arg) Depending on previous owner"
                    push!(copy_to_syncdeps, owner)
                end
                copy_to = Dagger.@spawn scope=copy_to_scope syncdeps=copy_to_syncdeps copyto!(arg_remote, arg_local)
                push!(task_syncdeps, copy_to)
                # Point the user's task at the remote copy of the data.
                spec.args[idx] = pos => arg_remote
                # TODO: Allow changing data locality:
                #data_locality[arg] = our_proc_worker
            else
                # Data already lives where the task will run; just order the
                # task after the argument's previous owner.
                if (owner = args_owner[arg]) !== nothing
                    @dagdebug nothing :spawn_datadeps "(local arg) Depending on previous owner"
                    push!(task_syncdeps, owner)
                end
            end
        end

        # Launch user's task
        syncdeps = get(Set, spec.options, :syncdeps)
        for other_task in task_syncdeps
            push!(syncdeps, other_task)
        end
        task_scope = scope(worker=our_proc_worker)
        spec.options = merge(spec.options, (;syncdeps, scope=task_scope))
        enqueue!(task_queue, spec=>task)
        # The user's task becomes the latest owner of every Array argument.
        for (_, arg) in task_args
            arg isa Array || continue
            args_owner[arg] = task
        end

        # Copy args from remote to local
        # TODO: Don't always copy to-and-from
        for (_, arg) in task_args
            arg isa Array || continue
            data_worker = 1
            # TODO: Track initial data locality:
            #data_worker = data_locality[arg]
            if our_proc_worker != data_worker
                # Add copy-from operation
                @dagdebug nothing :spawn_datadeps "Enqueueing copy-from: $our_proc_worker => $data_worker"
                arg_local = remote_args[data_worker][arg]
                arg_remote = remote_args[our_proc_worker][arg]
                copy_from_scope = scope(worker=data_worker)
                copy_from_syncdeps = Set([task])
                copy_from = Dagger.@spawn scope=copy_from_scope syncdeps=copy_from_syncdeps copyto!(arg_local, arg_remote)

                # Set copy-from as latest owner of arg
                args_owner[arg] = copy_from

                # TODO: Allow changing data locality:
                #data_locality[arg] = our_proc_worker
            end
        end
        # Advance to the next processor, wrapping around.
        proc_idx = mod1(proc_idx+1, length(all_procs))
    end
end

"""
    spawn_datadeps(f::Base.Callable; static::Bool=false)

Run `f` with a `DataDepsTaskQueue` installed as the active task queue, so that
tasks spawned within `f` have the dependencies declared by their
`In`/`Out`/`InOut`-wrapped arguments tracked and enforced. With `static=true`,
tasks are collected during `f` and then scheduled together by
`distribute_tasks!` after `f` returns. Returns the result of `f()`.
"""
function spawn_datadeps(f::Base.Callable; static::Bool=false)
    upper_queue = get_options(:task_queue, EagerTaskQueue())
    queue = DataDepsTaskQueue(upper_queue; static)
    result = with_options(f; task_queue=queue)
    # In static mode nothing ran yet; launch the recorded DAG now.
    queue.static && distribute_tasks!(queue)
    return result
end

# FIXME: Move this elsewhere
"""
A pass-through task queue that records every task enqueued through it, so that
`wait_all` can later `fetch` each one.
"""
struct WaitAllQueue <: AbstractTaskQueue
    # The queue tasks are forwarded to after being recorded
    upper_queue::AbstractTaskQueue
    # Every task enqueued so far
    tasks::Vector{EagerThunk}
end
"""
    enqueue!(queue::WaitAllQueue, spec::Pair{EagerTaskSpec,EagerThunk})

Record the task for later waiting, then forward the spec unchanged to the
upper queue.
"""
function enqueue!(queue::WaitAllQueue, spec::Pair{EagerTaskSpec,EagerThunk})
    _, thunk = spec
    push!(queue.tasks, thunk)
    enqueue!(queue.upper_queue, spec)
end
"""
    enqueue!(queue::WaitAllQueue, specs::Vector{Pair{EagerTaskSpec,EagerThunk}})

Record every task in the batch for later waiting, then forward the whole
batch unchanged to the upper queue.
"""
function enqueue!(queue::WaitAllQueue, specs::Vector{Pair{EagerTaskSpec,EagerThunk}})
    append!(queue.tasks, map(last, specs))
    enqueue!(queue.upper_queue, specs)
end
"""
    wait_all(f)

Run `f` with a recording task queue installed, then block until every task
spawned within `f` has completed. Returns the result of `f()`.
"""
function wait_all(f)
    queue = WaitAllQueue(get_options(:task_queue, EagerTaskQueue()), EagerThunk[])
    result = with_options(f; task_queue=queue)
    # Block on completion of every recorded task before returning.
    foreach(fetch, queue.tasks)
    return result
end

0 comments on commit fcef8ac

Please sign in to comment.