diff --git a/.travis.yml b/.travis.yml index 4e2877ee528..829b6bfeff8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,11 +32,12 @@ env: global: - COVERALLS_PARALLEL=true jobs: - - TRIXI_TEST=1D - TRIXI_TEST=2D - TRIXI_TEST=3D - - TRIXI_TEST=misc - TRIXI_TEST=paper-self-gravitating-gas-dynamics + - TRIXI_TEST=parallel_2d + - TRIXI_TEST=1D + - TRIXI_TEST=misc notifications: webhooks: https://coveralls.io/webhook email: false diff --git a/Project.toml b/Project.toml index 97f52b31331..c63532e00f8 100644 --- a/Project.toml +++ b/Project.toml @@ -9,6 +9,8 @@ HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e" LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" diff --git a/docs/make.jl b/docs/make.jl index 7e60a9fba79..c0dc5d00fef 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -40,6 +40,7 @@ makedocs( "Home" => "index.md", "Development" => "development.md", "Visualization" => "visualization.md", + "Parallelization" => "parallelization.md", "Style guide" => "styleguide.md", "GitHub & Git" => "github-git.md", "Reference" => [ diff --git a/docs/src/parallelization.md b/docs/src/parallelization.md new file mode 100644 index 00000000000..3909cd2c6b7 --- /dev/null +++ b/docs/src/parallelization.md @@ -0,0 +1,98 @@ +# Parallelization + +## Shared-memory parallelization with threads +Many compute-intensive loops in Trixi.jl are parallelized using the +[multi-threading](https://docs.julialang.org/en/v1/manual/multi-threading/) +support provided by Julia. You can recognize those loops by the +`Threads.@threads` macro prefixed to them, e.g., +```julia +Threads.@threads for element_id in 1:dg.n_elements + ... +end +``` +This will statically assign an equal iteration count to each available thread. + +To use multi-threading, you need to tell Julia at startup how many threads you +want to use by either setting the environment variable `JULIA_NUM_THREADS` or by +providing the `-t/--threads` command line argument. For example, to start Julia +with four threads, start Julia with +```bash +julia -t 4 +``` +If both the environment variable and the command line argument are specified at +the same time, the latter takes precedence. + + +## Distributed computing with MPI +In addition to the shared memory parallelization with multi-threading, Trixi.jl +supports distributed parallelism via +[MPI.jl](https://github.com/JuliaParallel/MPI.jl), which leverages the Message +Passing Interface (MPI). MPI.jl comes with its own MPI library binaries such +that there is no need to install MPI yourself. However, it is also possible to +instead use an existing MPI installation, which is recommended if you are +running MPI programs on a cluster or supercomputer +([see the MPI.jl docs](https://juliaparallel.github.io/MPI.jl/stable/configuration/) +to find out how to select the employed MPI library). + +To start Trixi in parallel with MPI, there are three options: + +1. **Run from the REPL with `mpiexec()`:** You can start a parallel execution directly from the + REPL by executing + ```julia + julia> using MPI + + julia> mpiexec() do cmd + run(`$cmd -n 3 $(Base.julia_cmd()) --project=. 
-e 'using Trixi; Trixi.run("examples/2d/parameters.toml")'`)
+        end
+   ```
+   The parameter `-n 3` specifies that Trixi should run with three processes (or
+   *ranks* in MPI parlance) and should be adapted to your available
+   computing resources and problem size. The `$(Base.julia_cmd())` argument
+   ensures that Julia is executed in parallel with the same optimization level
+   etc. as you used for the REPL; if this is unnecessary or undesired, you can
+   also just use `julia`. Further, if you are not running Trixi from a local
+   clone but have installed it as a package, you need to omit the `--project=.`.
+2. **Run from the command line with `mpiexecjl`:** Alternatively, you can
+   use the `mpiexecjl` script provided by MPI.jl, which allows you to start
+   Trixi in parallel directly from the command line. As preparation, you need to
+   install the script *once* by running
+   ```julia
+   julia> using MPI
+
+   julia> MPI.install_mpiexecjl(destdir="/somewhere/in/your/PATH")
+   ```
+   Then, to run Trixi in parallel, execute the following command from your
+   command line:
+   ```bash
+   mpiexecjl -n 3 julia --project=. -e 'using Trixi; Trixi.run("examples/2d/parameters.toml")'
+   ```
+3. **Run interactively with `tmpi` (Linux/macOS only):** If you are on a
+   Linux/macOS system, you have a third option which lets you run Julia in
+   parallel interactively from the REPL. This is especially handy during
+   development since, in contrast to the first two options, it allows you to
+   reuse the compilation cache and thus yields much faster startup times after
+   the first execution. It requires [tmux](https://github.com/tmux/tmux) and the
+   [OpenMPI](https://www.open-mpi.org) library to be installed beforehand, both of
+   which are usually available through a package manager. Once you have
+   installed both tools, you need to configure MPI.jl to use the OpenMPI
+   installation on your system, as explained
+   [here](https://juliaparallel.github.io/MPI.jl/stable/configuration/#Using-a-system-provided-MPI).
+   Then, you can download and install the
+   [tmpi](https://github.com/Azrael3000/tmpi)
+   script by executing
+   ```bash
+   curl https://raw.githubusercontent.com/Azrael3000/tmpi/master/tmpi -o /somewhere/in/your/PATH/tmpi
+   ```
+   Finally, you can start and control multiple Julia REPLs simultaneously by
+   running
+   ```bash
+   tmpi 3 julia --project=.
+   ```
+   This will start Julia inside `tmux` three times and multiplex all commands
+   you enter in one REPL to all other REPLs (try it yourself to see what this
+   means). If you have no prior experience with `tmux`, handling the REPL
+   this way feels slightly weird at first. However, there is a lot of
+   documentation for `tmux`
+   [available](https://github.com/tmux/tmux/wiki/Getting-Started) and once you
+   get the hang of it, developing Trixi in parallel becomes much smoother this
+   way.
diff --git a/src/Trixi.jl b/src/Trixi.jl
index a34524beda4..9310902a011 100644
--- a/src/Trixi.jl
+++ b/src/Trixi.jl
@@ -15,13 +15,15 @@ module Trixi
 # Include other packages that are used in Trixi
 # (standard library packages first, other packages next, all of them sorted alphabetically)
 using LinearAlgebra: dot
-using Pkg.TOML: parsefile
+using Pkg.TOML: parsefile, parse
 using Printf: @printf, @sprintf, println
 using Profile: clear_malloc_data
 using Random: seed!
using EllipsisNotation using HDF5: h5open, attrs +import MPI +using OffsetArrays: OffsetArray, OffsetVector using StaticArrays: @MVector, @SVector, MVector, MMatrix, MArray, SVector, SMatrix, SArray using TimerOutputs: @notimeit, @timeit, TimerOutput, print_timer, reset_timer! using UnPack: @unpack @@ -37,6 +39,7 @@ export globals # Include all top-level source files include("auxiliary/auxiliary.jl") +include("parallel/parallel.jl") include("equations/equations.jl") include("mesh/mesh.jl") include("solvers/solvers.jl") @@ -58,4 +61,9 @@ export flux_central, flux_lax_friedrichs, flux_hll, export examples_dir, get_examples, default_example +function __init__() + init_mpi() +end + + end diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index 1d7108936ab..6e288c55c7e 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -13,10 +13,24 @@ const parameters = Dict{Symbol,Any}() # Parse parameters file into global dict -function parse_parameters_file(filename) +parse_parameters_file(filename) = parse_parameters_file(filename, mpi_parallel()) +function parse_parameters_file(filename, mpi_parallel::Val{false}) parameters[:default] = parsefile(filename) parameters[:default]["parameters_file"] = filename end +function parse_parameters_file(filename, mpi_parallel::Val{true}) + if mpi_isroot() + buffer = read(filename) + MPI.Bcast!(Ref(length(buffer)), mpi_root(), mpi_comm()) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) + else + count = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm()) + buffer = Vector{UInt8}(undef, count[]) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) + end + parameters[:default] = parse(String(buffer)) + parameters[:default]["parameters_file"] = filename +end # Return parameter by name, optionally taking a default value and a range of valid values. 
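The parallel `parse_parameters_file` above relies on a two-step broadcast idiom: the root rank announces the byte count of the file, then sends the raw bytes, so that all other ranks can allocate a matching buffer before receiving the payload. The following standalone sketch (not part of the patch; the file name `parameters.toml` and root rank `0` are placeholders) shows the same pattern with plain MPI.jl calls:

```julia
using MPI

MPI.Init()
comm = MPI.COMM_WORLD
root = 0

if MPI.Comm_rank(comm) == root
  buffer = read("parameters.toml")             # Vector{UInt8} with the file contents
  MPI.Bcast!(Ref(length(buffer)), root, comm)  # 1) announce the payload size
  MPI.Bcast!(buffer, root, comm)               # 2) send the payload itself
else
  count = MPI.Bcast!(Ref(0), root, comm)       # 1) receive the payload size
  buffer = Vector{UInt8}(undef, count[])       # allocate a matching buffer
  MPI.Bcast!(buffer, root, comm)               # 2) receive the payload
end

content = String(buffer)  # every rank now holds identical file contents
```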
@@ -118,7 +132,7 @@ function print_startup_message() ██║ ██║ ██║██║██╔╝ ██╗██║ ╚═╝ ╚═╝ ╚═╝╚═╝╚═╝ ╚═╝╚═╝ """ - println(s) + mpi_println(s) end diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index d513040f1a0..4d6befe9635 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -307,3 +307,15 @@ function clear!(c::AbstractContainer) return c end + + +# Helpful overloads for `raw_copy` +function raw_copy!(c::AbstractContainer, first::Int, last::Int, destination::Int) + raw_copy!(c, c, first, last, destination) +end +function raw_copy!(target::AbstractContainer, source::AbstractContainer, from::Int, destination::Int) + raw_copy!(target, source, from, from, destination) +end +function raw_copy!(c::AbstractContainer, from::Int, destination::Int) + raw_copy!(c, c, from, from, destination) +end diff --git a/src/io/io.jl b/src/io/io.jl index 26598f8d742..7572b79958e 100644 --- a/src/io/io.jl +++ b/src/io/io.jl @@ -1,19 +1,19 @@ +include("parallel.jl") # Load restart file and store solution in solver -function load_restart_file!(dg::AbstractDg, restart_filename) +load_restart_file!(dg, restart_filename) = load_restart_file!(dg, restart_filename, mpi_parallel()) +function load_restart_file!(dg::AbstractDg, restart_filename, mpi_parallel::Val{false}) # Create variables to be returned later time = NaN step = -1 # Open file h5open(restart_filename, "r") do file - equation = equations(dg) - # Read attributes to perform some sanity checks if read(attrs(file)["ndims"]) != ndims(dg) error("restart mismatch: ndims in solver differs from value in restart file") end - if read(attrs(file)["equations"]) != get_name(equation) + if read(attrs(file)["equations"]) != get_name(equations(dg)) error("restart mismatch: equations in solver differs from value in restart file") end if read(attrs(file)["polydeg"]) != polydeg(dg) @@ -28,7 +28,7 @@ function load_restart_file!(dg::AbstractDg, restart_filename) step = read(attrs(file)["timestep"]) # Read data - varnames = varnames_cons(equation) + varnames = varnames_cons(equations(dg)) for v in 1:nvariables(dg) # Check if variable name matches var = file["variables_$v"] @@ -37,7 +37,6 @@ function load_restart_file!(dg::AbstractDg, restart_filename) end # Read variable - println("Reading variables_$v ($name)...") dg.elements.u[v, .., :] = read(file["variables_$v"]) end end @@ -48,7 +47,10 @@ end # Save current DG solution with some context information as a HDF5 file for # restarting. 
-function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep) +save_restart_file(dg, mesh, time, dt, timestep) = save_restart_file(dg, mesh, time, dt, timestep, + mpi_parallel()) +function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, + mpi_parallel::Val{false}) # Create output directory (if it does not exist) output_directory = parameter("output_directory", "out") mkpath(output_directory) @@ -62,14 +64,12 @@ function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep) # Open file (clobber existing content) h5open(filename * ".h5", "w") do file - equation = equations(dg) - # Add context information as attributes attrs(file)["ndims"] = ndims(dg) - attrs(file)["equations"] = get_name(equation) + attrs(file)["equations"] = get_name(equations(dg)) attrs(file)["polydeg"] = polydeg(dg) attrs(file)["n_vars"] = nvariables(dg) - attrs(file)["n_elements"] = dg.n_elements + attrs(file)["n_elements"] = dg.n_elements_global attrs(file)["mesh_file"] = splitdir(mesh.current_filename)[2] attrs(file)["time"] = time attrs(file)["dt"] = dt @@ -77,7 +77,7 @@ function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep) # Restart files always store conservative variables data = dg.elements.u - varnames = varnames_cons(equation) + varnames = varnames_cons(equations(dg)) # Store each variable of the solution for v in 1:nvariables(dg) @@ -95,6 +95,11 @@ end # Save current DG solution with some context information as a HDF5 file for # postprocessing. function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, system="") + return save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, system, + mpi_parallel()) +end +function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, system, + mpi_parallel::Val{false}) # Create output directory (if it does not exist) output_directory = parameter("output_directory", "out") mkpath(output_directory) @@ -112,11 +117,9 @@ function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, # Open file (clobber existing content) h5open(filename * ".h5", "w") do file - equation = equations(dg) - # Add context information as attributes attrs(file)["ndims"] = ndims(dg) - attrs(file)["equations"] = get_name(equation) + attrs(file)["equations"] = get_name(equations(dg)) attrs(file)["polydeg"] = polydeg(dg) attrs(file)["n_vars"] = nvariables(dg) attrs(file)["n_elements"] = dg.n_elements @@ -130,7 +133,7 @@ function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, valid=["conservative", "primitive"]) if solution_variables == "conservative" data = dg.elements.u - varnames = varnames_cons(equation) + varnames = varnames_cons(equations(dg)) else # Reinterpret the solution array as an array of conservative variables, # compute the primitive variables via broadcasting, and reinterpret the @@ -138,7 +141,7 @@ function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, data = Array(reinterpret(eltype(dg.elements.u), cons2prim.(reinterpret(SVector{nvariables(dg),eltype(dg.elements.u)}, dg.elements.u), Ref(equations(dg))))) - varnames = varnames_prim(equation) + varnames = varnames_prim(equations(dg)) end # Store each variable of the solution @@ -165,7 +168,8 @@ end # Save current mesh with some context information as an HDF5 file. 
-function save_mesh_file(mesh::TreeMesh, timestep=-1)
+save_mesh_file(mesh, timestep=-1) = save_mesh_file(mesh, timestep, mpi_parallel())
+function save_mesh_file(mesh::TreeMesh, timestep, mpi_parallel::Val{false})
   # Create output directory (if it does not exist)
   output_directory = parameter("output_directory", "out")
   mkpath(output_directory)
diff --git a/src/io/parallel.jl b/src/io/parallel.jl
new file mode 100644
index 00000000000..90bc821fa0a
--- /dev/null
+++ b/src/io/parallel.jl
@@ -0,0 +1,248 @@
+
+# Load restart file and store solution in solver
+function load_restart_file!(dg::AbstractDg, restart_filename, mpi_parallel::Val{true})
+  # Create variables to be returned later
+  time = NaN
+  step = -1
+
+  # Calculate node counts by MPI rank
+  element_size = nnodes(dg)^ndims(dg)
+  node_counts = convert(Vector{Cint}, collect(dg.n_elements_by_rank)) * Cint(element_size)
+
+  if mpi_isroot()
+    # Open file
+    h5open(restart_filename, "r") do file
+      # Read attributes to perform some sanity checks
+      if read(attrs(file)["ndims"]) != ndims(dg)
+        error("restart mismatch: ndims in solver differs from value in restart file")
+      end
+      if read(attrs(file)["equations"]) != get_name(equations(dg))
+        error("restart mismatch: equations in solver differs from value in restart file")
+      end
+      if read(attrs(file)["polydeg"]) != polydeg(dg)
+        error("restart mismatch: polynomial degree in solver differs from value in restart file")
+      end
+      if read(attrs(file)["n_elements"]) != dg.n_elements_global
+        error("restart mismatch: number of elements in solver differs from value in restart file")
+      end
+
+      # Read time and time step
+      time = read(attrs(file)["time"])
+      step = read(attrs(file)["timestep"])
+      MPI.Bcast!(Ref(time), mpi_root(), mpi_comm())
+      MPI.Bcast!(Ref(step), mpi_root(), mpi_comm())
+
+      # Read data
+      varnames = varnames_cons(equations(dg))
+      for v in 1:nvariables(dg)
+        # Check if variable name matches
+        var = file["variables_$v"]
+        if (name = read(attrs(var)["name"])) != varnames[v]
+          error("mismatch: variables_$v should be '$(varnames[v])', but found '$name'")
+        end
+
+        # Read variable
+        dg.elements.u[v, .., :] = MPI.Scatterv(read(file["variables_$v"]), node_counts, mpi_root(), mpi_comm())
+      end
+    end
+  else # on non-root ranks, receive data from root
+    time = MPI.Bcast!(Ref(time), mpi_root(), mpi_comm())[]
+    step = MPI.Bcast!(Ref(step), mpi_root(), mpi_comm())[]
+    for v in 1:nvariables(dg)
+      # Read variable
+      dg.elements.u[v, .., :] = MPI.Scatterv(eltype(dg.elements.u)[], node_counts, mpi_root(), mpi_comm())
+    end
+  end
+
+  return time, step
+end
+
+function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep,
+                           mpi_parallel::Val{true})
+  # Calculate node counts by MPI rank
+  element_size = nnodes(dg)^ndims(dg)
+  node_counts = convert(Vector{Cint}, collect(dg.n_elements_by_rank)) * Cint(element_size)
+
+  # Restart files always store conservative variables
+  data = dg.elements.u
+  varnames = varnames_cons(equations(dg))
+
+  # Only write from MPI root (poor man's version of parallel I/O)
+  if mpi_isroot()
+    # Create output directory (if it does not exist)
+    output_directory = parameter("output_directory", "out")
+    if mpi_isroot()
+      mkpath(output_directory)
+    end
+
+    # Filename without extension based on current time step
+    filename = joinpath(output_directory, @sprintf("restart_%06d", timestep))
+
+    # Convert time and time step size to floats
+    time = convert(Float64, time)
+    dt = convert(Float64, dt)
+
+    # Open file (clobber existing content)
+    h5open(filename * ".h5", "w") do file
+      # 
Add context information as attributes + attrs(file)["ndims"] = ndims(dg) + attrs(file)["equations"] = get_name(equations(dg)) + attrs(file)["polydeg"] = polydeg(dg) + attrs(file)["n_vars"] = nvariables(dg) + attrs(file)["n_elements"] = dg.n_elements_global + attrs(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attrs(file)["time"] = time + attrs(file)["dt"] = dt + attrs(file)["timestep"] = timestep + + # Store each variable of the solution + for v in 1:nvariables(dg) + # Write to file + file["variables_$v"] = MPI.Gatherv(vec(data[v, .., :]), node_counts, mpi_root(), mpi_comm()) + + # Add variable name as attribute + var = file["variables_$v"] + attrs(var)["name"] = varnames[v] + end + end + else # non-root ranks only send data + # Send nodal data to root + for v in 1:nvariables(dg) + MPI.Gatherv(vec(data[v, .., :]), node_counts, mpi_root(), mpi_comm()) + end + end +end + + +# Save current DG solution with some context information as a HDF5 file for +# postprocessing. +function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, system, + mpi_parallel::Val{true}) + + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(dg) + element_counts = convert(Vector{Cint}, collect(dg.n_elements_by_rank)) + node_counts = element_counts * Cint(element_size) + + # Convert to primitive variables if requested + solution_variables = parameter("solution_variables", "primitive", + valid=["conservative", "primitive"]) + if solution_variables == "conservative" + data = dg.elements.u + varnames = varnames_cons(equations(dg)) + else + # Reinterpret the solution array as an array of conservative variables, + # compute the primitive variables via broadcasting, and reinterpret the + # result as a plain array of floating point numbers + data = Array(reinterpret(eltype(dg.elements.u), + cons2prim.(reinterpret(SVector{nvariables(dg),eltype(dg.elements.u)}, dg.elements.u), + Ref(equations(dg))))) + varnames = varnames_prim(equations(dg)) + end + + # Only write from MPI root (poor man's version of parallel I/O) + if mpi_isroot() + # Create output directory (if it does not exist) + output_directory = parameter("output_directory", "out") + mkpath(output_directory) + + # Filename without extension based on current time step + if isempty(system) + filename = joinpath(output_directory, @sprintf("solution_%06d", timestep)) + else + filename = joinpath(output_directory, @sprintf("solution_%s_%06d", system, timestep)) + end + + # Convert time and time step size to floats + time = convert(Float64, time) + dt = convert(Float64, dt) + + # Open file (clobber existing content) + h5open(filename * ".h5", "w") do file + # Add context information as attributes + attrs(file)["ndims"] = ndims(dg) + attrs(file)["equations"] = get_name(equations(dg)) + attrs(file)["polydeg"] = polydeg(dg) + attrs(file)["n_vars"] = nvariables(dg) + attrs(file)["n_elements"] = dg.n_elements_global + attrs(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attrs(file)["time"] = time + attrs(file)["dt"] = dt + attrs(file)["timestep"] = timestep + + # Store each variable of the solution + for v in 1:nvariables(dg) + # Write to file + file["variables_$v"] = MPI.Gatherv(vec(data[v, .., :]), node_counts, mpi_root(), mpi_comm()) + + # Add variable name as attribute + var = file["variables_$v"] + attrs(var)["name"] = varnames[v] + end + + # Store element variables + for (v, (key, element_variables)) in enumerate(dg.element_variables) + # Add to file + file["element_variables_$v"] = 
MPI.Gatherv(element_variables, element_counts, mpi_root(), mpi_comm()) + + # Add variable name as attribute + var = file["element_variables_$v"] + attrs(var)["name"] = string(key) + end + end + else # non-root ranks only send data + # Send nodal data to root + for v in 1:nvariables(dg) + MPI.Gatherv(vec(data[v, .., :]), node_counts, mpi_root(), mpi_comm()) + end + + # Send element data to root + for (v, (key, element_variables)) in enumerate(dg.element_variables) + MPI.Gatherv(element_variables, element_counts, mpi_root(), mpi_comm()) + end + end +end + + +# Save current mesh with some context information as an HDF5 file. +function save_mesh_file(mesh::TreeMesh, timestep, mpi_parallel::Val{true}) + # Create output directory (if it does not exist) + output_directory = parameter("output_directory", "out") + mpi_isroot() && mkpath(output_directory) + + # Determine file name based on existence of meaningful time step + if timestep >= 0 + filename = joinpath(output_directory, @sprintf("mesh_%06d", timestep)) + else + filename = joinpath(output_directory, "mesh") + end + + # Since the mesh is replicated on all ranks, only save from MPI root + if !mpi_isroot() + return filename * ".h5" + end + + # Create output directory (if it does not exist) + # Open file (clobber existing content) + h5open(filename * ".h5", "w") do file + # Add context information as attributes + n_cells = length(mesh.tree) + attrs(file)["ndims"] = ndims(mesh) + attrs(file)["n_cells"] = n_cells + attrs(file)["n_leaf_cells"] = count_leaf_cells(mesh.tree) + attrs(file)["minimum_level"] = minimum_level(mesh.tree) + attrs(file)["maximum_level"] = maximum_level(mesh.tree) + attrs(file)["center_level_0"] = mesh.tree.center_level_0 + attrs(file)["length_level_0"] = mesh.tree.length_level_0 + attrs(file)["periodicity"] = collect(mesh.tree.periodicity) + + # Add tree data + file["parent_ids"] = @view mesh.tree.parent_ids[1:n_cells] + file["child_ids"] = @view mesh.tree.child_ids[:, 1:n_cells] + file["neighbor_ids"] = @view mesh.tree.neighbor_ids[:, 1:n_cells] + file["levels"] = @view mesh.tree.levels[1:n_cells] + file["coordinates"] = @view mesh.tree.coordinates[:, 1:n_cells] + end + + return filename * ".h5" +end diff --git a/src/mesh/tree.jl b/src/mesh/abstract_tree.jl similarity index 58% rename from src/mesh/tree.jl rename to src/mesh/abstract_tree.jl index 209ff05abd9..91ee3e9b920 100644 --- a/src/mesh/tree.jl +++ b/src/mesh/abstract_tree.jl @@ -1,201 +1,61 @@ - -# Composite type that represents a NDIMS-dimensional tree. -# -# Implements everything required for AbstractContainer. -# -# Note: The way the data structures are set up and the way most algorithms -# work, it is *always* assumed that -# a) we have a balanced tree (= at most one level difference between -# neighboring cells, or 2:1 rule) -# b) we may not have all children (= some children may not exist) -# c) the tree is stored depth-first -# -# However, the way the refinement/coarsening algorithms are currently -# implemented, we only have fully refined cells. That is, a cell either has 2^NDIMS children or -# no children at all (= leaf cell). This restriction is also assumed at -# multiple positions in the refinement/coarsening algorithms. -# -# An exception to the 2:1 rule exists for the low-level `refine_unbalanced!` -# function, which is required for implementing level-wise refinement in a sane -# way. Also, depth-first ordering *might* not by guaranteed during -# refinement/coarsening operations. 
-mutable struct Tree{NDIMS} <: AbstractContainer - parent_ids::Vector{Int} - child_ids::Matrix{Int} - neighbor_ids::Matrix{Int} - levels::Vector{Int} - coordinates::Matrix{Float64} - original_cell_ids::Vector{Int} - - capacity::Int - length::Int - dummy::Int - - center_level_0::MVector{NDIMS, Float64} - length_level_0::Float64 - periodicity::NTuple{NDIMS, Bool} - - function Tree{NDIMS}(capacity::Integer) where NDIMS - # Verify that NDIMS is an integer - @assert NDIMS isa Integer - - # Create instance - t = new() - - # Initialize fields with defaults - # Note: length as capacity + 1 is to use `capacity + 1` as temporary storage for swap operations - t.parent_ids = fill(typemin(Int), capacity + 1) - t.child_ids = fill(typemin(Int), 2^NDIMS, capacity + 1) - t.neighbor_ids = fill(typemin(Int), 2*NDIMS, capacity + 1) - t.levels = fill(typemin(Int), capacity + 1) - t.coordinates = fill(NaN, NDIMS, capacity + 1) - t.original_cell_ids = fill(typemin(Int), capacity + 1) - - t.capacity = capacity - t.length = 0 - t.dummy = capacity + 1 - - t.center_level_0 = @MVector fill(NaN, NDIMS) - t.length_level_0 = NaN - - return t - end -end - - -# Constructor for passing the dimension as an argument -Tree(::Val{NDIMS}, args...) where NDIMS = Tree{NDIMS}(args...) - -# Create and initialize tree -function Tree{NDIMS}(capacity::Int, center::AbstractArray{Float64}, - length::Real, periodicity=true) where NDIMS - # Create instance - t = Tree{NDIMS}(capacity) - - # Initialize root cell - init!(t, center, length, periodicity) - - return t -end - -# Constructor accepting a single number as center (as opposed to an array) for 1D -Tree{1}(cap::Int, center::Real, len::Real, periodicity=true) = Tree{1}(cap, [convert(Float64, center)], len, periodicity) - - -# Clear tree with deleting data structures, store center and length, and create root cell -function init!(t::Tree, center::AbstractArray{Float64}, length::Real, periodicity=true) - clear!(t) - - # Set domain information - t.center_level_0 = center - t.length_level_0 = length - - # Create root cell - t.length += 1 - t.parent_ids[1] = 0 - t.child_ids[:, 1] .= 0 - t.levels[1] = 0 - t.coordinates[:, 1] .= t.center_level_0 - t.original_cell_ids[1] = 0 - - # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor - if all(periodicity) - # Also catches case where periodicity = true - t.neighbor_ids[:, 1] .= 1 - t.periodicity = ntuple(x->true, ndims(t)) - elseif !any(periodicity) - # Also catches case where periodicity = false - t.neighbor_ids[:, 1] .= 0 - t.periodicity = ntuple(x->false, ndims(t)) - else - # Default case if periodicity is an iterable - for dimension in 1:ndims(t) - if periodicity[dimension] - t.neighbor_ids[2 * dimension - 1, 1] = 1 - t.neighbor_ids[2 * dimension - 0, 1] = 1 - else - t.neighbor_ids[2 * dimension - 1, 1] = 0 - t.neighbor_ids[2 * dimension - 0, 1] = 0 - end - end - - t.periodicity = Tuple(periodicity) - end -end - - -# Convenience output for debugging -function Base.show(io::IO, t::Tree{NDIMS}) where NDIMS - l = t.length - println(io, '*'^20) - println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])") - println(io, "transpose(t.child_ids[:, 1:l]) = $(transpose(t.child_ids[:, 1:l]))") - println(io, "transpose(t.neighbor_ids[:, 1:l]) = $(transpose(t.neighbor_ids[:, 1:l]))") - println(io, "t.levels[1:l] = $(t.levels[1:l])") - println(io, "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))") - println(io, "t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])") - println(io, "t.capacity = 
$(t.capacity)") - println(io, "t.length = $(t.length)") - println(io, "t.dummy = $(t.dummy)") - println(io, "t.center_level_0 = $(t.center_level_0)") - println(io, "t.length_level_0 = $(t.length_level_0)") - println(io, '*'^20) -end +abstract type AbstractTree{NDIMS} <: AbstractContainer end # Type traits to obtain dimension -@inline Base.ndims(t::Type{Tree{NDIMS}}) where NDIMS = NDIMS -@inline Base.ndims(t::Tree) = ndims(typeof(t)) +@inline Base.ndims(::AbstractTree{NDIMS}) where NDIMS = NDIMS # Auxiliary methods to allow semantic queries on the tree # Check whether cell has parent cell -has_parent(t::Tree, cell_id::Int) = t.parent_ids[cell_id] > 0 +has_parent(t::AbstractTree, cell_id::Int) = t.parent_ids[cell_id] > 0 # Count number of children for a given cell -n_children(t::Tree, cell_id::Int) = count(x -> (x > 0), @view t.child_ids[:, cell_id]) +n_children(t::AbstractTree, cell_id::Int) = count(x -> (x > 0), @view t.child_ids[:, cell_id]) # Check whether cell has any child cell -has_children(t::Tree, cell_id::Int) = n_children(t, cell_id) > 0 +has_children(t::AbstractTree, cell_id::Int) = n_children(t, cell_id) > 0 # Check whether cell is leaf cell -is_leaf(t::Tree, cell_id::Int) = !has_children(t, cell_id) +is_leaf(t::AbstractTree, cell_id::Int) = !has_children(t, cell_id) # Check whether cell has specific child cell -has_child(t::Tree, cell_id::Int, child::Int) = t.child_ids[child, cell_id] > 0 +has_child(t::AbstractTree, cell_id::Int, child::Int) = t.child_ids[child, cell_id] > 0 # Check if cell has a neighbor at the same refinement level in the given direction -has_neighbor(t::Tree, cell_id::Int, direction::Int) = t.neighbor_ids[direction, cell_id] > 0 +has_neighbor(t::AbstractTree, cell_id::Int, direction::Int) = t.neighbor_ids[direction, cell_id] > 0 # Check if cell has a coarse neighbor, i.e., with one refinement level lower -function has_coarse_neighbor(t::Tree, cell_id::Int, direction::Int) +function has_coarse_neighbor(t::AbstractTree, cell_id::Int, direction::Int) return has_parent(t, cell_id) && has_neighbor(t, t.parent_ids[cell_id], direction) end # Check if cell has any neighbor (same-level or lower-level) -function has_any_neighbor(t::Tree, cell_id::Int, direction::Int) +function has_any_neighbor(t::AbstractTree, cell_id::Int, direction::Int) return has_neighbor(t, cell_id, direction) || has_coarse_neighbor(t, cell_id, direction) end +# Check if cell is own cell, i.e., belongs to this MPI rank +is_own_cell(t::AbstractTree, cell_id) = true + # Return cell length for a given level -length_at_level(t::Tree, level::Int) = t.length_level_0 / 2^level +length_at_level(t::AbstractTree, level::Int) = t.length_level_0 / 2^level # Return cell length for a given cell -length_at_cell(t::Tree, cell_id::Int) = length_at_level(t, t.levels[cell_id]) +length_at_cell(t::AbstractTree, cell_id::Int) = length_at_level(t, t.levels[cell_id]) # Return minimum level of any leaf cell -minimum_level(t::Tree) = minimum(t.levels[leaf_cells(t)]) +minimum_level(t::AbstractTree) = minimum(t.levels[leaf_cells(t)]) # Return maximum level of any leaf cell -maximum_level(t::Tree) = maximum(t.levels[leaf_cells(t)]) +maximum_level(t::AbstractTree) = maximum(t.levels[leaf_cells(t)]) # Check if tree is periodic -isperiodic(t::Tree) = all(t.periodicity) -isperiodic(t::Tree, dimension) = t.periodicity[dimension] +isperiodic(t::AbstractTree) = all(t.periodicity) +isperiodic(t::AbstractTree, dimension) = t.periodicity[dimension] # Auxiliary methods for often-required calculations # Number of potential child cells 
-n_children_per_cell(::Tree{NDIMS}) where NDIMS = 2^NDIMS +n_children_per_cell(::AbstractTree{NDIMS}) where NDIMS = 2^NDIMS n_children_per_cell(dims::Integer) = 2^dims # Number of directions @@ -207,7 +67,7 @@ n_children_per_cell(dims::Integer) = 2^dims # 4 -> +y # 5 -> -z # 6 -> +z -n_directions(::Tree{NDIMS}) where NDIMS = 2 * NDIMS +n_directions(::AbstractTree{NDIMS}) where NDIMS = 2 * NDIMS # For a given direction, return its opposite direction # @@ -260,7 +120,7 @@ end # # The function `f` is passed the cell id of each leaf cell # as an argument. -function filter_leaf_cells(f, t::Tree) +function filter_leaf_cells(f, t::AbstractTree) filtered = Vector{Int}(undef, length(t)) count = 0 for cell_id in 1:length(t) @@ -275,21 +135,29 @@ end # Return an array with the ids of all leaf cells -leaf_cells(t::Tree) = filter_leaf_cells((cell_id)->true, t) +leaf_cells(t::AbstractTree) = filter_leaf_cells((cell_id)->true, t) + + +# Return an array with the ids of all leaf cells for a given rank +leaf_cells_by_rank(t::AbstractTree, rank) = leaf_cells(t) + + +# Return an array with the ids of all local leaf cells +local_leaf_cells(t::AbstractTree) = leaf_cells_by_rank(t, mpi_rank()) # Count the number of leaf cells. -count_leaf_cells(t::Tree) = length(leaf_cells(t)) +count_leaf_cells(t::AbstractTree) = length(leaf_cells(t)) # Store cell id in each cell to use for post-AMR analysis -function reset_original_cell_ids!(t::Tree) +function reset_original_cell_ids!(t::AbstractTree) t.original_cell_ids[1:length(t)] .= 1:length(t) end # Refine entire tree by one level -refine!(t::Tree) = refine!(t, leaf_cells(t)) +refine!(t::AbstractTree) = refine!(t, leaf_cells(t)) # Refine given cells and rebalance tree. @@ -298,7 +166,7 @@ refine!(t::Tree) = refine!(t, leaf_cells(t)) # otherwise the 2:1 rule would be violated, which can cause more # refinements. # Note 2: Rebalancing currently only considers *Cartesian* neighbors, not diagonal neighbors! -function refine!(t::Tree, cell_ids) +function refine!(t::AbstractTree, cell_ids) # Reset original cell ids such that each cell knows its current id reset_original_cell_ids!(t) @@ -328,7 +196,7 @@ end # Refine all leaf cells with coordinates in a given rectangular box -function refine_box!(t::Tree{NDIMS}, coordinates_min::AbstractArray{Float64}, +function refine_box!(t::AbstractTree{NDIMS}, coordinates_min::AbstractArray{Float64}, coordinates_max::AbstractArray{Float64}) where NDIMS for dim in 1:NDIMS @assert coordinates_min[dim] < coordinates_max[dim] "Minimum coordinates are not minimum." @@ -345,7 +213,7 @@ function refine_box!(t::Tree{NDIMS}, coordinates_min::AbstractArray{Float64}, end # Convenience method for 1D -function refine_box!(t::Tree{1}, coordinates_min::Real, coordinates_max::Real) +function refine_box!(t::AbstractTree{1}, coordinates_min::Real, coordinates_max::Real) return refine_box!(t, [convert(Float64, coordinates_min)], [convert(Float64, coordinates_max)]) end @@ -356,7 +224,7 @@ end # Note 2: The current algorithm assumes that a previous refinement step has # created level differences of at most 2. That is, before the previous # refinement step, the tree was balanced. -function rebalance!(t::Tree, refined_cell_ids) +function rebalance!(t::AbstractTree, refined_cell_ids) # Create buffer for newly refined cells to_refine = zeros(Int, n_directions(t) * length(refined_cell_ids)) count = 0 @@ -396,83 +264,14 @@ end # Refine given cells without rebalancing tree. # # Note: After a call to this method the tree may be unbalanced! 
-function refine_unbalanced!(t::Tree, cell_ids) - # Store actual ids refined cells (shifted due to previous insertions) - refined = zeros(Int, length(cell_ids)) - - # Loop over all cells that are to be refined - for (count, original_cell_id) in enumerate(sort(unique(cell_ids))) - # Determine actual cell id, taking into account previously inserted cells - n_children = n_children_per_cell(t) - cell_id = original_cell_id + (count - 1) * n_children - refined[count] = cell_id - - @assert !has_children(t, cell_id) "Non-leaf cell $cell_id cannot be refined" - - # Insert new cells directly behind parent (depth-first) - insert!(t, cell_id + 1, n_children) - - # Flip sign of refined cell such that we can easily find it later - t.original_cell_ids[cell_id] = -t.original_cell_ids[cell_id] - - # Initialize child cells - for child in 1:n_children - # Set child information based on parent - child_id = cell_id + child - t.parent_ids[child_id] = cell_id - t.child_ids[child, cell_id] = child_id - t.neighbor_ids[:, child_id] .= 0 - t.child_ids[:, child_id] .= 0 - t.levels[child_id] = t.levels[cell_id] + 1 - t.coordinates[:, child_id] .= child_coordinates( - t, t.coordinates[:, cell_id], length_at_cell(t, cell_id), child) - t.original_cell_ids[child_id] = 0 - - # For determining neighbors, use neighbor connections of parent cell - for direction in 1:n_directions(t) - # If neighbor is a sibling, establish one-sided connectivity - # Note: two-sided is not necessary, as each sibling will do this - if has_sibling(child, direction) - adjacent = adjacent_child(child, direction) - neighbor_id = cell_id + adjacent - - t.neighbor_ids[direction, child_id] = neighbor_id - continue - end - - # Skip if original cell does have no neighbor in direction - if !has_neighbor(t, cell_id, direction) - continue - end - - # Otherwise, check if neighbor has children - if not, skip again - neighbor_id = t.neighbor_ids[direction, cell_id] - if !has_children(t, neighbor_id) - continue - end - - # Check if neighbor has corresponding child and if yes, establish connectivity - adjacent = adjacent_child(child, direction) - if has_child(t, neighbor_id, adjacent) - neighbor_child_id = t.child_ids[adjacent, neighbor_id] - opposite = opposite_direction(direction) - - t.neighbor_ids[direction, child_id] = neighbor_child_id - t.neighbor_ids[opposite, neighbor_child_id] = child_id - end - end - end - end - - return refined -end +function refine_unbalanced!(t::AbstractTree, cell_ids) end # Wrap single-cell refinements such that `sort(...)` does not complain -refine_unbalanced!(t::Tree, cell_id::Int) = refine_unbalanced!(t, [cell_id]) +refine_unbalanced!(t::AbstractTree, cell_id::Int) = refine_unbalanced!(t, [cell_id]) # Coarsen entire tree by one level -function coarsen!(t::Tree) +function coarsen!(t::AbstractTree) # Special case: if there is only one cell (root), there is nothing to do if length(t) == 1 return Int[] @@ -491,7 +290,7 @@ end # was already refined. Since it is generally not desired that cells are # coarsened without specifically asking for it, these cells will then *not* be # coarsened. 
-function coarsen!(t::Tree, cell_ids::AbstractArray{Int}) +function coarsen!(t::AbstractTree, cell_ids::AbstractArray{Int}) # Return early if array is empty if length(cell_ids) == 0 return Int[] @@ -608,11 +407,11 @@ function coarsen!(t::Tree, cell_ids::AbstractArray{Int}) end # Wrap single-cell coarsening such that `sort(...)` does not complain -coarsen!(t::Tree, cell_id::Int) = coarsen!(t::Tree, [cell_id]) +coarsen!(t::AbstractTree, cell_id::Int) = coarsen!(t::AbstractTree, [cell_id]) # Coarsen all viable parent cells with coordinates in a given rectangular box -function coarsen_box!(t::Tree{NDIMS}, coordinates_min::AbstractArray{Float64}, +function coarsen_box!(t::AbstractTree{NDIMS}, coordinates_min::AbstractArray{Float64}, coordinates_max::AbstractArray{Float64}) where NDIMS for dim in 1:NDIMS @assert coordinates_min[dim] < coordinates_max[dim] "Minimum coordinates are not minimum." @@ -638,13 +437,13 @@ function coarsen_box!(t::Tree{NDIMS}, coordinates_min::AbstractArray{Float64}, end # Convenience method for 1D -function coarsen_box!(t::Tree{1}, coordinates_min::Real, coordinates_max::Real) +function coarsen_box!(t::AbstractTree{1}, coordinates_min::Real, coordinates_max::Real) return coarsen_box!(t, [convert(Float64, coordinates_min)], [convert(Float64, coordinates_max)]) end # Return coordinates of a child cell based on its relative position to the parent. -function child_coordinates(::Tree{NDIMS}, parent_coordinates, parent_length::Number, child::Int) where NDIMS +function child_coordinates(::AbstractTree{NDIMS}, parent_coordinates, parent_length::Number, child::Int) where NDIMS # Calculate length of child cells and set up data structure child_length = parent_length / 2 coordinates = MVector{NDIMS, Float64}(undef) @@ -661,26 +460,13 @@ end # Reset range of cells to values that are prone to cause errors as soon as they are used. # # Rationale: If an invalid cell is accidentally used, we want to know it as soon as possible. -function invalidate!(t::Tree, first::Int, last::Int) - @assert first > 0 - @assert last <= t.capacity + 1 - - # Integer values are set to smallest negative value, floating point values to NaN - t.parent_ids[first:last] .= typemin(Int) - t.child_ids[:, first:last] .= typemin(Int) - t.neighbor_ids[:, first:last] .= typemin(Int) - t.levels[first:last] .= typemin(Int) - t.coordinates[:, first:last] .= NaN - t.original_cell_ids[first:last] .= typemin(Int) - - return nothing -end -invalidate!(t::Tree, id::Int) = invalidate!(t, id, id) -invalidate!(t::Tree) = invalidate!(t, 1, length(t)) +function invalidate!(t::AbstractTree, first::Int, last::Int) end +invalidate!(t::AbstractTree, id::Int) = invalidate!(t, id, id) +invalidate!(t::AbstractTree) = invalidate!(t, 1, length(t)) # Delete connectivity with parents/children/neighbors before cells are erased -function delete_connectivity!(t::Tree, first::Int, last::Int) +function delete_connectivity!(t::AbstractTree, first::Int, last::Int) @assert first > 0 @assert first <= last @assert last <= t.capacity + 1 @@ -716,7 +502,7 @@ end # Move connectivity with parents/children/neighbors after cells have been moved -function move_connectivity!(t::Tree, first::Int, last::Int, destination::Int) +function move_connectivity!(t::AbstractTree, first::Int, last::Int, destination::Int) @assert first > 0 @assert first <= last @assert last <= t.capacity + 1 @@ -788,35 +574,8 @@ end # Raw copy operation for ranges of cells. 
# # This method is used by the higher-level copy operations for AbstractContainer -function raw_copy!(target::Tree, source::Tree, first::Int, last::Int, destination::Int) - copy_data!(target.parent_ids, source.parent_ids, first, last, destination) - copy_data!(target.child_ids, source.child_ids, first, last, destination, - n_children_per_cell(target)) - copy_data!(target.neighbor_ids, source.neighbor_ids, first, last, - destination, n_directions(target)) - copy_data!(target.levels, source.levels, first, last, destination) - copy_data!(target.coordinates, source.coordinates, first, last, destination, ndims(target)) - copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, destination) -end -function raw_copy!(c::AbstractContainer, first::Int, last::Int, destination::Int) - raw_copy!(c, c, first, last, destination) -end -function raw_copy!(target::AbstractContainer, source::AbstractContainer, from::Int, destination::Int) - raw_copy!(target, source, from, from, destination) -end -function raw_copy!(c::AbstractContainer, from::Int, destination::Int) - raw_copy!(c, c, from, from, destination) -end +function raw_copy!(target::AbstractTree, source::AbstractTree, first::Int, last::Int, destination::Int) end # Reset data structures by recreating all internal storage containers and invalidating all elements -function reset_data_structures!(t::Tree{NDIMS}) where NDIMS - t.parent_ids = Vector{Int}(undef, t.capacity + 1) - t.child_ids = Matrix{Int}(undef, 2^NDIMS, t.capacity + 1) - t.neighbor_ids = Matrix{Int}(undef, 2*NDIMS, t.capacity + 1) - t.levels = Vector{Int}(undef, t.capacity + 1) - t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) - t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) - - invalidate!(t, 1, capacity(t) + 1) -end +function reset_data_structures!(t::AbstractTree{NDIMS}) where NDIMS end diff --git a/src/mesh/mesh.jl b/src/mesh/mesh.jl index db377ff2b24..f41f8f4bd73 100644 --- a/src/mesh/mesh.jl +++ b/src/mesh/mesh.jl @@ -1,46 +1,54 @@ - -include("tree.jl") +include("abstract_tree.jl") +include("serial_tree.jl") +include("parallel_tree.jl") +include("parallel.jl") # Composite type to hold the actual tree in addition to other mesh-related data # that is not strictly part of the tree. 
-mutable struct TreeMesh{D} - tree::Tree{D} +mutable struct TreeMesh{TreeType<:AbstractTree} + tree::TreeType current_filename::String unsaved_changes::Bool + first_cell_by_rank::OffsetVector{Int, Vector{Int}} + n_cells_by_rank::OffsetVector{Int, Vector{Int}} - function TreeMesh{D}(n_cells_max::Integer) where D - # Verify that D is an integer - @assert D isa Integer - + function TreeMesh{TreeType}(n_cells_max::Integer) where TreeType # Create mesh m = new() - m.tree = Tree{D}(n_cells_max) + m.tree = TreeType(n_cells_max) m.current_filename = "" m.unsaved_changes = false + m.first_cell_by_rank = OffsetVector(Int[], 0) + m.n_cells_by_rank = OffsetVector(Int[], 0) return m end - function TreeMesh{D}(n_cells_max::Integer, domain_center::AbstractArray{Float64}, - domain_length, periodicity=true) where D - # Verify that D is an integer - @assert D isa Integer - + function TreeMesh{TreeType}(n_cells_max::Integer, domain_center::AbstractArray{Float64}, + domain_length, periodicity=true) where TreeType # Create mesh m = new() - m.tree = Tree{D}(n_cells_max, domain_center, domain_length, periodicity) + m.tree = TreeType(n_cells_max, domain_center, domain_length, periodicity) m.current_filename = "" m.unsaved_changes = false + m.first_cell_by_rank = OffsetVector(Int[], 0) + m.n_cells_by_rank = OffsetVector(Int[], 0) return m end end -# Constructor for passing the dimension as an argument -TreeMesh(::Val{D}, args...) where D = TreeMesh{D}(args...) +const TreeMesh1D = TreeMesh{TreeType} where {TreeType <: AbstractTree{1}} +const TreeMesh2D = TreeMesh{TreeType} where {TreeType <: AbstractTree{2}} +const TreeMesh3D = TreeMesh{TreeType} where {TreeType <: AbstractTree{3}} + +# Constructor for passing the dimension and mesh type as an argument +TreeMesh(::Type{TreeType}, args...) where TreeType = TreeMesh{TreeType}(args...) 
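As a usage illustration of the constructor introduced here (not part of the patch; the capacity and domain values are placeholders), the mesh is now parameterized by its tree type rather than by the dimension alone, so the same call selects either the serial or the MPI-aware tree implementation:

```julia
# Hypothetical values: capacity for 10_000 cells, 2D domain centered at the origin, edge length 2
mesh_serial   = TreeMesh(SerialTree{2},   10_000, [0.0, 0.0], 2.0)
mesh_parallel = TreeMesh(ParallelTree{2}, 10_000, [0.0, 0.0], 2.0)
```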
# Constructor accepting a single number as center (as opposed to an array) for 1D -TreeMesh{1}(n::Int, center::Real, len::Real, periodicity=true) = TreeMesh{1}(n, [convert(Float64, center)], len, periodicity) +function TreeMesh{TreeType}(n::Int, center::Real, len::Real, periodicity=true) where {TreeType<:AbstractTree{1}} + return TreeMesh{TreeType}(n, [convert(Float64, center)], len, periodicity) +end @inline Base.ndims(mesh::TreeMesh) = ndims(mesh.tree) @@ -66,7 +74,12 @@ function generate_mesh() periodicity = parameter("periodicity", true) # Create mesh - @timeit timer() "creation" mesh = TreeMesh(Val{ndims_}(), n_cells_max, domain_center, + if mpi_isparallel() + tree_type = ParallelTree{ndims_} + else + tree_type = SerialTree{ndims_} + end + @timeit timer() "creation" mesh = TreeMesh(tree_type, n_cells_max, domain_center, domain_length, periodicity) # Create initial refinement @@ -77,6 +90,7 @@ function generate_mesh() # Apply refinement patches @timeit timer() "refinement patches" for patch in parameter("refinement_patches", []) + mpi_isparallel() && error("non-uniform meshes not supported in parallel") if patch["type"] == "box" refine_box!(mesh.tree, patch["coordinates_min"], patch["coordinates_max"]) else @@ -86,6 +100,7 @@ function generate_mesh() # Apply coarsening patches @timeit timer() "coarsening patches" for patch in parameter("coarsening_patches", []) + mpi_isparallel() && error("non-uniform meshes not supported in parallel") if patch["type"] == "box" coarsen_box!(mesh.tree, patch["coordinates_min"], patch["coordinates_max"]) else @@ -93,12 +108,18 @@ function generate_mesh() end end + # Partition mesh + if mpi_isparallel() + partition!(mesh) + end + return mesh end # Load existing mesh from file -function load_mesh(restart_filename) +load_mesh(restart_filename) = load_mesh(restart_filename, mpi_parallel()) +function load_mesh(restart_filename, mpi_parallel::Val{false}) # Get number of spatial dimensions ndims_ = parameter("ndims") @@ -106,10 +127,10 @@ function load_mesh(restart_filename) n_cells_max = parameter("n_cells_max") # Create mesh - @timeit timer() "creation" mesh = TreeMesh(Val{ndims_}(), n_cells_max) + @timeit timer() "creation" mesh = TreeMesh(SerialTree{ndims_}, n_cells_max) # Determine mesh filename - filename = get_restart_mesh_filename(restart_filename) + filename = get_restart_mesh_filename(restart_filename, Val(false)) mesh.current_filename = filename mesh.unsaved_changes = false @@ -137,7 +158,8 @@ end # Obtain the mesh filename from a restart file -function get_restart_mesh_filename(restart_filename) +get_restart_mesh_filename(restart_filename) = get_restart_mesh_filename(restart_filename, mpi_parallel()) +function get_restart_mesh_filename(restart_filename, mpi_parallel::Val{false}) # Get directory name dirname, _ = splitdir(restart_filename) diff --git a/src/mesh/parallel.jl b/src/mesh/parallel.jl new file mode 100644 index 00000000000..fca6ad1fd84 --- /dev/null +++ b/src/mesh/parallel.jl @@ -0,0 +1,128 @@ +# Partition mesh using a static domain decomposition algorithm based on leaf cell count alone +function partition!(mesh) + # Determine number of leaf cells per rank + leaves = leaf_cells(mesh.tree) + @assert length(leaves) > mpi_nranks() + n_leaves_per_rank = OffsetArray(fill(div(length(leaves), mpi_nranks()), mpi_nranks()), + 0:(mpi_nranks() - 1)) + for d in 0:(rem(length(leaves), mpi_nranks()) - 1) + n_leaves_per_rank[d] += 1 + end + @assert sum(n_leaves_per_rank) == length(leaves) + + # Assign MPI ranks to all cells such that all ancestors of 
each cell - if not yet assigned to a + # rank - belong to the same rank + mesh.first_cell_by_rank = similar(n_leaves_per_rank) + mesh.n_cells_by_rank = similar(n_leaves_per_rank) + + leaf_count = 0 + last_id = leaves[n_leaves_per_rank[0]] + mesh.first_cell_by_rank[0] = 1 + mesh.n_cells_by_rank[0] = last_id + mesh.tree.mpi_ranks[1:last_id] .= 0 + for d in 1:(length(n_leaves_per_rank)-1) + leaf_count += n_leaves_per_rank[d-1] + last_id = leaves[leaf_count + n_leaves_per_rank[d]] + mesh.first_cell_by_rank[d] = mesh.first_cell_by_rank[d-1] + mesh.n_cells_by_rank[d-1] + mesh.n_cells_by_rank[d] = last_id - mesh.first_cell_by_rank[d] + 1 + mesh.tree.mpi_ranks[mesh.first_cell_by_rank[d]:last_id] .= d + end + + @assert all(x->x >= 0, mesh.tree.mpi_ranks[1:length(mesh.tree)]) + @assert sum(mesh.n_cells_by_rank) == length(mesh.tree) + + return nothing +end + + +function load_mesh(restart_filename, mpi_parallel::Val{true}) + # Get number of spatial dimensions + ndims_ = parameter("ndims") + + # Get maximum number of cells that should be supported + n_cells_max = parameter("n_cells_max") + + # Create mesh + @timeit timer() "creation" mesh = TreeMesh(ParallelTree{ndims_}, n_cells_max) + + # Determine mesh filename + filename = get_restart_mesh_filename(restart_filename, Val(true)) + mesh.current_filename = filename + mesh.unsaved_changes = false + + # Read mesh file + if mpi_isroot() + h5open(filename, "r") do file + # Set domain information + mesh.tree.center_level_0 = read(attrs(file)["center_level_0"]) + mesh.tree.length_level_0 = read(attrs(file)["length_level_0"]) + mesh.tree.periodicity = Tuple(read(attrs(file)["periodicity"])) + MPI.Bcast!(collect(mesh.tree.center_level_0), mpi_root(), mpi_comm()) + MPI.Bcast!(collect(mesh.tree.length_level_0), mpi_root(), mpi_comm()) + MPI.Bcast!(collect(mesh.tree.periodicity), mpi_root(), mpi_comm()) + + # Set length + n_cells = read(attrs(file)["n_cells"]) + MPI.Bcast!(Ref(n_cells), mpi_root(), mpi_comm()) + resize!(mesh.tree, n_cells) + + # Read in data + mesh.tree.parent_ids[1:n_cells] = read(file["parent_ids"]) + mesh.tree.child_ids[:, 1:n_cells] = read(file["child_ids"]) + mesh.tree.neighbor_ids[:, 1:n_cells] = read(file["neighbor_ids"]) + mesh.tree.levels[1:n_cells] = read(file["levels"]) + mesh.tree.coordinates[:, 1:n_cells] = read(file["coordinates"]) + @views MPI.Bcast!(mesh.tree.parent_ids[1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.child_ids[:, 1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.neighbor_ids[:, 1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.levels[1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.coordinates[:, 1:n_cells], mpi_root(), mpi_comm()) + end + else # non-root ranks + # Set domain information + mesh.tree.center_level_0 = MPI.Bcast!(collect(mesh.tree.center_level_0), mpi_root(), mpi_comm()) + mesh.tree.length_level_0 = MPI.Bcast!(collect(mesh.tree.length_level_0), mpi_root(), mpi_comm())[1] + mesh.tree.periodicity = Tuple(MPI.Bcast!(collect(mesh.tree.periodicity), mpi_root(), mpi_comm())) + + # Set length + n_cells = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] + resize!(mesh.tree, n_cells) + + # Read in data + @views MPI.Bcast!(mesh.tree.parent_ids[1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.child_ids[:, 1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.neighbor_ids[:, 1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.levels[1:n_cells], mpi_root(), mpi_comm()) + @views 
MPI.Bcast!(mesh.tree.coordinates[:, 1:n_cells], mpi_root(), mpi_comm()) + end + + # Partition mesh + partition!(mesh) + + return mesh +end + +function get_restart_mesh_filename(restart_filename, mpi_parallel::Val{true}) + # Get directory name + dirname, _ = splitdir(restart_filename) + + if mpi_isroot() + # Read mesh filename from restart file + mesh_file = "" + h5open(restart_filename, "r") do file + mesh_file = read(attrs(file)["mesh_file"]) + end + + buffer = Vector{UInt8}(mesh_file) + MPI.Bcast!(Ref(length(buffer)), mpi_root(), mpi_comm()) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) + else # non-root ranks + count = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm()) + buffer = Vector{UInt8}(undef, count[]) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) + mesh_file = String(buffer) + end + + # Construct and return filename + return joinpath(dirname, mesh_file) +end diff --git a/src/mesh/parallel_tree.jl b/src/mesh/parallel_tree.jl new file mode 100644 index 00000000000..a9c6d71b624 --- /dev/null +++ b/src/mesh/parallel_tree.jl @@ -0,0 +1,286 @@ + +# Composite type that represents a NDIMS-dimensional tree (parallel version). +# +# Implements everything required for AbstractContainer. +# +# Note: The way the data structures are set up and the way most algorithms +# work, it is *always* assumed that +# a) we have a balanced tree (= at most one level difference between +# neighboring cells, or 2:1 rule) +# b) we may not have all children (= some children may not exist) +# c) the tree is stored depth-first +# +# However, the way the refinement/coarsening algorithms are currently +# implemented, we only have fully refined cells. That is, a cell either has 2^NDIMS children or +# no children at all (= leaf cell). This restriction is also assumed at +# multiple positions in the refinement/coarsening algorithms. +# +# An exception to the 2:1 rule exists for the low-level `refine_unbalanced!` +# function, which is required for implementing level-wise refinement in a sane +# way. Also, depth-first ordering *might* not by guaranteed during +# refinement/coarsening operations. +mutable struct ParallelTree{NDIMS} <: AbstractTree{NDIMS} + parent_ids::Vector{Int} + child_ids::Matrix{Int} + neighbor_ids::Matrix{Int} + levels::Vector{Int} + coordinates::Matrix{Float64} + original_cell_ids::Vector{Int} + mpi_ranks::Vector{Int} + + capacity::Int + length::Int + dummy::Int + + center_level_0::SVector{NDIMS, Float64} + length_level_0::Float64 + periodicity::NTuple{NDIMS, Bool} + + function ParallelTree{NDIMS}(capacity::Integer) where NDIMS + # Verify that NDIMS is an integer + @assert NDIMS isa Integer + + # Create instance + t = new() + + # Initialize fields with defaults + # Note: length as capacity + 1 is to use `capacity + 1` as temporary storage for swap operations + t.parent_ids = fill(typemin(Int), capacity + 1) + t.child_ids = fill(typemin(Int), 2^NDIMS, capacity + 1) + t.neighbor_ids = fill(typemin(Int), 2*NDIMS, capacity + 1) + t.levels = fill(typemin(Int), capacity + 1) + t.coordinates = fill(NaN, NDIMS, capacity + 1) + t.original_cell_ids = fill(typemin(Int), capacity + 1) + t.mpi_ranks = fill(typemin(Int), capacity + 1) + + t.capacity = capacity + t.length = 0 + t.dummy = capacity + 1 + + t.center_level_0 = @SVector fill(NaN, NDIMS) + t.length_level_0 = NaN + + return t + end +end + + +# Constructor for passing the dimension as an argument +ParallelTree(::Val{NDIMS}, args...) where NDIMS = ParallelTree{NDIMS}(args...) 
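A small usage sketch of the constructor defined above (not part of the patch; capacity and domain values are placeholders): freshly created cells carry no rank assignment, so `mpi_ranks` stays at its invalid sentinel until `partition!` distributes the cells among the MPI ranks.

```julia
# Construct a 2D parallel tree with capacity for 100 cells, centered at the origin, edge length 1
t = ParallelTree{2}(100, [0.0, 0.0], 1.0)

# The root cell exists but has not been assigned to an MPI rank yet
@assert t.mpi_ranks[1] == typemin(Int)
```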
+ +# Create and initialize tree +function ParallelTree{NDIMS}(capacity::Int, center::AbstractArray{Float64}, + length::Real, periodicity=true) where NDIMS + # Create instance + t = ParallelTree{NDIMS}(capacity) + + # Initialize root cell + init!(t, center, length, periodicity) + + return t +end + +# Constructor accepting a single number as center (as opposed to an array) for 1D +ParallelTree{1}(cap::Int, center::Real, len::Real, periodicity=true) = ParallelTree{1}(cap, [convert(Float64, center)], len, periodicity) + + +# Clear tree with deleting data structures, store center and length, and create root cell +function init!(t::ParallelTree, center::AbstractArray{Float64}, length::Real, periodicity=true) + clear!(t) + + # Set domain information + t.center_level_0 = center + t.length_level_0 = length + + # Create root cell + t.length += 1 + t.parent_ids[1] = 0 + t.child_ids[:, 1] .= 0 + t.levels[1] = 0 + t.coordinates[:, 1] .= t.center_level_0 + t.original_cell_ids[1] = 0 + t.mpi_ranks[1] = typemin(Int) + + # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor + if all(periodicity) + # Also catches case where periodicity = true + t.neighbor_ids[:, 1] .= 1 + t.periodicity = ntuple(x->true, ndims(t)) + elseif !any(periodicity) + # Also catches case where periodicity = false + t.neighbor_ids[:, 1] .= 0 + t.periodicity = ntuple(x->false, ndims(t)) + else + # Default case if periodicity is an iterable + for dimension in 1:ndims(t) + if periodicity[dimension] + t.neighbor_ids[2 * dimension - 1, 1] = 1 + t.neighbor_ids[2 * dimension - 0, 1] = 1 + else + t.neighbor_ids[2 * dimension - 1, 1] = 0 + t.neighbor_ids[2 * dimension - 0, 1] = 0 + end + end + + t.periodicity = Tuple(periodicity) + end +end + + +# Convenience output for debugging +function Base.show(io::IO, ::MIME"text/plain", t::ParallelTree{NDIMS}) where NDIMS + l = t.length + println(io, '*'^20) + println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])") + println(io, "transpose(t.child_ids[:, 1:l]) = $(transpose(t.child_ids[:, 1:l]))") + println(io, "transpose(t.neighbor_ids[:, 1:l]) = $(transpose(t.neighbor_ids[:, 1:l]))") + println(io, "t.levels[1:l] = $(t.levels[1:l])") + println(io, "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))") + println(io, "t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])") + println(io, "t.mpi_ranks[1:l] = $(t.mpi_ranks[1:l])") + println(io, "t.capacity = $(t.capacity)") + println(io, "t.length = $(t.length)") + println(io, "t.dummy = $(t.dummy)") + println(io, "t.center_level_0 = $(t.center_level_0)") + println(io, "t.length_level_0 = $(t.length_level_0)") + println(io, '*'^20) +end + + +# Check if cell is own cell, i.e., belongs to this MPI rank +is_own_cell(t::ParallelTree, cell_id) = t.mpi_ranks[cell_id] == mpi_rank() + + +# Return an array with the ids of all leaf cells for a given rank +leaf_cells_by_rank(t::ParallelTree, rank) = filter_leaf_cells(t) do cell_id + t.mpi_ranks[cell_id] == rank + end + +# Return an array with the ids of all local leaf cells +local_leaf_cells(t::ParallelTree) = leaf_cells_by_rank(t, mpi_rank()) + + +# Refine given cells without rebalancing tree. +# +# Note: After a call to this method the tree may be unbalanced! 
+function refine_unbalanced!(t::ParallelTree, cell_ids) + # Store actual ids refined cells (shifted due to previous insertions) + refined = zeros(Int, length(cell_ids)) + + # Loop over all cells that are to be refined + for (count, original_cell_id) in enumerate(sort(unique(cell_ids))) + # Determine actual cell id, taking into account previously inserted cells + n_children = n_children_per_cell(t) + cell_id = original_cell_id + (count - 1) * n_children + refined[count] = cell_id + + @assert !has_children(t, cell_id) "Non-leaf cell $cell_id cannot be refined" + + # Insert new cells directly behind parent (depth-first) + insert!(t, cell_id + 1, n_children) + + # Flip sign of refined cell such that we can easily find it later + t.original_cell_ids[cell_id] = -t.original_cell_ids[cell_id] + + # Initialize child cells + for child in 1:n_children + # Set child information based on parent + child_id = cell_id + child + t.parent_ids[child_id] = cell_id + t.child_ids[child, cell_id] = child_id + t.neighbor_ids[:, child_id] .= 0 + t.child_ids[:, child_id] .= 0 + t.levels[child_id] = t.levels[cell_id] + 1 + t.coordinates[:, child_id] .= child_coordinates( + t, t.coordinates[:, cell_id], length_at_cell(t, cell_id), child) + t.original_cell_ids[child_id] = 0 + t.mpi_ranks[child_id] = t.mpi_ranks[cell_id] + + # For determining neighbors, use neighbor connections of parent cell + for direction in 1:n_directions(t) + # If neighbor is a sibling, establish one-sided connectivity + # Note: two-sided is not necessary, as each sibling will do this + if has_sibling(child, direction) + adjacent = adjacent_child(child, direction) + neighbor_id = cell_id + adjacent + + t.neighbor_ids[direction, child_id] = neighbor_id + continue + end + + # Skip if original cell does have no neighbor in direction + if !has_neighbor(t, cell_id, direction) + continue + end + + # Otherwise, check if neighbor has children - if not, skip again + neighbor_id = t.neighbor_ids[direction, cell_id] + if !has_children(t, neighbor_id) + continue + end + + # Check if neighbor has corresponding child and if yes, establish connectivity + adjacent = adjacent_child(child, direction) + if has_child(t, neighbor_id, adjacent) + neighbor_child_id = t.child_ids[adjacent, neighbor_id] + opposite = opposite_direction(direction) + + t.neighbor_ids[direction, child_id] = neighbor_child_id + t.neighbor_ids[opposite, neighbor_child_id] = child_id + end + end + end + end + + return refined +end + + +# Reset range of cells to values that are prone to cause errors as soon as they are used. +# +# Rationale: If an invalid cell is accidentally used, we want to know it as soon as possible. +function invalidate!(t::ParallelTree, first::Int, last::Int) + @assert first > 0 + @assert last <= t.capacity + 1 + + # Integer values are set to smallest negative value, floating point values to NaN + t.parent_ids[first:last] .= typemin(Int) + t.child_ids[:, first:last] .= typemin(Int) + t.neighbor_ids[:, first:last] .= typemin(Int) + t.levels[first:last] .= typemin(Int) + t.coordinates[:, first:last] .= NaN + t.original_cell_ids[first:last] .= typemin(Int) + t.mpi_ranks[first:last] .= typemin(Int) + + return nothing +end + + +# Raw copy operation for ranges of cells. 
+# +# This method is used by the higher-level copy operations for AbstractContainer +function raw_copy!(target::ParallelTree, source::ParallelTree, first::Int, last::Int, destination::Int) + copy_data!(target.parent_ids, source.parent_ids, first, last, destination) + copy_data!(target.child_ids, source.child_ids, first, last, destination, + n_children_per_cell(target)) + copy_data!(target.neighbor_ids, source.neighbor_ids, first, last, + destination, n_directions(target)) + copy_data!(target.levels, source.levels, first, last, destination) + copy_data!(target.coordinates, source.coordinates, first, last, destination, ndims(target)) + copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, destination) + copy_data!(target.mpi_ranks, source.mpi_ranks, first, last, destination) +end + + +# Reset data structures by recreating all internal storage containers and invalidating all elements +function reset_data_structures!(t::ParallelTree{NDIMS}) where NDIMS + t.parent_ids = Vector{Int}(undef, t.capacity + 1) + t.child_ids = Matrix{Int}(undef, 2^NDIMS, t.capacity + 1) + t.neighbor_ids = Matrix{Int}(undef, 2*NDIMS, t.capacity + 1) + t.levels = Vector{Int}(undef, t.capacity + 1) + t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) + t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) + t.mpi_ranks = Vector{Int}(undef, t.capacity + 1) + + invalidate!(t, 1, capacity(t) + 1) +end diff --git a/src/mesh/serial_tree.jl b/src/mesh/serial_tree.jl new file mode 100644 index 00000000000..7f3296fe32a --- /dev/null +++ b/src/mesh/serial_tree.jl @@ -0,0 +1,265 @@ + +# Composite type that represents a NDIMS-dimensional tree (serial version). +# +# Implements everything required for AbstractContainer. +# +# Note: The way the data structures are set up and the way most algorithms +# work, it is *always* assumed that +# a) we have a balanced tree (= at most one level difference between +# neighboring cells, or 2:1 rule) +# b) we may not have all children (= some children may not exist) +# c) the tree is stored depth-first +# +# However, the way the refinement/coarsening algorithms are currently +# implemented, we only have fully refined cells. That is, a cell either has 2^NDIMS children or +# no children at all (= leaf cell). This restriction is also assumed at +# multiple positions in the refinement/coarsening algorithms. +# +# An exception to the 2:1 rule exists for the low-level `refine_unbalanced!` +# function, which is required for implementing level-wise refinement in a sane +# way. Also, depth-first ordering *might* not by guaranteed during +# refinement/coarsening operations. 
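To make the storage convention described above concrete: children are stored directly behind their parent, so the flat arrays in the struct below can be read as a depth-first traversal, and refining a cell shifts the ids of all cells stored after it. A small sketch using the internal API from this file (in a real session the names would need `Trixi.` qualification; capacity and ids are illustrative only):
```julia
# 2D serial tree with capacity for 30 cells, root cell centered at the origin
t = SerialTree{2}(30, [0.0, 0.0], 1.0)

refine_unbalanced!(t, [1])   # root's children get ids 2..5
refine_unbalanced!(t, [3])   # children of cell 3 are inserted as ids 4..7;
                             # the former cells 4 and 5 are shifted to ids 8 and 9

t.parent_ids[4:7]            # all equal to 3
t.levels[8:9]                # still level 1: these are the shifted siblings of cell 3
```
This id shifting is the reason the `refine_unbalanced!` method further below computes the actual cell id from the original one before inserting children.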
+mutable struct SerialTree{NDIMS} <: AbstractTree{NDIMS} + parent_ids::Vector{Int} + child_ids::Matrix{Int} + neighbor_ids::Matrix{Int} + levels::Vector{Int} + coordinates::Matrix{Float64} + original_cell_ids::Vector{Int} + + capacity::Int + length::Int + dummy::Int + + center_level_0::SVector{NDIMS, Float64} + length_level_0::Float64 + periodicity::NTuple{NDIMS, Bool} + + function SerialTree{NDIMS}(capacity::Integer) where NDIMS + # Verify that NDIMS is an integer + @assert NDIMS isa Integer + + # Create instance + t = new() + + # Initialize fields with defaults + # Note: length as capacity + 1 is to use `capacity + 1` as temporary storage for swap operations + t.parent_ids = fill(typemin(Int), capacity + 1) + t.child_ids = fill(typemin(Int), 2^NDIMS, capacity + 1) + t.neighbor_ids = fill(typemin(Int), 2*NDIMS, capacity + 1) + t.levels = fill(typemin(Int), capacity + 1) + t.coordinates = fill(NaN, NDIMS, capacity + 1) + t.original_cell_ids = fill(typemin(Int), capacity + 1) + + t.capacity = capacity + t.length = 0 + t.dummy = capacity + 1 + + t.center_level_0 = @SVector fill(NaN, NDIMS) + t.length_level_0 = NaN + + return t + end +end + + +# Constructor for passing the dimension as an argument +SerialTree(::Val{NDIMS}, args...) where NDIMS = SerialTree{NDIMS}(args...) + +# Create and initialize tree +function SerialTree{NDIMS}(capacity::Int, center::AbstractArray{Float64}, + length::Real, periodicity=true) where NDIMS + # Create instance + t = SerialTree{NDIMS}(capacity) + + # Initialize root cell + init!(t, center, length, periodicity) + + return t +end + +# Constructor accepting a single number as center (as opposed to an array) for 1D +SerialTree{1}(cap::Int, center::Real, len::Real, periodicity=true) = SerialTree{1}(cap, [convert(Float64, center)], len, periodicity) + + +# Clear tree with deleting data structures, store center and length, and create root cell +function init!(t::SerialTree, center::AbstractArray{Float64}, length::Real, periodicity=true) + clear!(t) + + # Set domain information + t.center_level_0 = center + t.length_level_0 = length + + # Create root cell + t.length += 1 + t.parent_ids[1] = 0 + t.child_ids[:, 1] .= 0 + t.levels[1] = 0 + t.coordinates[:, 1] .= t.center_level_0 + t.original_cell_ids[1] = 0 + + # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor + if all(periodicity) + # Also catches case where periodicity = true + t.neighbor_ids[:, 1] .= 1 + t.periodicity = ntuple(x->true, ndims(t)) + elseif !any(periodicity) + # Also catches case where periodicity = false + t.neighbor_ids[:, 1] .= 0 + t.periodicity = ntuple(x->false, ndims(t)) + else + # Default case if periodicity is an iterable + for dimension in 1:ndims(t) + if periodicity[dimension] + t.neighbor_ids[2 * dimension - 1, 1] = 1 + t.neighbor_ids[2 * dimension - 0, 1] = 1 + else + t.neighbor_ids[2 * dimension - 1, 1] = 0 + t.neighbor_ids[2 * dimension - 0, 1] = 0 + end + end + + t.periodicity = Tuple(periodicity) + end +end + + +# Convenience output for debugging +function Base.show(io::IO, ::MIME"text/plain", t::SerialTree{NDIMS}) where NDIMS + l = t.length + println(io, '*'^20) + println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])") + println(io, "transpose(t.child_ids[:, 1:l]) = $(transpose(t.child_ids[:, 1:l]))") + println(io, "transpose(t.neighbor_ids[:, 1:l]) = $(transpose(t.neighbor_ids[:, 1:l]))") + println(io, "t.levels[1:l] = $(t.levels[1:l])") + println(io, "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))") + println(io, 
"t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])") + println(io, "t.capacity = $(t.capacity)") + println(io, "t.length = $(t.length)") + println(io, "t.dummy = $(t.dummy)") + println(io, "t.center_level_0 = $(t.center_level_0)") + println(io, "t.length_level_0 = $(t.length_level_0)") + println(io, '*'^20) +end + + +# Refine given cells without rebalancing tree. +# +# Note: After a call to this method the tree may be unbalanced! +function refine_unbalanced!(t::SerialTree, cell_ids) + # Store actual ids refined cells (shifted due to previous insertions) + refined = zeros(Int, length(cell_ids)) + + # Loop over all cells that are to be refined + for (count, original_cell_id) in enumerate(sort(unique(cell_ids))) + # Determine actual cell id, taking into account previously inserted cells + n_children = n_children_per_cell(t) + cell_id = original_cell_id + (count - 1) * n_children + refined[count] = cell_id + + @assert !has_children(t, cell_id) "Non-leaf cell $cell_id cannot be refined" + + # Insert new cells directly behind parent (depth-first) + insert!(t, cell_id + 1, n_children) + + # Flip sign of refined cell such that we can easily find it later + t.original_cell_ids[cell_id] = -t.original_cell_ids[cell_id] + + # Initialize child cells + for child in 1:n_children + # Set child information based on parent + child_id = cell_id + child + t.parent_ids[child_id] = cell_id + t.child_ids[child, cell_id] = child_id + t.neighbor_ids[:, child_id] .= 0 + t.child_ids[:, child_id] .= 0 + t.levels[child_id] = t.levels[cell_id] + 1 + t.coordinates[:, child_id] .= child_coordinates( + t, t.coordinates[:, cell_id], length_at_cell(t, cell_id), child) + t.original_cell_ids[child_id] = 0 + + # For determining neighbors, use neighbor connections of parent cell + for direction in 1:n_directions(t) + # If neighbor is a sibling, establish one-sided connectivity + # Note: two-sided is not necessary, as each sibling will do this + if has_sibling(child, direction) + adjacent = adjacent_child(child, direction) + neighbor_id = cell_id + adjacent + + t.neighbor_ids[direction, child_id] = neighbor_id + continue + end + + # Skip if original cell does have no neighbor in direction + if !has_neighbor(t, cell_id, direction) + continue + end + + # Otherwise, check if neighbor has children - if not, skip again + neighbor_id = t.neighbor_ids[direction, cell_id] + if !has_children(t, neighbor_id) + continue + end + + # Check if neighbor has corresponding child and if yes, establish connectivity + adjacent = adjacent_child(child, direction) + if has_child(t, neighbor_id, adjacent) + neighbor_child_id = t.child_ids[adjacent, neighbor_id] + opposite = opposite_direction(direction) + + t.neighbor_ids[direction, child_id] = neighbor_child_id + t.neighbor_ids[opposite, neighbor_child_id] = child_id + end + end + end + end + + return refined +end + + +# Reset range of cells to values that are prone to cause errors as soon as they are used. +# +# Rationale: If an invalid cell is accidentally used, we want to know it as soon as possible. 
+function invalidate!(t::SerialTree, first::Int, last::Int) + @assert first > 0 + @assert last <= t.capacity + 1 + + # Integer values are set to smallest negative value, floating point values to NaN + t.parent_ids[first:last] .= typemin(Int) + t.child_ids[:, first:last] .= typemin(Int) + t.neighbor_ids[:, first:last] .= typemin(Int) + t.levels[first:last] .= typemin(Int) + t.coordinates[:, first:last] .= NaN + t.original_cell_ids[first:last] .= typemin(Int) + + return nothing +end + + +# Raw copy operation for ranges of cells. +# +# This method is used by the higher-level copy operations for AbstractContainer +function raw_copy!(target::SerialTree, source::SerialTree, first::Int, last::Int, destination::Int) + copy_data!(target.parent_ids, source.parent_ids, first, last, destination) + copy_data!(target.child_ids, source.child_ids, first, last, destination, + n_children_per_cell(target)) + copy_data!(target.neighbor_ids, source.neighbor_ids, first, last, + destination, n_directions(target)) + copy_data!(target.levels, source.levels, first, last, destination) + copy_data!(target.coordinates, source.coordinates, first, last, destination, ndims(target)) + copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, destination) +end + + +# Reset data structures by recreating all internal storage containers and invalidating all elements +function reset_data_structures!(t::SerialTree{NDIMS}) where NDIMS + t.parent_ids = Vector{Int}(undef, t.capacity + 1) + t.child_ids = Matrix{Int}(undef, 2^NDIMS, t.capacity + 1) + t.neighbor_ids = Matrix{Int}(undef, 2*NDIMS, t.capacity + 1) + t.levels = Vector{Int}(undef, t.capacity + 1) + t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) + t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) + + invalidate!(t, 1, capacity(t) + 1) +end diff --git a/src/parallel/parallel.jl b/src/parallel/parallel.jl new file mode 100644 index 00000000000..6c44c28ff45 --- /dev/null +++ b/src/parallel/parallel.jl @@ -0,0 +1,63 @@ +""" + init_mpi + +Initialize MPI by calling `MPI.Initialized()`. The function will check if MPI is already initialized +and if yes, do nothing, thus it is safe to call it multiple times. 
+""" +function init_mpi() + if MPI_INITIALIZED[] + return nothing + end + + if MPI.Initialized() + @assert MPI.Query_thread() >= MPI.THREAD_FUNNELED "MPI already initialized with insufficient threading support" + else + # MPI.THREAD_FUNNELED: Only main thread makes MPI calls + provided = MPI.Init_thread(MPI.THREAD_FUNNELED) + @assert provided >= MPI.THREAD_FUNNELED "MPI library with insufficient threading support" + end + + # Initialize global MPI state + MPI_RANK[] = MPI.Comm_rank(MPI.COMM_WORLD) + MPI_SIZE[] = MPI.Comm_size(MPI.COMM_WORLD) + MPI_IS_PARALLEL[] = MPI_SIZE[] > 1 + MPI_IS_SERIAL[] = !MPI_IS_PARALLEL[] + MPI_IS_ROOT[] = MPI_IS_SERIAL[] || MPI_RANK[] == 0 + + # Initialize methods for dispatching on parallel execution + if MPI_IS_PARALLEL[] + eval(:(mpi_parallel() = Val(true))) + else + eval(:(mpi_parallel() = Val(false))) + end + + MPI_INITIALIZED[] = true + + return nothing +end + + +const MPI_INITIALIZED = Ref(false) +const MPI_RANK = Ref(-1) +const MPI_SIZE = Ref(-1) +const MPI_IS_PARALLEL = Ref(false) +const MPI_IS_SERIAL = Ref(true) +const MPI_IS_ROOT = Ref(true) + + +@inline mpi_comm() = MPI.COMM_WORLD + +@inline mpi_rank() = MPI_RANK[] + +@inline mpi_nranks() = MPI_SIZE[] + +@inline mpi_isparallel() = MPI_IS_PARALLEL[] + +@inline mpi_isserial() = MPI_IS_SERIAL[] + +@inline mpi_isroot() = MPI_IS_ROOT[] + +@inline mpi_root() = 0 + +@inline mpi_println(args...) = mpi_isroot() && println(args...) +@inline mpi_print(args...) = mpi_isroot() && print(args...) diff --git a/src/run.jl b/src/run.jl index d787ca16bfa..6b4fed6b5b6 100644 --- a/src/run.jl +++ b/src/run.jl @@ -83,28 +83,32 @@ function init_simulation() # Initialize mesh if restart - print("Loading mesh... ") + mpi_print("Loading mesh... ") @timeit timer() "mesh loading" mesh = load_mesh(restart_filename) - println("done") + mpi_isparallel() && MPI.Barrier(mpi_comm()) + mpi_println("done") else - print("Creating mesh... ") + mpi_print("Creating mesh... ") @timeit timer() "mesh creation" mesh = generate_mesh() mesh.current_filename = save_mesh_file(mesh) mesh.unsaved_changes = false - println("done") + mpi_isparallel() && MPI.Barrier(mpi_comm()) + mpi_println("done") end # Initialize system of equations - print("Initializing system of equations... ") + mpi_print("Initializing system of equations... ") equations_name = parameter("equations") equations = make_equations(equations_name, ndims_) - println("done") + mpi_isparallel() && MPI.Barrier(mpi_comm()) + mpi_println("done") # Initialize solver - print("Initializing solver... ") + mpi_print("Initializing solver... ") solver_name = parameter("solver", valid=["dg"]) solver = make_solver(solver_name, equations, mesh) - println("done") + mpi_isparallel() && MPI.Barrier(mpi_comm()) + mpi_println("done") # Sanity checks # If DG volume integral type is weak form, volume flux type must be flux_central, @@ -122,16 +126,18 @@ function init_simulation() adapt_initial_conditions = parameter("adapt_initial_conditions", true) adapt_initial_conditions_only_refine = parameter("adapt_initial_conditions_only_refine", true) if restart - print("Loading restart file...") + mpi_print("Loading restart file...") time, step = load_restart_file!(solver, restart_filename) - println("done") + mpi_isparallel() && MPI.Barrier(mpi_comm()) + mpi_println("done") else - print("Applying initial conditions... ") + mpi_print("Applying initial conditions... 
") t_start = parameter("t_start") time = t_start step = 0 set_initial_conditions!(solver, time) - println("done") + mpi_isparallel() && MPI.Barrier(mpi_comm()) + mpi_println("done") # If AMR is enabled, adapt mesh and re-apply ICs if amr_interval > 0 && adapt_initial_conditions @@ -199,9 +205,10 @@ function init_simulation() | time integration: $(get_name(time_integration_function)) | restart interval: $restart_interval | solution interval: $solution_interval - | #parallel threads: $(Threads.nthreads()) + | #MPI ranks: $(mpi_nranks()) + | #threads/rank: $(Threads.nthreads()) | - | Solver + | Solver (local) | | solver: $solver_name | | polydeg: $polydeg | | CFL: $cfl @@ -214,7 +221,7 @@ function init_simulation() | | #l2mortars: $(solver.n_l2mortars) | | #DOFs: $(ndofs(solver)) | - | Mesh + | Mesh (global) | | #cells: $(length(mesh.tree)) | | #leaf cells: $n_leaf_cells | | minimum level: $min_level @@ -224,8 +231,8 @@ function init_simulation() | | minimum dx: $min_dx | | maximum dx: $max_dx """ - println() - println(s) + mpi_println() + mpi_println(s) # Set up main loop save_final_solution = parameter("save_final_solution", true) @@ -310,12 +317,19 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function # Check steady-state integration residual if solver.equations isa AbstractHyperbolicDiffusionEquations - if maximum(abs, view(solver.elements.u_t, 1, .., :)) <= solver.equations.resid_tol - println() - println("-"^80) - println(" Steady state tolerance of ",solver.equations.resid_tol," reached at time ",time) - println("-"^80) - println() + resid = maximum(abs, view(solver.elements.u_t, 1, .., :)) + + if mpi_isparallel() + resid = MPI.Allreduce!(Ref(resid), max, mpi_comm())[] + end + + if resid <= solver.equations.resid_tol + mpi_println() + mpi_println("-"^80) + mpi_println(" Steady state tolerance of ", solver.equations.resid_tol, + " reached at time ", time) + mpi_println("-"^80) + mpi_println() finalstep = true end end @@ -323,9 +337,15 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function # Analyze solution errors if analysis_interval > 0 && (step % analysis_interval == 0 || finalstep) # Calculate absolute and relative runtime + if mpi_isparallel() + total_dofs = MPI.Reduce!(Ref(ndofs(solver)), +, mpi_root(), mpi_comm()) + total_dofs = mpi_isroot() ? total_dofs[] : -1 + else + total_dofs = ndofs(solver) + end runtime_absolute = (time_ns() - loop_start_time) / 10^9 runtime_relative = ((time_ns() - analysis_start_time - output_time) / 10^9 / - (n_analysis_timesteps * ndofs(solver))) + (n_analysis_timesteps * total_dofs)) # Analyze solution l2_error, linf_error = @timeit timer() "analyze solution" analyze_solution( @@ -336,12 +356,12 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function output_time = 0.0 n_analysis_timesteps = 0 if finalstep - println("-"^80) - println("Trixi simulation run finished. Final time: $time Time steps: $step") - println("-"^80) - println() + mpi_println("-"^80) + mpi_println("Trixi simulation run finished. Final time: $time Time steps: $step") + mpi_println("-"^80) + mpi_println() end - elseif alive_interval > 0 && step % alive_interval == 0 + elseif alive_interval > 0 && step % alive_interval == 0 && mpi_isroot() runtime_absolute = (time_ns() - loop_start_time) / 10^9 @printf("#t/s: %6d | dt: %.4e | Sim. 
time: %.4e | Run time: %.4e s\n", step, dt, time, runtime_absolute) @@ -407,8 +427,16 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function end # Print timer information - print_timer(timer(), title="Trixi.jl", allocations=true, linechars=:ascii, compact=false) - println() + if mpi_isroot() + print_timer(timer(), title="Trixi.jl", allocations=true, linechars=:ascii, compact=false) + println() + end + + # Distribute l2_errors from root such that all ranks have correct return value + if mpi_isparallel() + l2_error = convert(typeof(l2_error), MPI.Bcast!(collect(l2_error), mpi_root(), mpi_comm())) + linf_error = convert(typeof(linf_error), MPI.Bcast!(collect(linf_error), mpi_root(), mpi_comm())) + end # Return error norms for EOC calculation return l2_error, linf_error, varnames_cons(solver.equations) @@ -425,7 +453,9 @@ refinement level will be increased by 1. Parameters can be overriden by specifyi additional keyword arguments, which are passed to the respective call to `run`.. """ function convtest(parameters_file, iterations; parameters...) - @assert(iterations > 1, "Number of iterations must be bigger than 1 for a convergence analysis") + if mpi_isroot() + @assert(iterations > 1, "Number of iterations must be bigger than 1 for a convergence analysis") + end # Types of errors to be calcuated errors = Dict(:L2 => Float64[], :Linf => Float64[]) @@ -435,7 +465,7 @@ function convtest(parameters_file, iterations; parameters...) # Run trixi and extract errors for i = 1:iterations - println(string("Running convtest iteration ", i, "/", iterations)) + mpi_println(string("Running convtest iteration ", i, "/", iterations)) l2_error, linf_error, variablenames = run(parameters_file; refinement_level_increment = i - 1, parameters...) @@ -456,44 +486,46 @@ function convtest(parameters_file, iterations; parameters...) 
eocs = Dict(kind => log.(error[2:end, :] ./ error[1:end-1, :]) ./ log(1 / 2) for (kind, error) in errorsmatrix) - for (kind, error) in errorsmatrix - println(kind) - - for v in variablenames - @printf("%-20s", v) - end - println("") + if mpi_isroot() + for (kind, error) in errorsmatrix + println(kind) - for k = 1:nvariables - @printf("%-10s", "error") - @printf("%-10s", "EOC") - end - println("") + for v in variablenames + @printf("%-20s", v) + end + println("") - # Print errors for the first iteration - for k = 1:nvariables - @printf("%-10.2e", error[1, k]) - @printf("%-10s", "-") - end - println("") + for k = 1:nvariables + @printf("%-10s", "error") + @printf("%-10s", "EOC") + end + println("") - # For the following iterations print errors and EOCs - for j = 2:iterations + # Print errors for the first iteration for k = 1:nvariables - @printf("%-10.2e", error[j, k]) - @printf("%-10.2f", eocs[kind][j-1, k]) + @printf("%-10.2e", error[1, k]) + @printf("%-10s", "-") + end + println("") + + # For the following iterations print errors and EOCs + for j = 2:iterations + for k = 1:nvariables + @printf("%-10.2e", error[j, k]) + @printf("%-10.2f", eocs[kind][j-1, k]) + end + println("") end println("") - end - println("") - # Print mean EOCs - for k = 1:nvariables - @printf("%-10s", "mean") - @printf("%-10.2f", sum(eocs[kind][:, k]) ./ length(eocs[kind][:, k])) + # Print mean EOCs + for k = 1:nvariables + @printf("%-10s", "mean") + @printf("%-10.2f", sum(eocs[kind][:, k]) ./ length(eocs[kind][:, k])) + end + println("") + println("-"^80) end - println("") - println("-"^80) end end diff --git a/src/run_euler_gravity.jl b/src/run_euler_gravity.jl index aad2c3cd226..77ab67576d0 100644 --- a/src/run_euler_gravity.jl +++ b/src/run_euler_gravity.jl @@ -1,5 +1,7 @@ function init_simulation_euler_gravity() - # Print starup message + mpi_isparallel() && error("coupled simulations are not yet tested for parallel runs") # TODO parallel + + # Print startup message print_startup_message() # Get number of dimensions @@ -205,6 +207,8 @@ end function run_simulation_euler_gravity(mesh, solvers, time_parameters, time_integration_function) + mpi_isparallel() && error("coupled simulations are not yet tested for parallel runs") # TODO parallel + @unpack time, step, t_end, cfl, n_steps_max, save_final_solution, save_final_restart, analysis_interval, alive_interval, diff --git a/src/solvers/dg/1d/amr.jl b/src/solvers/dg/1d/amr.jl index e2ad5385c9e..d629b8f332f 100644 --- a/src/solvers/dg/1d/amr.jl +++ b/src/solvers/dg/1d/amr.jl @@ -1,8 +1,8 @@ # This file contains functions that are related to the AMR capabilities of the DG solver # Refine elements in the DG solver based on a list of cell_ids that should be refined -function refine!(dg::Dg1D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, - cells_to_refine::AbstractArray{Int}) where {Eqn, NVARS, POLYDEG} +function refine!(dg::Dg1D{Eqn, MeshType, NVARS, POLYDEG}, mesh::TreeMesh, + cells_to_refine::AbstractArray{Int}) where {Eqn, MeshType, NVARS, POLYDEG} # Return early if there is nothing to do if isempty(cells_to_refine) return @@ -60,6 +60,7 @@ function refine!(dg::Dg1D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, # Update DG instance with new data dg.elements = elements dg.n_elements = n_elements + dg.n_elements_global = n_elements dg.interfaces = interfaces dg.n_interfaces = n_interfaces dg.boundaries = boundaries @@ -97,8 +98,8 @@ end # Coarsen elements in the DG solver based on a list of cell_ids that should be removed -function coarsen!(dg::Dg1D{Eqn, NVARS, POLYDEG}, 
mesh::TreeMesh, - child_cells_to_coarsen::AbstractArray{Int}) where {Eqn, NVARS, POLYDEG} +function coarsen!(dg::Dg1D{Eqn, MeshType, NVARS, POLYDEG}, mesh::TreeMesh, + child_cells_to_coarsen::AbstractArray{Int}) where {Eqn, MeshType, NVARS, POLYDEG} # Return early if there is nothing to do if isempty(child_cells_to_coarsen) return @@ -166,6 +167,7 @@ function coarsen!(dg::Dg1D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, # Update DG instance with new data dg.elements = elements dg.n_elements = n_elements + dg.n_elements_global = n_elements dg.interfaces = interfaces dg.n_interfaces = n_interfaces dg.boundaries = boundaries diff --git a/src/solvers/dg/1d/dg.jl b/src/solvers/dg/1d/dg.jl index da3b13a0d31..626338a03dd 100644 --- a/src/solvers/dg/1d/dg.jl +++ b/src/solvers/dg/1d/dg.jl @@ -1,10 +1,10 @@ # Main DG data structure that contains all relevant data for the DG solver -mutable struct Dg1D{Eqn<:AbstractEquation, NVARS, POLYDEG, +mutable struct Dg1D{Eqn<:AbstractEquation, MeshType, NVARS, POLYDEG, SurfaceFlux, VolumeFlux, InitialConditions, SourceTerms, BoundaryConditions, VolumeIntegralType, ShockIndicatorVariable, VectorNnodes, MatrixNnodes, MatrixNnodes2, InverseVandermondeLegendre, MortarMatrix, - VectorAnalysisNnodes, AnalysisVandermonde} <: AbstractDg{1, POLYDEG} + VectorAnalysisNnodes, AnalysisVandermonde} <: AbstractDg{1, POLYDEG, MeshType} equations::Eqn surface_flux_function::SurfaceFlux @@ -62,6 +62,8 @@ mutable struct Dg1D{Eqn<:AbstractEquation, NVARS, POLYDEG, amr_alpha_min::Float64 amr_alpha_smooth::Bool + n_elements_global::Int + element_variables::Dict{Symbol, Union{Vector{Float64}, Vector{Int}}} cache::Dict{Symbol, Any} thread_cache::Any # to make fully-typed output more readable @@ -70,7 +72,7 @@ end # Convenience constructor to create DG solver instance -function Dg1D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, volume_flux_function, initial_conditions, source_terms, mesh::TreeMesh{NDIMS}, POLYDEG) where {NDIMS, NVARS} +function Dg1D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, volume_flux_function, initial_conditions, source_terms, mesh::TreeMesh1D, POLYDEG) where {NDIMS, NVARS} # Get cells for which an element needs to be created (i.e., all leaf cells) leaf_cell_ids = leaf_cells(mesh.tree) @@ -155,6 +157,9 @@ function Dg1D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v amr_indicator = Symbol(parameter("amr_indicator", "n/a", valid=["n/a", "gauss", "blast_wave"])) + # Set global number of elements + n_elements_global = n_elements + # Initialize storage for element variables element_variables = Dict{Symbol, Union{Vector{Float64}, Vector{Int}}}() @@ -186,8 +191,29 @@ function Dg1D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v # Store initial state integrals for conservation error calculation initial_state_integrals = Vector{Float64}() + # Convert all performance-critical fields to StaticArrays types + nodes = SVector{POLYDEG+1}(nodes) + weights = SVector{POLYDEG+1}(weights) + inverse_weights = SVector{POLYDEG+1}(inverse_weights) + lhat = SMatrix{POLYDEG+1,2}(lhat) + dhat = SMatrix{POLYDEG+1,POLYDEG+1}(dhat) + dsplit = SMatrix{POLYDEG+1,POLYDEG+1}(dsplit) + dsplit_transposed = SMatrix{POLYDEG+1,POLYDEG+1}(dsplit_transposed) + amr_refine_right = SMatrix{POLYDEG+1,POLYDEG+1}(amr_refine_right) + amr_refine_left = SMatrix{POLYDEG+1,POLYDEG+1}(amr_refine_left) + amr_coarsen_right = SMatrix{POLYDEG+1,POLYDEG+1}(amr_coarsen_right) + amr_coarsen_left = SMatrix{POLYDEG+1,POLYDEG+1}(amr_coarsen_left) + 
analysis_nodes = SVector{analysis_polydeg+1}(analysis_nodes) + analysis_weights = SVector{analysis_polydeg+1}(analysis_weights) + analysis_weights_volume = SVector{analysis_polydeg+1}(analysis_weights_volume) + # Create actual DG solver instance - dg = Dg1D( + dg = Dg1D{typeof(equation), typeof(mesh), NVARS, POLYDEG, + typeof(surface_flux_function), typeof(volume_flux_function), typeof(initial_conditions), + typeof(source_terms), typeof(boundary_conditions), + typeof(volume_integral_type), typeof(shock_indicator_variable), + typeof(nodes), typeof(dhat), typeof(lhat), typeof(inverse_vandermonde_legendre), + typeof(amr_refine_right), typeof(analysis_nodes), typeof(analysis_vandermonde)}( equation, surface_flux_function, volume_flux_function, initial_conditions, source_terms, @@ -195,18 +221,19 @@ function Dg1D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v interfaces, n_interfaces, boundaries, n_boundaries, n_boundaries_per_direction, n_l2mortars, - Tuple(boundary_conditions), - SVector{POLYDEG+1}(nodes), SVector{POLYDEG+1}(weights), SVector{POLYDEG+1}(inverse_weights), - inverse_vandermonde_legendre, SMatrix{POLYDEG+1,2}(lhat), + boundary_conditions, + nodes, weights, inverse_weights, + inverse_vandermonde_legendre, lhat, volume_integral_type, - SMatrix{POLYDEG+1,POLYDEG+1}(dhat), SMatrix{POLYDEG+1,POLYDEG+1}(dsplit), SMatrix{POLYDEG+1,POLYDEG+1}(dsplit_transposed), - SMatrix{POLYDEG+1,POLYDEG+1}(amr_refine_right), SMatrix{POLYDEG+1,POLYDEG+1}(amr_refine_left), - SMatrix{POLYDEG+1,POLYDEG+1}(amr_coarsen_right), SMatrix{POLYDEG+1,POLYDEG+1}(amr_coarsen_left), - SVector{analysis_polydeg+1}(analysis_nodes), SVector{analysis_polydeg+1}(analysis_weights), SVector{analysis_polydeg+1}(analysis_weights_volume), + dhat, dsplit, dsplit_transposed, + amr_refine_right, amr_refine_left, + amr_coarsen_right, amr_coarsen_left, + analysis_nodes, analysis_weights, analysis_weights_volume, analysis_vandermonde, analysis_total_volume, analysis_quantities, save_analysis, analysis_filename, shock_indicator_variable, shock_alpha_max, shock_alpha_min, shock_alpha_smooth, amr_indicator, amr_alpha_max, amr_alpha_min, amr_alpha_smooth, + n_elements_global, element_variables, cache, thread_cache, initial_state_integrals) @@ -236,7 +263,7 @@ end # Count the number of interfaces that need to be created -function count_required_interfaces(mesh::TreeMesh{1}, cell_ids) +function count_required_interfaces(mesh::TreeMesh1D, cell_ids) count = 0 # Iterate over all cells @@ -261,7 +288,7 @@ end # Count the number of boundaries that need to be created -function count_required_boundaries(mesh::TreeMesh{1}, cell_ids) +function count_required_boundaries(mesh::TreeMesh1D, cell_ids) count = 0 # Iterate over all cells @@ -290,7 +317,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_elements(cell_ids, mesh::TreeMesh{1}, ::Val{NVARS}, ::Val{POLYDEG}) where {NVARS, POLYDEG} +function init_elements(cell_ids, mesh::TreeMesh1D, ::Val{NVARS}, ::Val{POLYDEG}) where {NVARS, POLYDEG} # Initialize container n_elements = length(cell_ids) elements = ElementContainer1D{NVARS, POLYDEG}(n_elements) @@ -328,7 +355,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_interfaces(cell_ids, mesh::TreeMesh{1}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} +function init_interfaces(cell_ids, mesh::TreeMesh1D, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} # Initialize container n_interfaces = count_required_interfaces(mesh, cell_ids) 
interfaces = InterfaceContainer1D{NVARS, POLYDEG}(n_interfaces) @@ -344,7 +371,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_boundaries(cell_ids, mesh::TreeMesh{1}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} +function init_boundaries(cell_ids, mesh::TreeMesh1D, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} # Initialize container n_boundaries = count_required_boundaries(mesh, cell_ids) boundaries = BoundaryContainer1D{NVARS, POLYDEG}(n_boundaries) @@ -357,7 +384,7 @@ end # Initialize connectivity between elements and interfaces -function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh{1}) +function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh1D) # Construct cell -> element mapping for easier algorithm implementation tree = mesh.tree c2e = zeros(Int, length(tree)) @@ -412,7 +439,7 @@ end # Initialize connectivity between elements and boundaries -function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh{1}) +function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh1D) # Reset boundaries count count = 0 @@ -476,7 +503,7 @@ function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh{1}) return SVector(counts_per_direction) end -function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh{1}) +function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh1D) # "eval is evil" # This is a temporary hack until we have switched to a library based approach # with pure Julia code instead of parameter files. @@ -505,7 +532,7 @@ function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh{1}) end end - return boundary_conditions + return Tuple(boundary_conditions) end diff --git a/src/solvers/dg/2d/amr.jl b/src/solvers/dg/2d/amr.jl index 52770ebde23..e0ddd09382b 100644 --- a/src/solvers/dg/2d/amr.jl +++ b/src/solvers/dg/2d/amr.jl @@ -1,8 +1,8 @@ # This file contains functions that are related to the AMR capabilities of the DG solver # Refine elements in the DG solver based on a list of cell_ids that should be refined -function refine!(dg::Dg2D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, - cells_to_refine::AbstractArray{Int}) where {Eqn, NVARS, POLYDEG} +function refine!(dg::Dg2D{Eqn, MeshType, NVARS, POLYDEG}, mesh::TreeMesh, + cells_to_refine::AbstractArray{Int}) where {Eqn, MeshType, NVARS, POLYDEG} # Return early if there is nothing to do if isempty(cells_to_refine) return @@ -124,8 +124,8 @@ end # Coarsen elements in the DG solver based on a list of cell_ids that should be removed -function coarsen!(dg::Dg2D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, - child_cells_to_coarsen::AbstractArray{Int}) where {Eqn, NVARS, POLYDEG} +function coarsen!(dg::Dg2D{Eqn, MeshType, NVARS, POLYDEG}, mesh::TreeMesh, + child_cells_to_coarsen::AbstractArray{Int}) where {Eqn, MeshType, NVARS, POLYDEG} # Return early if there is nothing to do if isempty(child_cells_to_coarsen) return diff --git a/src/solvers/dg/2d/containers.jl b/src/solvers/dg/2d/containers.jl index 75a87c8440e..9444c3ddbb8 100644 --- a/src/solvers/dg/2d/containers.jl +++ b/src/solvers/dg/2d/containers.jl @@ -63,6 +63,34 @@ end ninterfaces(interfaces::InterfaceContainer2D) = length(interfaces.orientations) +# Container data structure (structure-of-arrays style) for DG MPI interfaces +struct MpiInterfaceContainer2D{NVARS, POLYDEG} <: AbstractContainer + u::Array{Float64, 4} # [leftright, variables, i, interfaces] + local_element_ids::Vector{Int} # [interfaces] + 
orientations::Vector{Int} # [interfaces] + remote_sides::Vector{Int} # [interfaces] +end + + +function MpiInterfaceContainer2D{NVARS, POLYDEG}(capacity::Integer) where {NVARS, POLYDEG} + # Initialize fields with defaults + n_nodes = POLYDEG + 1 + u = fill(NaN, 2, NVARS, n_nodes, capacity) + local_element_ids = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) + remote_sides = fill(typemin(Int), capacity) + + mpi_interfaces = MpiInterfaceContainer2D{NVARS, POLYDEG}(u, local_element_ids, orientations, + remote_sides) + + return mpi_interfaces +end + + +# Return number of interfaces +nmpiinterfaces(mpi_interfaces::MpiInterfaceContainer2D) = length(mpi_interfaces.orientations) + + # Container data structure (structure-of-arrays style) for DG boundaries struct BoundaryContainer2D{NVARS, POLYDEG} <: AbstractContainer u::Array{Float64, 4} # [leftright, variables, i, boundaries] @@ -133,7 +161,7 @@ nmortars(l2mortars::L2MortarContainer2D) = length(l2mortars.orientations) # Allow printing container contents -function Base.show(io::IO, c::L2MortarContainer2D{NVARS, POLYDEG}) where {NVARS, POLYDEG} +function Base.show(io::IO, ::MIME"text/plain", c::L2MortarContainer2D{NVARS, POLYDEG}) where {NVARS, POLYDEG} println(io, '*'^20) for idx in CartesianIndices(c.u_upper) println(io, "c.u_upper[$idx] = $(c.u_upper[idx])") diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index 97538b43aee..503423e7b38 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -1,10 +1,10 @@ # Main DG data structure that contains all relevant data for the DG solver -mutable struct Dg2D{Eqn<:AbstractEquation, NVARS, POLYDEG, +mutable struct Dg2D{Eqn<:AbstractEquation, MeshType, NVARS, POLYDEG, SurfaceFlux, VolumeFlux, InitialConditions, SourceTerms, BoundaryConditions, MortarType, VolumeIntegralType, ShockIndicatorVariable, VectorNnodes, MatrixNnodes, MatrixNnodes2, InverseVandermondeLegendre, MortarMatrix, - VectorAnalysisNnodes, AnalysisVandermonde} <: AbstractDg{2, POLYDEG} + VectorAnalysisNnodes, AnalysisVandermonde} <: AbstractDg{2, POLYDEG, MeshType} equations::Eqn surface_flux_function::SurfaceFlux @@ -19,6 +19,9 @@ mutable struct Dg2D{Eqn<:AbstractEquation, NVARS, POLYDEG, interfaces::InterfaceContainer2D{NVARS, POLYDEG} n_interfaces::Int + mpi_interfaces::MpiInterfaceContainer2D{NVARS, POLYDEG} + n_mpi_interfaces::Int + boundaries::BoundaryContainer2D{NVARS, POLYDEG} n_boundaries::Int n_boundaries_per_direction::SVector{4, Int} @@ -67,6 +70,16 @@ mutable struct Dg2D{Eqn<:AbstractEquation, NVARS, POLYDEG, amr_alpha_min::Float64 amr_alpha_smooth::Bool + mpi_neighbor_ranks::Vector{Int} + mpi_neighbor_interfaces::Vector{Vector{Int}} + mpi_send_buffers::Vector{Vector{Float64}} + mpi_recv_buffers::Vector{Vector{Float64}} + mpi_send_requests::Vector{MPI.Request} + mpi_recv_requests::Vector{MPI.Request} + n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} + n_elements_global::Int + first_element_global_id::Int + element_variables::Dict{Symbol, Union{Vector{Float64}, Vector{Int}}} cache::Dict{Symbol, Any} thread_cache::Any # to make fully-typed output more readable @@ -76,9 +89,13 @@ end # Convenience constructor to create DG solver instance -function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, volume_flux_function, initial_conditions, source_terms, mesh::TreeMesh{NDIMS}, POLYDEG) where {NDIMS, NVARS} - # Get cells for which an element needs to be created (i.e., all leaf cells) - leaf_cell_ids = leaf_cells(mesh.tree) +function 
Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, volume_flux_function, initial_conditions, source_terms, mesh::TreeMesh, POLYDEG) where {NDIMS, NVARS} + # Get local cells for which an element needs to be created (i.e., all leaf cells) + if mpi_isparallel() + leaf_cell_ids = local_leaf_cells(mesh.tree) + else + leaf_cell_ids = leaf_cells(mesh.tree) + end # Initialize element container elements = init_elements(leaf_cell_ids, mesh, Val(NVARS), Val(POLYDEG)) @@ -88,6 +105,10 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v interfaces = init_interfaces(leaf_cell_ids, mesh, Val(NVARS), Val(POLYDEG), elements) n_interfaces = ninterfaces(interfaces) + # Initialize MPI interface container + mpi_interfaces = init_mpi_interfaces(leaf_cell_ids, mesh, Val(NVARS), Val(POLYDEG), elements) + n_mpi_interfaces = nmpiinterfaces(mpi_interfaces) + # Initialize boundaries boundaries, n_boundaries_per_direction = init_boundaries(leaf_cell_ids, mesh, Val(NVARS), Val(POLYDEG), elements) n_boundaries = nboundaries(boundaries) @@ -99,7 +120,7 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v n_ecmortars = nmortars(ecmortars) # Sanity checks - if isperiodic(mesh.tree) && n_l2mortars == 0 && n_ecmortars == 0 + if isperiodic(mesh.tree) && n_l2mortars == 0 && n_ecmortars == 0 && mpi_isserial() @assert n_interfaces == 2*n_elements ("For 2D and periodic domains and conforming elements, " * "n_surf must be the same as 2*n_elem") end @@ -193,6 +214,45 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v amr_alpha_min = parameter("amr_alpha_min", 0.001) amr_alpha_smooth = parameter("amr_alpha_smooth", false) + # Set up MPI neighbor connectivity and communication data structures + if mpi_isparallel() + (mpi_neighbor_ranks, + mpi_neighbor_interfaces) = init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh) + (mpi_send_buffers, + mpi_recv_buffers, + mpi_send_requests, + mpi_recv_requests) = init_mpi_data_structures(mpi_neighbor_interfaces, + Val(NDIMS), Val(NVARS), Val(POLYDEG)) + + # Determine local and total number of elements + n_elements_by_rank = Vector{Int}(undef, mpi_nranks()) + n_elements_by_rank[mpi_rank() + 1] = n_elements + MPI.Allgather!(n_elements_by_rank, 1, mpi_comm()) + n_elements_by_rank = OffsetArray(n_elements_by_rank, 0:(mpi_nranks() - 1)) + n_elements_global = MPI.Allreduce(n_elements, +, mpi_comm()) + @assert n_elements_global == sum(n_elements_by_rank) "error in total number of elements" + + # Determine the global element id of the first element + first_element_global_id = MPI.Exscan(n_elements, +, mpi_comm()) + if mpi_isroot() + # With Exscan, the result on the first rank is undefined + first_element_global_id = 1 + else + # On all other ranks we need to add one, since Julia has one-based indices + first_element_global_id += 1 + end + else + mpi_neighbor_ranks = Int[] + mpi_neighbor_interfaces = Vector{Int}[] + mpi_send_buffers = Vector{Float64}[] + mpi_recv_buffers = Vector{Float64}[] + mpi_send_requests = MPI.Request[] + mpi_recv_requests = MPI.Request[] + n_elements_by_rank = OffsetArray([n_elements], 0:0) + n_elements_global = n_elements + first_element_global_id = 1 + end + # Initialize element variables such that they are available in the first solution file if volume_integral_type === Val(:shock_capturing) element_variables[:blending_factor] = zeros(n_elements) @@ -205,30 +265,57 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v # Store 
initial state integrals for conservation error calculation initial_state_integrals = Vector{Float64}() + # Convert all performance-critical fields to StaticArrays types + nodes = SVector{POLYDEG+1}(nodes) + weights = SVector{POLYDEG+1}(weights) + inverse_weights = SVector{POLYDEG+1}(inverse_weights) + lhat = SMatrix{POLYDEG+1,2}(lhat) + dhat = SMatrix{POLYDEG+1,POLYDEG+1}(dhat) + dsplit = SMatrix{POLYDEG+1,POLYDEG+1}(dsplit) + dsplit_transposed = SMatrix{POLYDEG+1,POLYDEG+1}(dsplit_transposed) + mortar_forward_upper = SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_upper) + mortar_forward_lower = SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_lower) + l2mortar_reverse_upper = SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_upper) + l2mortar_reverse_lower = SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_lower) + ecmortar_reverse_upper = SMatrix{POLYDEG+1,POLYDEG+1}(ecmortar_reverse_upper) + ecmortar_reverse_lower = SMatrix{POLYDEG+1,POLYDEG+1}(ecmortar_reverse_lower) + analysis_nodes = SVector{analysis_polydeg+1}(analysis_nodes) + analysis_weights = SVector{analysis_polydeg+1}(analysis_weights) + analysis_weights_volume = SVector{analysis_polydeg+1}(analysis_weights_volume) + # Create actual DG solver instance - dg = Dg2D( + dg = Dg2D{typeof(equation), typeof(mesh), NVARS, POLYDEG, + typeof(surface_flux_function), typeof(volume_flux_function), typeof(initial_conditions), + typeof(source_terms), typeof(boundary_conditions), + typeof(mortar_type), typeof(volume_integral_type), typeof(shock_indicator_variable), + typeof(nodes), typeof(dhat), typeof(lhat), typeof(inverse_vandermonde_legendre), + typeof(mortar_forward_upper), typeof(analysis_nodes), typeof(analysis_vandermonde)}( equation, surface_flux_function, volume_flux_function, initial_conditions, source_terms, elements, n_elements, interfaces, n_interfaces, + mpi_interfaces, n_mpi_interfaces, boundaries, n_boundaries, n_boundaries_per_direction, mortar_type, l2mortars, n_l2mortars, ecmortars, n_ecmortars, - Tuple(boundary_conditions), - SVector{POLYDEG+1}(nodes), SVector{POLYDEG+1}(weights), SVector{POLYDEG+1}(inverse_weights), - inverse_vandermonde_legendre, SMatrix{POLYDEG+1,2}(lhat), + boundary_conditions, + nodes, weights, inverse_weights, + inverse_vandermonde_legendre, lhat, volume_integral_type, - SMatrix{POLYDEG+1,POLYDEG+1}(dhat), SMatrix{POLYDEG+1,POLYDEG+1}(dsplit), SMatrix{POLYDEG+1,POLYDEG+1}(dsplit_transposed), - SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_upper), SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_lower), - SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_upper), SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_lower), - SMatrix{POLYDEG+1,POLYDEG+1}(ecmortar_reverse_upper), SMatrix{POLYDEG+1,POLYDEG+1}(ecmortar_reverse_lower), - SVector{analysis_polydeg+1}(analysis_nodes), SVector{analysis_polydeg+1}(analysis_weights), SVector{analysis_polydeg+1}(analysis_weights_volume), + dhat, dsplit, dsplit_transposed, + mortar_forward_upper, mortar_forward_lower, + l2mortar_reverse_upper, l2mortar_reverse_lower, + ecmortar_reverse_upper, ecmortar_reverse_lower, + analysis_nodes, analysis_weights, analysis_weights_volume, analysis_vandermonde, analysis_total_volume, analysis_quantities, save_analysis, analysis_filename, shock_indicator_variable, shock_alpha_max, shock_alpha_min, shock_alpha_smooth, amr_indicator, amr_alpha_max, amr_alpha_min, amr_alpha_smooth, + mpi_neighbor_ranks, mpi_neighbor_interfaces, + mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests, + n_elements_by_rank, n_elements_global, 
first_element_global_id, element_variables, cache, thread_cache, initial_state_integrals) @@ -268,7 +355,7 @@ end # Count the number of interfaces that need to be created -function count_required_interfaces(mesh::TreeMesh{2}, cell_ids) +function count_required_interfaces(mesh::TreeMesh2D, cell_ids) count = 0 # Iterate over all cells @@ -285,8 +372,13 @@ function count_required_interfaces(mesh::TreeMesh{2}, cell_ids) end # Skip if neighbor has children - neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_id) + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on different rank -> create MPI interface instead + if mpi_isparallel() && !is_own_cell(mesh.tree, neighbor_cell_id) continue end @@ -299,7 +391,7 @@ end # Count the number of boundaries that need to be created -function count_required_boundaries(mesh::TreeMesh{2}, cell_ids) +function count_required_boundaries(mesh::TreeMesh2D, cell_ids) count = 0 # Iterate over all cells @@ -325,7 +417,7 @@ end # Count the number of mortars that need to be created -function count_required_mortars(mesh::TreeMesh{2}, cell_ids) +function count_required_mortars(mesh::TreeMesh2D, cell_ids) count = 0 # Iterate over all cells and count mortars from perspective of coarse cells @@ -354,7 +446,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_elements(cell_ids, mesh::TreeMesh{2}, ::Val{NVARS}, ::Val{POLYDEG}) where {NVARS, POLYDEG} +function init_elements(cell_ids, mesh::TreeMesh2D, ::Val{NVARS}, ::Val{POLYDEG}) where {NVARS, POLYDEG} # Initialize container n_elements = length(cell_ids) elements = ElementContainer2D{NVARS, POLYDEG}(n_elements) @@ -396,7 +488,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_interfaces(cell_ids, mesh::TreeMesh{2}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} +function init_interfaces(cell_ids, mesh::TreeMesh2D, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} # Initialize container n_interfaces = count_required_interfaces(mesh, cell_ids) interfaces = InterfaceContainer2D{NVARS, POLYDEG}(n_interfaces) @@ -412,7 +504,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_boundaries(cell_ids, mesh::TreeMesh{2}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} +function init_boundaries(cell_ids, mesh::TreeMesh2D, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} # Initialize container n_boundaries = count_required_boundaries(mesh, cell_ids) boundaries = BoundaryContainer2D{NVARS, POLYDEG}(n_boundaries) @@ -428,7 +520,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_mortars(cell_ids, mesh::TreeMesh{2}, ::Val{NVARS}, ::Val{POLYDEG}, elements, mortar_type) where {NVARS, POLYDEG} +function init_mortars(cell_ids, mesh::TreeMesh2D, ::Val{NVARS}, ::Val{POLYDEG}, elements, mortar_type) where {NVARS, POLYDEG} # Initialize containers n_mortars = count_required_mortars(mesh, cell_ids) if mortar_type === Val(:l2) @@ -457,7 +549,7 @@ end # Initialize connectivity between elements and interfaces -function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh{2}) +function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh2D) # Construct cell -> element mapping for easier algorithm implementation tree = mesh.tree c2e = zeros(Int, length(tree)) @@ -491,6 +583,11 @@ function 
init_interface_connectivity!(elements, interfaces, mesh::TreeMesh{2}) continue end + # Skip if neighbor is on different rank -> create MPI interface instead + if mpi_isparallel() && !is_own_cell(mesh.tree, neighbor_cell_id) + continue + end + # Create interface between elements (1 -> "left" of interface, 2 -> "right" of interface) count += 1 interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] @@ -507,7 +604,7 @@ end # Initialize connectivity between elements and boundaries -function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh{2}) +function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh2D) # Reset boundaries count count = 0 @@ -580,7 +677,7 @@ end # Initialize connectivity between elements and mortars -function init_mortar_connectivity!(elements, mortars, mesh::TreeMesh{2}) +function init_mortar_connectivity!(elements, mortars, mesh::TreeMesh2D) # Construct cell -> element mapping for easier algorithm implementation tree = mesh.tree c2e = zeros(Int, length(tree)) @@ -651,7 +748,7 @@ function init_mortar_connectivity!(elements, mortars, mesh::TreeMesh{2}) end -function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh{2}) +function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh2D) # "eval is evil" # This is a temporary hack until we have switched to a library based approach # with pure Julia code instead of parameter files. @@ -684,7 +781,7 @@ function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh{2}) end end - return boundary_conditions + return Tuple(boundary_conditions) end @@ -710,7 +807,9 @@ dsdu_ut = integrate(dg, dg.elements.u, dg.elements.u_t) do i, j, element_id, dg, end ``` """ -function integrate(func, dg::Dg2D, args...; normalize=true) +integrate(func, dg::Dg2D, args...; normalize=true) = integrate(func, dg, uses_mpi(dg), args...; + normalize=normalize) +function integrate(func, dg::Dg2D, uses_mpi::Val{false}, args...; normalize=true) # Initialize integral with zeros of the right shape integral = zero(func(1, 1, 1, dg, args...)) @@ -751,18 +850,21 @@ Calculate the integral over all conservative variables: state_integrals = integrate(dg.elements.u, dg) ``` """ -function integrate(func, u, dg::Dg2D; normalize=true) +integrate(func, u, dg::Dg2D; normalize=true) = integrate(func, u, dg, uses_mpi(dg); + normalize=normalize) +function integrate(func, u, dg::Dg2D, uses_mpi; normalize=true) func_wrapped = function(i, j, element_id, dg, u) u_local = get_node_vars(u, dg, i, j, element_id) return func(u_local) end - return integrate(func_wrapped, dg, u; normalize=normalize) + return integrate(func_wrapped, dg, uses_mpi, u; normalize=normalize) end integrate(u, dg::Dg2D; normalize=true) = integrate(identity, u, dg; normalize=normalize) # Calculate L2/Linf error norms based on "exact solution" -function calc_error_norms(func, dg::Dg2D, t) +calc_error_norms(func, dg::Dg2D, t) = calc_error_norms(func, dg, t, uses_mpi(dg)) +function calc_error_norms(func, dg::Dg2D, t, uses_mpi::Val{false}) # Gather necessary information equation = equations(dg) n_nodes_analysis = size(dg.analysis_vandermonde, 1) @@ -806,12 +908,13 @@ end # Integrate ∂S/∂u ⋅ ∂u/∂t over the entire domain -function calc_entropy_timederivative(dg::Dg2D, t) +calc_entropy_timederivative(dg::Dg2D, t) = calc_entropy_timederivative(dg, t, uses_mpi(dg)) +function calc_entropy_timederivative(dg::Dg2D, t, uses_mpi) # Compute ut = rhs(u) with current solution u @notimeit timer() rhs!(dg, t) # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ - dsdu_ut = 
integrate(dg, dg.elements.u, dg.elements.u_t) do i, j, element_id, dg, u, u_t + dsdu_ut = integrate(dg, uses_mpi, dg.elements.u, dg.elements.u_t) do i, j, element_id, dg, u, u_t u_node = get_node_vars(u, dg, i, j, element_id) u_t_node = get_node_vars(u_t, dg, i, j, element_id) dot(cons2entropy(u_node, equations(dg)), u_t_node) @@ -824,7 +927,8 @@ end # Calculate L2/Linf norms of a solenoidal condition ∇ ⋅ B = 0 # OBS! This works only when the problem setup is designed such that ∂B₁/∂x + ∂B₂/∂y = 0. Cannot # compute the full 3D divergence from the given data -function calc_mhd_solenoid_condition(dg::Dg2D, t::Float64) +calc_mhd_solenoid_condition(dg::Dg2D, t) = calc_mhd_solenoid_condition(dg, t, mpi_parallel()) +function calc_mhd_solenoid_condition(dg::Dg2D, t, mpi_parallel::Val{false}) @assert equations(dg) isa IdealGlmMhdEquations2D "Only relevant for MHD" # Local copy of standard derivative matrix @@ -868,8 +972,13 @@ performance index is specified in `runtime_relative`. **Note:** Keep order of analysis quantities in sync with [`save_analysis_header`](@ref) when adding or changing quantities. """ -function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step::Integer, - runtime_absolute::Real, runtime_relative::Real; solver_gravity=nothing) +function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, + runtime_absolute, runtime_relative; solver_gravity=nothing) + analyze_solution(dg, mesh, time, dt, step, runtime_absolute, runtime_relative, uses_mpi(dg), + solver_gravity=solver_gravity) +end +function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_absolute, + runtime_relative, uses_mpi::Val{false}; solver_gravity=nothing) equation = equations(dg) # General information @@ -913,7 +1022,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step:: # Calculate and print derived quantities (error norms, entropy etc.) 
# Variable names required for L2 error, Linf error, and conservation error if any(q in dg.analysis_quantities for q in - (:l2_error, :linf_error, :conservation_error, :residual)) + (:l2_error, :linf_error, :conservation_error, :residual)) print(" Variable: ") for v in 1:nvariables(equation) @printf(" %-14s", varnames_cons(equation)[v]) @@ -1009,10 +1118,10 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step:: # Entropy time derivative if :dsdu_ut in dg.analysis_quantities - duds_ut = calc_entropy_timederivative(dg, time) + dsdu_ut = calc_entropy_timederivative(dg, time) print(" ∑∂S/∂U ⋅ Uₜ: ") - @printf(" % 10.8e", duds_ut) - dg.save_analysis && @printf(f, " % 10.8e", duds_ut) + @printf(" % 10.8e", dsdu_ut) + dg.save_analysis && @printf(f, " % 10.8e", dsdu_ut) println() end @@ -1237,7 +1346,8 @@ end # Calculate time derivative -function rhs!(dg::Dg2D, t_stage) +@inline rhs!(dg::Dg2D, t_stage) = rhs!(dg, t_stage, uses_mpi(dg)) +function rhs!(dg::Dg2D, t_stage, uses_mpi::Val{false}) # Reset u_t @timeit timer() "reset ∂u/∂t" dg.elements.u_t .= 0 @@ -2269,7 +2379,8 @@ end # Calculate stable time step size -function calc_dt(dg::Dg2D, cfl) +@inline calc_dt(dg, cfl) = calc_dt(dg, cfl, uses_mpi(dg)) +function calc_dt(dg::Dg2D, cfl, uses_mpi::Val{false}) min_dt = Inf for element_id in 1:dg.n_elements dt = calc_max_dt(dg.elements.u, element_id, @@ -2284,6 +2395,12 @@ end function calc_blending_factors!(alpha, alpha_pre_smooth, u, alpha_max, alpha_min, do_smoothing, indicator_variable, thread_cache, dg::Dg2D) + calc_blending_factors!(alpha, alpha_pre_smooth, u, alpha_max, alpha_min, do_smoothing, + indicator_variable, thread_cache, dg, uses_mpi(dg)) +end +function calc_blending_factors!(alpha, alpha_pre_smooth, u, + alpha_max, alpha_min, do_smoothing, + indicator_variable, thread_cache, dg::Dg2D, uses_mpi::Val{false}) # temporary buffers @unpack indicator_threaded, modal_threaded, modal_tmp1_threaded = thread_cache # magic parameters @@ -2336,48 +2453,54 @@ function calc_blending_factors!(alpha, alpha_pre_smooth, u, end if (do_smoothing) - # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha - # Copy alpha values such that smoothing is indpedenent of the element access order - alpha_pre_smooth .= alpha - - # Loop over interfaces - for interface_id in 1:dg.n_interfaces - # Get neighboring element ids - left = dg.interfaces.neighbor_ids[1, interface_id] - right = dg.interfaces.neighbor_ids[2, interface_id] - - # Apply smoothing - alpha[left] = max(alpha_pre_smooth[left], 0.5 * alpha_pre_smooth[right], alpha[left]) - alpha[right] = max(alpha_pre_smooth[right], 0.5 * alpha_pre_smooth[left], alpha[right]) - end - - # Loop over L2 mortars - for l2mortar_id in 1:dg.n_l2mortars - # Get neighboring element ids - lower = dg.l2mortars.neighbor_ids[1, l2mortar_id] - upper = dg.l2mortars.neighbor_ids[2, l2mortar_id] - large = dg.l2mortars.neighbor_ids[3, l2mortar_id] - - # Apply smoothing - alpha[lower] = max(alpha_pre_smooth[lower], 0.5 * alpha_pre_smooth[large], alpha[lower]) - alpha[upper] = max(alpha_pre_smooth[upper], 0.5 * alpha_pre_smooth[large], alpha[upper]) - alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[lower], alpha[large]) - alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[upper], alpha[large]) - end - - # Loop over EC mortars - for ecmortar_id in 1:dg.n_ecmortars - # Get neighboring element ids - lower = dg.ecmortars.neighbor_ids[1, ecmortar_id] - upper = dg.ecmortars.neighbor_ids[2, 
ecmortar_id] - large = dg.ecmortars.neighbor_ids[3, ecmortar_id] - - # Apply smoothing - alpha[lower] = max(alpha_pre_smooth[lower], 0.5 * alpha_pre_smooth[large], alpha[lower]) - alpha[upper] = max(alpha_pre_smooth[upper], 0.5 * alpha_pre_smooth[large], alpha[upper]) - alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[lower], alpha[large]) - alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[upper], alpha[large]) - end + smooth_alpha!(alpha, alpha_pre_smooth, dg, uses_mpi) + end +end + + +smooth_alpha!(alpha, alpha_pre_smooth, dg::Dg2D) = smooth_alpha!(alpha, alpha_pre_smooth, dg, uses_mpi(dg)) +function smooth_alpha!(alpha, alpha_pre_smooth, dg::Dg2D, uses_mpi::Val{false}) + # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha + # Copy alpha values such that smoothing is indpedenent of the element access order + alpha_pre_smooth .= alpha + + # Loop over interfaces + for interface_id in 1:dg.n_interfaces + # Get neighboring element ids + left = dg.interfaces.neighbor_ids[1, interface_id] + right = dg.interfaces.neighbor_ids[2, interface_id] + + # Apply smoothing + alpha[left] = max(alpha_pre_smooth[left], 0.5 * alpha_pre_smooth[right], alpha[left]) + alpha[right] = max(alpha_pre_smooth[right], 0.5 * alpha_pre_smooth[left], alpha[right]) + end + + # Loop over L2 mortars + for l2mortar_id in 1:dg.n_l2mortars + # Get neighboring element ids + lower = dg.l2mortars.neighbor_ids[1, l2mortar_id] + upper = dg.l2mortars.neighbor_ids[2, l2mortar_id] + large = dg.l2mortars.neighbor_ids[3, l2mortar_id] + + # Apply smoothing + alpha[lower] = max(alpha_pre_smooth[lower], 0.5 * alpha_pre_smooth[large], alpha[lower]) + alpha[upper] = max(alpha_pre_smooth[upper], 0.5 * alpha_pre_smooth[large], alpha[upper]) + alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[lower], alpha[large]) + alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[upper], alpha[large]) + end + + # Loop over EC mortars + for ecmortar_id in 1:dg.n_ecmortars + # Get neighboring element ids + lower = dg.ecmortars.neighbor_ids[1, ecmortar_id] + upper = dg.ecmortars.neighbor_ids[2, ecmortar_id] + large = dg.ecmortars.neighbor_ids[3, ecmortar_id] + + # Apply smoothing + alpha[lower] = max(alpha_pre_smooth[lower], 0.5 * alpha_pre_smooth[large], alpha[lower]) + alpha[upper] = max(alpha_pre_smooth[upper], 0.5 * alpha_pre_smooth[large], alpha[upper]) + alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[lower], alpha[large]) + alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[upper], alpha[large]) end end diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl new file mode 100644 index 00000000000..4c7eb07f046 --- /dev/null +++ b/src/solvers/dg/2d/parallel.jl @@ -0,0 +1,721 @@ +# Calculate time derivative +function rhs!(dg::Dg2D, t_stage, uses_mpi::Val{true}) + # Start to receive MPI data + @timeit timer() "start MPI receive" start_mpi_receive!(dg) + + # Prolong solution to MPI interfaces + @timeit timer() "prolong2mpiinterfaces" prolong2mpiinterfaces!(dg) + + # Start to send MPI data + @timeit timer() "start MPI send" start_mpi_send!(dg) + + # Reset u_t + @timeit timer() "reset ∂u/∂t" dg.elements.u_t .= 0 + + # Calculate volume integral + @timeit timer() "volume integral" calc_volume_integral!(dg) + + # Prolong solution to interfaces + @timeit timer() "prolong2interfaces" prolong2interfaces!(dg) + + # Calculate interface fluxes + @timeit timer() "interface flux" calc_interface_flux!(dg) + + # 
Prolong solution to boundaries + @timeit timer() "prolong2boundaries" prolong2boundaries!(dg) + + # Calculate boundary fluxes + @timeit timer() "boundary flux" calc_boundary_flux!(dg, t_stage) + + # Prolong solution to mortars + @timeit timer() "prolong2mortars" prolong2mortars!(dg) + + # Calculate mortar fluxes + @timeit timer() "mortar flux" calc_mortar_flux!(dg) + + # Finish to receive MPI data + @timeit timer() "finish MPI receive" finish_mpi_receive!(dg) + + # Calculate MPI interface fluxes + @timeit timer() "MPI interface flux" calc_mpi_interface_flux!(dg) + + # Calculate surface integrals + @timeit timer() "surface integral" calc_surface_integral!(dg) + + # Apply Jacobian from mapping to reference element + @timeit timer() "Jacobian" apply_jacobian!(dg) + + # Calculate source terms + @timeit timer() "source terms" calc_sources!(dg, dg.source_terms, t_stage) + + # Finish to send MPI data + @timeit timer() "finish MPI send" finish_mpi_send!(dg) +end + + +# Count the number of MPI interfaces that need to be created +function count_required_mpi_interfaces(mesh::TreeMesh2D, cell_ids) + count = 0 + + # Iterate over all cells + for cell_id in cell_ids + for direction in 1:n_directions(mesh.tree) + # If no neighbor exists, current cell is small or at boundary and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on this rank -> create regular interface instead + if mpi_isparallel() && is_own_cell(mesh.tree, neighbor_cell_id) + continue + end + + count += 1 + end + end + + return count +end + + +# Create MPI interface container, initialize interface data, and return interface container for further use +function init_mpi_interfaces(cell_ids, mesh::TreeMesh2D, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} + # Initialize container + n_mpi_interfaces = count_required_mpi_interfaces(mesh, cell_ids) + mpi_interfaces = MpiInterfaceContainer2D{NVARS, POLYDEG}(n_mpi_interfaces) + + # Connect elements with interfaces + init_mpi_interface_connectivity!(elements, mpi_interfaces, mesh) + + return mpi_interfaces +end + + +function start_mpi_receive!(dg::Dg2D) + for (index, d) in enumerate(dg.mpi_neighbor_ranks) + dg.mpi_recv_requests[index] = MPI.Irecv!(dg.mpi_recv_buffers[index], d, d, mpi_comm()) + end +end + + +# Initialize connectivity between elements and interfaces +function init_mpi_interface_connectivity!(elements, mpi_interfaces, mesh::TreeMesh2D) + # Reset interface count + count = 0 + + # Iterate over all elements to find neighbors and to connect via mpi_interfaces + for element_id in 1:nelements(elements) + # Get cell id + cell_id = elements.cell_ids[element_id] + + # Loop over directions + for direction in 1:n_directions(mesh.tree) + # If no neighbor exists, current cell is small and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on this MPI rank -> create regular interface instead + if mpi_isparallel() && is_own_cell(mesh.tree, neighbor_cell_id) + continue + end + + # Create interface between elements + count += 1 + mpi_interfaces.local_element_ids[count] = element_id + + if direction in (2, 4) # element is "left" of 
interface, remote cell is "right" of interface + mpi_interfaces.remote_sides[count] = 2 + else + mpi_interfaces.remote_sides[count] = 1 + end + + # Set orientation (x -> 1, y -> 2) + if direction in (1, 2) # x-direction + mpi_interfaces.orientations[count] = 1 + else # y-direction + mpi_interfaces.orientations[count] = 2 + end + end + end + + @assert count == nmpiinterfaces(mpi_interfaces) ("Actual interface count ($count) does not match " + * "expectations $(nmpiinterfaces(mpi_interfaces))") +end + + +# Initialize connectivity between MPI neighbor ranks +function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh::TreeMesh2D) + tree = mesh.tree + + # Determine neighbor ranks and sides for MPI interfaces + neighbor_ranks = fill(-1, nmpiinterfaces(mpi_interfaces)) + # The global interface id is the smaller of the (globally unique) neighbor cell ids, multiplied by + # number of directions (2 * ndims) plus direction minus one + global_interface_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) + for interface_id in 1:nmpiinterfaces(mpi_interfaces) + orientation = mpi_interfaces.orientations[interface_id] + remote_side = mpi_interfaces.remote_sides[interface_id] + # Direction is from local cell to remote cell + if orientation == 1 # MPI interface in x-direction + if remote_side == 1 # remote cell on the "left" of MPI interface + direction = 1 + else # remote cell on the "right" of MPI interface + direction = 2 + end + else # MPI interface in y-direction + if remote_side == 1 # remote cell on the "left" of MPI interface + direction = 3 + else # remote cell on the "right" of MPI interface + direction = 4 + end + end + local_element_id = mpi_interfaces.local_element_ids[interface_id] + local_cell_id = elements.cell_ids[local_element_id] + remote_cell_id = tree.neighbor_ids[direction, local_cell_id] + neighbor_ranks[interface_id] = tree.mpi_ranks[remote_cell_id] + if local_cell_id < remote_cell_id + global_interface_ids[interface_id] = 2 * ndims(tree) * local_cell_id + direction - 1 + else + global_interface_ids[interface_id] = (2 * ndims(tree) * remote_cell_id + + opposite_direction(direction) - 1) + end + end + + # Get sorted, unique neighbor ranks + mpi_neighbor_ranks = unique(sort(neighbor_ranks)) + + # Sort interfaces by global interface id + p = sortperm(global_interface_ids) + neighbor_ranks .= neighbor_ranks[p] + interface_ids = collect(1:nmpiinterfaces(mpi_interfaces))[p] + + # For each neighbor rank, init connectivity data structures + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) + for (index, d) in enumerate(mpi_neighbor_ranks) + mpi_neighbor_interfaces[index] = interface_ids[findall(x->(x == d), neighbor_ranks)] + end + + # Sanity check that we counted all interfaces exactly once + @assert sum(length(v) for v in mpi_neighbor_interfaces) == nmpiinterfaces(mpi_interfaces) + + return mpi_neighbor_ranks, mpi_neighbor_interfaces +end + + +# Initialize MPI data structures +function init_mpi_data_structures(mpi_neighbor_interfaces, ::Val{NDIMS}, ::Val{NVARS}, + ::Val{POLYDEG}) where {NDIMS, NVARS, POLYDEG} + data_size = NVARS * (POLYDEG + 1)^(NDIMS - 1) + mpi_send_buffers = Vector{Vector{Float64}}(undef, length(mpi_neighbor_interfaces)) + mpi_recv_buffers = Vector{Vector{Float64}}(undef, length(mpi_neighbor_interfaces)) + for index in 1:length(mpi_neighbor_interfaces) + mpi_send_buffers[index] = Vector{Float64}(undef, length(mpi_neighbor_interfaces[index]) * data_size) + mpi_recv_buffers[index] = Vector{Float64}(undef, 
length(mpi_neighbor_interfaces[index]) * data_size) + end + + mpi_send_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) + mpi_recv_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) + + return mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests +end + + +function prolong2mpiinterfaces!(dg::Dg2D) + equation = equations(dg) + + Threads.@threads for s in 1:dg.n_mpi_interfaces + local_element_id = dg.mpi_interfaces.local_element_ids[s] + if dg.mpi_interfaces.orientations[s] == 1 # interface in x-direction + if dg.mpi_interfaces.remote_sides[s] == 1 # local element in positive direction + for j in 1:nnodes(dg), v in 1:nvariables(dg) + dg.mpi_interfaces.u[2, v, j, s] = dg.elements.u[v, 1, j, local_element_id] + end + else # local element in negative direction + for j in 1:nnodes(dg), v in 1:nvariables(dg) + dg.mpi_interfaces.u[1, v, j, s] = dg.elements.u[v, nnodes(dg), j, local_element_id] + end + end + else # interface in y-direction + if dg.mpi_interfaces.remote_sides[s] == 1 # local element in positive direction + for i in 1:nnodes(dg), v in 1:nvariables(dg) + dg.mpi_interfaces.u[2, v, i, s] = dg.elements.u[v, i, 1, local_element_id] + end + else # local element in negative direction + for i in 1:nnodes(dg), v in 1:nvariables(dg) + dg.mpi_interfaces.u[1, v, i, s] = dg.elements.u[v, i, nnodes(dg), local_element_id] + end + end + end + end +end + + +function start_mpi_send!(dg::Dg2D) + data_size = nvariables(dg) * nnodes(dg)^(ndims(dg) - 1) + + for d in 1:length(dg.mpi_neighbor_ranks) + send_buffer = dg.mpi_send_buffers[d] + + for (index, s) in enumerate(dg.mpi_neighbor_interfaces[d]) + first = (index - 1) * data_size + 1 + last = (index - 1) * data_size + data_size + + if dg.mpi_interfaces.remote_sides[s] == 1 # local element in positive direction + @views send_buffer[first:last] .= vec(dg.mpi_interfaces.u[2, :, :, s]) + else # local element in negative direction + @views send_buffer[first:last] .= vec(dg.mpi_interfaces.u[1, :, :, s]) + end + end + end + + # Start sending + for (index, d) in enumerate(dg.mpi_neighbor_ranks) + dg.mpi_send_requests[index] = MPI.Isend(dg.mpi_send_buffers[index], d, mpi_rank(), mpi_comm()) + end +end + + +function finish_mpi_receive!(dg::Dg2D) + data_size = nvariables(dg) * nnodes(dg)^(ndims(dg) - 1) + + # Start receiving and unpack received data until all communication is finished + d, _ = MPI.Waitany!(dg.mpi_recv_requests) + while d != 0 + recv_buffer = dg.mpi_recv_buffers[d] + + for (index, s) in enumerate(dg.mpi_neighbor_interfaces[d]) + first = (index - 1) * data_size + 1 + last = (index - 1) * data_size + data_size + + if dg.mpi_interfaces.remote_sides[s] == 1 # local element in positive direction + @views vec(dg.mpi_interfaces.u[1, :, :, s]) .= recv_buffer[first:last] + else # local element in negative direction + @views vec(dg.mpi_interfaces.u[2, :, :, s]) .= recv_buffer[first:last] + end + end + + d, _ = MPI.Waitany!(dg.mpi_recv_requests) + end +end + + +# Calculate and store the surface fluxes (standard Riemann and nonconservative parts) at an MPI interface +# OBS! 
Regarding the nonconservative terms: 1) currently only needed for the MHD equations +# 2) not implemented for MPI +calc_mpi_interface_flux!(dg::Dg2D) = calc_mpi_interface_flux!(dg.elements.surface_flux_values, + have_nonconservative_terms(dg.equations), + dg) + +function calc_mpi_interface_flux!(surface_flux_values, nonconservative_terms::Val{false}, dg::Dg2D) + @unpack surface_flux_function = dg + @unpack u, local_element_ids, orientations, remote_sides = dg.mpi_interfaces + + Threads.@threads for s in 1:dg.n_mpi_interfaces + # Get local neighboring element + element_id = local_element_ids[s] + + # Determine interface direction with respect to element: + if orientations[s] == 1 # interface in x-direction + if remote_sides[s] == 1 # local element in positive direction + direction = 1 + else # local element in negative direction + direction = 2 + end + else # interface in y-direction + if remote_sides[s] == 1 # local element in positive direction + direction = 3 + else # local element in negative direction + direction = 4 + end + end + + for i in 1:nnodes(dg) + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(u, dg, i, s) + flux = surface_flux_function(u_ll, u_rr, orientations[s], equations(dg)) + + # Copy flux to local element storage + for v in 1:nvariables(dg) + surface_flux_values[v, i, direction, element_id] = flux[v] + end + end + end +end + + +function finish_mpi_send!(dg::Dg2D) + MPI.Waitall!(dg.mpi_send_requests) +end + + +function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_absolute, + runtime_relative, uses_mpi::Val{true}; solver_gravity=nothing) + equation = equations(dg) + + # General information + mpi_println() + mpi_println("-"^80) + mpi_println(" Simulation running '", get_name(equation), "' with POLYDEG = ", polydeg(dg)) + mpi_println("-"^80) + mpi_println(" #timesteps: " * @sprintf("% 14d", step) * + " " * + " run time: " * @sprintf("%10.8e s", runtime_absolute)) + mpi_println(" dt: " * @sprintf("%10.8e", dt) * + " " * + " PID: " * @sprintf("%10.8e s", runtime_relative)) + mpi_println(" sim. time: " * @sprintf("%10.8e", time) * + " " * + " PID × #ranks: " * @sprintf("%10.8e s", runtime_relative * mpi_nranks())) + + # Level information (only show for AMR) #TODO MPI add when AMR is enabled + # if parameter("amr_interval", 0)::Int > 0 && mpi_isroot() + # levels = Vector{Int}(undef, dg.n_elements) + # for element_id in 1:dg.n_elements + # levels[element_id] = mesh.tree.levels[dg.elements.cell_ids[element_id]] + # end + # min_level = minimum(levels) + # max_level = maximum(levels) + + # mpi_println(" #elements: " * @sprintf("% 14d", dg.n_elements)) + # for level = max_level:-1:min_level+1 + # mpi_println(" ├── level $level: " * @sprintf("% 14d", count(x->x==level, levels))) + # end + # mpi_println(" └── level $min_level: " * @sprintf("% 14d", count(x->x==min_level, levels))) + # end + # mpi_println() + + # Open file for appending and store time step and time information + if dg.save_analysis && mpi_isroot() + f = open(dg.analysis_filename, "a") + @printf(f, "% 9d", step) + @printf(f, " %10.8e", time) + @printf(f, " %10.8e", dt) + end + + # Calculate and print derived quantities (error norms, entropy etc.) 
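The parallel `analyze_solution` above computes each quantity on every rank, reduces it to the root rank, and prints only there via `mpi_isroot()` and `mpi_println`. The sketch below is a stripped-down version of that pattern; it mirrors the MPI.jl calls as they are used in this patch (signatures may differ in newer MPI.jl releases) and is meant to be launched under `mpiexec`/`mpiexecjl` with several ranks.

```julia
# Sketch of the "reduce to root, print on root" pattern used by the parallel
# analyze_solution above. Launch with several ranks, e.g. `mpiexecjl -n 3 julia demo.jl`.
using MPI

MPI.Init()
comm = MPI.COMM_WORLD
is_root = MPI.Comm_rank(comm) == 0

# Each rank contributes a local value (here simply its own rank number)
local_value = Float64(MPI.Comm_rank(comm))

# Sum the contributions onto rank 0; the other ranks do not use the result
result = MPI.Reduce!(Ref(local_value), +, 0, comm)

# Only the root rank reports, analogous to mpi_isroot()/mpi_println in Trixi
is_root && println("sum over all ranks: ", result[])

MPI.Finalize()
```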
+ # Variable names required for L2 error, Linf error, and conservation error + if mpi_isroot() + if any(q in dg.analysis_quantities for q in + (:l2_error, :linf_error, :conservation_error, :residual)) + print(" Variable: ") + for v in 1:nvariables(equation) + @printf(" %-14s", varnames_cons(equation)[v]) + end + println() + end + end + + # Calculate L2/Linf errors, which are also returned by analyze_solution + l2_error, linf_error = calc_error_norms(dg, time, Val(true)) + + if mpi_isroot() + # L2 error + if :l2_error in dg.analysis_quantities + print(" L2 error: ") + for v in 1:nvariables(equation) + @printf(" % 10.8e", l2_error[v]) + dg.save_analysis && @printf(f, " % 10.8e", l2_error[v]) + end + println() + end + + # Linf error + if :linf_error in dg.analysis_quantities + print(" Linf error: ") + for v in 1:nvariables(equation) + @printf(" % 10.8e", linf_error[v]) + dg.save_analysis && @printf(f, " % 10.8e", linf_error[v]) + end + println() + end + end + + # Conservation errror + if :conservation_error in dg.analysis_quantities + # Calculate state integrals + state_integrals = integrate(dg.elements.u, dg) + + # Store initial state integrals at first invocation + if isempty(dg.initial_state_integrals) + dg.initial_state_integrals = zeros(nvariables(equation)) + if mpi_isroot() + # Only set on MPI root; all other ranks do not get any value from `integrate` + dg.initial_state_integrals .= state_integrals + end + end + + if mpi_isroot() + print(" |∑U - ∑U₀|: ") + for v in 1:nvariables(equation) + err = abs(state_integrals[v] - dg.initial_state_integrals[v]) + @printf(" % 10.8e", err) + dg.save_analysis && @printf(f, " % 10.8e", err) + end + println() + end + end + + # Residual (defined here as the vector maximum of the absolute values of the time derivatives) + if :residual in dg.analysis_quantities + mpi_print(" max(|Uₜ|): ") + for v in 1:nvariables(equation) + # Calculate maximum absolute value of Uₜ + res = maximum(abs, view(dg.elements.u_t, v, :, :, :)) + res = MPI.Reduce!(Ref(res), max, mpi_root(), mpi_comm()) + mpi_isroot() && @printf(" % 10.8e", res[]) + mpi_isroot() && dg.save_analysis && @printf(f, " % 10.8e", res[]) + end + mpi_println() + end + + # L2/L∞ errors of the primitive variables + if :l2_error_primitive in dg.analysis_quantities || :linf_error_primitive in dg.analysis_quantities + l2_error_prim, linf_error_prim = calc_error_norms(cons2prim, dg, time, Val(true)) + + if mpi_isroot() + print(" Variable: ") + for v in 1:nvariables(equation) + @printf(" %-14s", varnames_prim(equation)[v]) + end + println() + + # L2 error + if :l2_error_primitive in dg.analysis_quantities + print(" L2 error prim.: ") + for v in 1:nvariables(equation) + @printf("%10.8e ", l2_error_prim[v]) + dg.save_analysis && @printf(f, " % 10.8e", l2_error_prim[v]) + end + println() + end + + # L∞ error + if :linf_error_primitive in dg.analysis_quantities + print(" Linf error pri.:") + for v in 1:nvariables(equation) + @printf("%10.8e ", linf_error_prim[v]) + dg.save_analysis && @printf(f, " % 10.8e", linf_error_prim[v]) + end + println() + end + end + end + + # Entropy time derivative + if :dsdu_ut in dg.analysis_quantities + dsdu_ut = calc_entropy_timederivative(dg, time) + if mpi_isroot() + print(" ∑∂S/∂U ⋅ Uₜ: ") + @printf(" % 10.8e", dsdu_ut) + dg.save_analysis && @printf(f, " % 10.8e", dsdu_ut) + println() + end + end + + # Entropy + if :entropy in dg.analysis_quantities + s = integrate(dg, dg.elements.u) do i, j, element_id, dg, u + cons = get_node_vars(u, dg, i, j, element_id) + return entropy(cons, 
equations(dg)) + end + if mpi_isroot() + print(" ∑S: ") + @printf(" % 10.8e", s) + dg.save_analysis && @printf(f, " % 10.8e", s) + println() + end + end + + # Total energy + if :energy_total in dg.analysis_quantities + e_total = integrate(dg, dg.elements.u) do i, j, element_id, dg, u + cons = get_node_vars(u, dg, i, j, element_id) + return energy_total(cons, equations(dg)) + end + if mpi_isroot() + print(" ∑e_total: ") + @printf(" % 10.8e", e_total) + dg.save_analysis && @printf(f, " % 10.8e", e_total) + println() + end + end + + # Kinetic energy + if :energy_kinetic in dg.analysis_quantities + e_kinetic = integrate(dg, dg.elements.u) do i, j, element_id, dg, u + cons = get_node_vars(u, dg, i, j, element_id) + return energy_kinetic(cons, equations(dg)) + end + if mpi_isroot() + print(" ∑e_kinetic: ") + @printf(" % 10.8e", e_kinetic) + dg.save_analysis && @printf(f, " % 10.8e", e_kinetic) + println() + end + end + + # Internal energy + if :energy_internal in dg.analysis_quantities + e_internal = integrate(dg, dg.elements.u) do i, j, element_id, dg, u + cons = get_node_vars(u, dg, i, j, element_id) + return energy_internal(cons, equations(dg)) + end + if mpi_isroot() + print(" ∑e_internal: ") + @printf(" % 10.8e", e_internal) + dg.save_analysis && @printf(f, " % 10.8e", e_internal) + println() + end + end + + # Magnetic energy #TODO MPI add when MHD is enabled + # if :energy_magnetic in dg.analysis_quantities + # e_magnetic = integrate(dg, dg.elements.u) do i, j, element_id, dg, u + # cons = get_node_vars(u, dg, i, j, element_id) + # return energy_magnetic(cons, equations(dg)) + # end + # if mpi_isroot() + # print(" ∑e_magnetic: ") + # @printf(" % 10.8e", e_magnetic) + # dg.save_analysis && @printf(f, " % 10.8e", e_magnetic) + # println() + # end + # end + + # Potential energy #TODO MPI add when Euler-gravity is enabled + # if :energy_potential in dg.analysis_quantities + # # FIXME: This should be implemented properly for multiple coupled solvers + # @assert !isnothing(solver_gravity) "Only works if gravity solver is supplied" + # @assert dg.initial_conditions == initial_conditions_jeans_instability "Only works with Jeans instability setup" + + # e_potential = integrate(dg, dg.elements.u, solver_gravity.elements.u) do i, j, element_id, dg, u_euler, u_gravity + # cons_euler = get_node_vars(u_euler, dg, i, j, element_id) + # cons_gravity = get_node_vars(u_gravity, solver_gravity, i, j, element_id) + # # OBS! 
subtraction is specific to Jeans instability test where rho_0 = 1.5e7 + # return (cons_euler[1] - 1.5e7) * cons_gravity[1] + # end + # if mpi_isroot() + # print(" ∑e_pot: ") + # @printf(" % 10.8e", e_potential) + # dg.save_analysis && @printf(f, " % 10.8e", e_potential) + # println() + # end + # end + + # Solenoidal condition ∇ ⋅ B = 0 #TODO MPI add when MHD is enabled + # if :l2_divb in dg.analysis_quantities || :linf_divb in dg.analysis_quantities + # l2_divb, linf_divb = calc_mhd_solenoid_condition(dg, time) + # end + # if mpi_isroot() + # # L2 norm of ∇ ⋅ B + # if :l2_divb in dg.analysis_quantities + # print(" L2 ∇ ⋅B: ") + # @printf(" % 10.8e", l2_divb) + # dg.save_analysis && @printf(f, " % 10.8e", l2_divb) + # println() + # end + # # Linf norm of ∇ ⋅ B + # if :linf_divb in dg.analysis_quantities + # print(" Linf ∇ ⋅B: ") + # @printf(" % 10.8e", linf_divb) + # dg.save_analysis && @printf(f, " % 10.8e", linf_divb) + # println() + # end + # end + + # Cross helicity #TODO MPI add when MHD is enabled + # if :cross_helicity in dg.analysis_quantities + # h_c = integrate(dg, dg.elements.u) do i, j, element_id, dg, u + # cons = get_node_vars(u, dg, i, j, element_id) + # return cross_helicity(cons, equations(dg)) + # end + # if mpi_isroot() + # print(" ∑H_c: ") + # @printf(" % 10.8e", h_c) + # dg.save_analysis && @printf(f, " % 10.8e", h_c) + # println() + # end + # end + + if mpi_isroot() + println("-"^80) + println() + + # Add line break and close analysis file if it was opened + if dg.save_analysis + println(f) + close(f) + end + end + + # Return errors for EOC analysis + return l2_error, linf_error +end + + +# OBS! Global results are only calculated on MPI root +@inline calc_error_norms(dg::Dg2D, t, uses_mpi) = calc_error_norms(cons2cons, dg, t, uses_mpi) +function calc_error_norms(func, dg::Dg2D, t, uses_mpi::Val{true}) + l2_error, linf_error = calc_error_norms(func, dg, t, Val(false)) + + # Since the local L2 norm is already normalized and square-rooted, we need to undo this first + global_l2_error = Vector(l2_error.^2 .* dg.analysis_total_volume) + global_linf_error = Vector(linf_error) + MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm()) + MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm()) + l2_error = convert(typeof(l2_error), global_l2_error) + linf_error = convert(typeof(linf_error), global_linf_error) + + l2_error = @. sqrt(l2_error / dg.analysis_total_volume) + + return l2_error, linf_error +end + + +#TODO MPI add when MHD is enabled +# function calc_mhd_solenoid_condition(dg::Dg2D, t, mpi_parallel::Val{true}) +# l2_divb, linf_divb = calc_mhd_solenoid_condition(func, dg, t, Val(false)) +# +# # Since the local L2 norm is already normalized and square-rooted, we need to undo this first +# global_l2_divb = Vector(l2_divb.^2 .* dg.analysis_total_volume) +# global_linf_divb = Vector(linf_divb) +# MPI.Reduce!(global_l2_divb, +, mpi_root(), mpi_comm()) +# MPI.Reduce!(global_linf_divb, max, mpi_root(), mpi_comm()) +# l2_divb = convert(typeof(l2_divb), global_l2_divb) +# linf_divb = convert(typeof(linf_divb), global_linf_divb) +# +# l2_divb = @. sqrt(l2_divb / dg.analysis_total_volume) +# +# return l2_divb, linf_divb +# end + + +# OBS! Global results are only calculated on MPI root, all other domains receive `nothing` +function integrate(func, dg::Dg2D, uses_mpi::Val{true}, args...; normalize=true) + integral = integrate(func, dg, Val(false), args...; normalize=normalize) + integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm()) + + return mpi_isroot() ? 
integral[] : integral +end + + +# Calculate stable time step size +function calc_dt(dg::Dg2D, cfl, uses_mpi::Val{true}) + min_dt = calc_dt(dg, cfl, Val(false)) + min_dt = MPI.Allreduce!(Ref(min_dt), min, mpi_comm())[] + + return min_dt +end diff --git a/src/solvers/dg/3d/amr.jl b/src/solvers/dg/3d/amr.jl index b05ccee0703..f3f0fe6dee0 100644 --- a/src/solvers/dg/3d/amr.jl +++ b/src/solvers/dg/3d/amr.jl @@ -1,8 +1,8 @@ # This file contains functions that are related to the AMR capabilities of the DG solver # Refine elements in the DG solver based on a list of cell_ids that should be refined -function refine!(dg::Dg3D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, - cells_to_refine::AbstractArray{Int}) where {Eqn, NVARS, POLYDEG} +function refine!(dg::Dg3D{Eqn, MeshType, NVARS, POLYDEG}, mesh::TreeMesh, + cells_to_refine::AbstractArray{Int}) where {Eqn, MeshType, NVARS, POLYDEG} # Return early if there is nothing to do if isempty(cells_to_refine) return @@ -65,6 +65,7 @@ function refine!(dg::Dg3D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, # Update DG instance with new data dg.elements = elements dg.n_elements = n_elements + dg.n_elements_global = n_elements dg.interfaces = interfaces dg.n_interfaces = n_interfaces dg.boundaries = boundaries @@ -131,8 +132,8 @@ end # Coarsen elements in the DG solver based on a list of cell_ids that should be removed -function coarsen!(dg::Dg3D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, - child_cells_to_coarsen::AbstractArray{Int}) where {Eqn, NVARS, POLYDEG} +function coarsen!(dg::Dg3D{Eqn, MeshType, NVARS, POLYDEG}, mesh::TreeMesh, + child_cells_to_coarsen::AbstractArray{Int}) where {Eqn, MeshType, NVARS, POLYDEG} # Return early if there is nothing to do if isempty(child_cells_to_coarsen) return @@ -207,6 +208,7 @@ function coarsen!(dg::Dg3D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, # Update DG instance with new data dg.elements = elements dg.n_elements = n_elements + dg.n_elements_global = n_elements dg.interfaces = interfaces dg.n_interfaces = n_interfaces dg.boundaries = boundaries diff --git a/src/solvers/dg/3d/containers.jl b/src/solvers/dg/3d/containers.jl index 6b1f83eb101..c70473a45e0 100644 --- a/src/solvers/dg/3d/containers.jl +++ b/src/solvers/dg/3d/containers.jl @@ -151,7 +151,7 @@ nmortars(l2mortars::L2MortarContainer3D) = length(l2mortars.orientations) # Allow printing container contents -function Base.show(io::IO, c::L2MortarContainer3D{NVARS, POLYDEG}) where {NVARS, POLYDEG} +function Base.show(io::IO, ::MIME"text/plain", c::L2MortarContainer3D{NVARS, POLYDEG}) where {NVARS, POLYDEG} println(io, '*'^20) for idx in CartesianIndices(c.u_upper_left) println(io, "c.u_upper_left[$idx] = $(c.u_upper_left[idx])") diff --git a/src/solvers/dg/3d/dg.jl b/src/solvers/dg/3d/dg.jl index ee8d7827f31..ebd6b1c58e8 100644 --- a/src/solvers/dg/3d/dg.jl +++ b/src/solvers/dg/3d/dg.jl @@ -1,10 +1,10 @@ # Main DG data structure that contains all relevant data for the DG solver -mutable struct Dg3D{Eqn<:AbstractEquation, NVARS, POLYDEG, +mutable struct Dg3D{Eqn<:AbstractEquation, MeshType, NVARS, POLYDEG, SurfaceFlux, VolumeFlux, InitialConditions, SourceTerms, BoundaryConditions, MortarType, VolumeIntegralType, ShockIndicatorVariable, VectorNnodes, MatrixNnodes, MatrixNnodes2, InverseVandermondeLegendre, MortarMatrix, - VectorAnalysisNnodes, AnalysisVandermonde} <: AbstractDg{3, POLYDEG} + VectorAnalysisNnodes, AnalysisVandermonde} <: AbstractDg{3, POLYDEG, MeshType} equations::Eqn surface_flux_function::SurfaceFlux @@ -66,6 +66,8 @@ mutable struct Dg3D{Eqn<:AbstractEquation, NVARS, 
POLYDEG, positivity_preserving_limiter_apply::Bool positivity_preserving_limiter_threshold::Float64 + n_elements_global::Int + element_variables::Dict{Symbol, Union{Vector{Float64}, Vector{Int}}} cache::Dict{Symbol, Any} thread_cache::Any # to make fully-typed output more readable @@ -75,7 +77,7 @@ end # Convenience constructor to create DG solver instance -function Dg3D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, volume_flux_function, initial_conditions, source_terms, mesh::TreeMesh{NDIMS}, POLYDEG) where {NDIMS, NVARS} +function Dg3D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, volume_flux_function, initial_conditions, source_terms, mesh::TreeMesh3D, POLYDEG) where {NDIMS, NVARS} # Get cells for which an element needs to be created (i.e., all leaf cells) leaf_cell_ids = leaf_cells(mesh.tree) @@ -133,8 +135,8 @@ function Dg3D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v # Initialize data structures for error analysis (by default, we use twice the # number of analysis nodes as the normal solution) - NAna = 2 * (n_nodes) - 1 - analysis_nodes, analysis_weights = gauss_lobatto_nodes_weights(NAna + 1) + analysis_polydeg = 2 * (n_nodes) - 1 + analysis_nodes, analysis_weights = gauss_lobatto_nodes_weights(analysis_polydeg + 1) analysis_weights_volume = analysis_weights analysis_vandermonde = polynomial_interpolation_matrix(nodes, analysis_nodes) analysis_total_volume = mesh.tree.length_level_0^ndims(mesh) @@ -168,6 +170,9 @@ function Dg3D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v amr_indicator = Symbol(parameter("amr_indicator", "n/a", valid=["n/a", "gauss", "blob", "density_pulse", "sedov_self_gravity"])) + # Set global number of elements + n_elements_global = n_elements + # Initialize storage for element variables element_variables = Dict{Symbol, Union{Vector{Float64}, Vector{Int}}}() # maximum and minimum alpha for shock capturing @@ -202,8 +207,29 @@ function Dg3D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v # Store initial state integrals for conservation error calculation initial_state_integrals = Vector{Float64}() + # Convert all performance-critical fields to StaticArrays types + nodes = SVector{POLYDEG+1}(nodes) + weights = SVector{POLYDEG+1}(weights) + inverse_weights = SVector{POLYDEG+1}(inverse_weights) + lhat = SMatrix{POLYDEG+1,2}(lhat) + dhat = SMatrix{POLYDEG+1,POLYDEG+1}(dhat) + dsplit = SMatrix{POLYDEG+1,POLYDEG+1}(dsplit) + dsplit_transposed = SMatrix{POLYDEG+1,POLYDEG+1}(dsplit_transposed) + mortar_forward_upper = SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_upper) + mortar_forward_lower = SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_lower) + l2mortar_reverse_upper = SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_upper) + l2mortar_reverse_lower = SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_lower) + analysis_nodes = SVector{analysis_polydeg+1}(analysis_nodes) + analysis_weights = SVector{analysis_polydeg+1}(analysis_weights) + analysis_weights_volume = SVector{analysis_polydeg+1}(analysis_weights_volume) + # Create actual DG solver instance - dg = Dg3D( + dg = Dg3D{typeof(equation), typeof(mesh), NVARS, POLYDEG, + typeof(surface_flux_function), typeof(volume_flux_function), typeof(initial_conditions), + typeof(source_terms), typeof(boundary_conditions), + typeof(mortar_type), typeof(volume_integral_type), typeof(shock_indicator_variable), + typeof(nodes), typeof(dhat), typeof(lhat), typeof(inverse_vandermonde_legendre), + typeof(mortar_forward_upper), 
typeof(analysis_nodes), typeof(analysis_vandermonde)}( equation, surface_flux_function, volume_flux_function, initial_conditions, source_terms, @@ -212,19 +238,20 @@ function Dg3D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v boundaries, n_boundaries, n_boundaries_per_direction, mortar_type, l2mortars, n_l2mortars, - Tuple(boundary_conditions), - SVector{POLYDEG+1}(nodes), SVector{POLYDEG+1}(weights), SVector{POLYDEG+1}(inverse_weights), - inverse_vandermonde_legendre, SMatrix{POLYDEG+1,2}(lhat), + boundary_conditions, + nodes, weights, inverse_weights, + inverse_vandermonde_legendre, lhat, volume_integral_type, - SMatrix{POLYDEG+1,POLYDEG+1}(dhat), SMatrix{POLYDEG+1,POLYDEG+1}(dsplit), SMatrix{POLYDEG+1,POLYDEG+1}(dsplit_transposed), - SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_upper), SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_lower), - SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_upper), SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_lower), - SVector{NAna+1}(analysis_nodes), SVector{NAna+1}(analysis_weights), SVector{NAna+1}(analysis_weights_volume), + dhat, dsplit, dsplit_transposed, + mortar_forward_upper, mortar_forward_lower, + l2mortar_reverse_upper, l2mortar_reverse_lower, + analysis_nodes, analysis_weights, analysis_weights_volume, analysis_vandermonde, analysis_total_volume, analysis_quantities, save_analysis, analysis_filename, shock_indicator_variable, shock_alpha_max, shock_alpha_min, shock_alpha_smooth, amr_indicator, amr_alpha_max, amr_alpha_min, amr_alpha_smooth, positivity_preserving_limiter_apply, positivity_preserving_limiter_threshold, + n_elements_global, element_variables, cache, thread_cache, initial_state_integrals) @@ -275,7 +302,7 @@ end # Count the number of interfaces that need to be created -function count_required_interfaces(mesh::TreeMesh{3}, cell_ids) +function count_required_interfaces(mesh::TreeMesh3D, cell_ids) count = 0 # Iterate over all cells @@ -306,7 +333,7 @@ end # Count the number of boundaries that need to be created -function count_required_boundaries(mesh::TreeMesh{3}, cell_ids) +function count_required_boundaries(mesh::TreeMesh3D, cell_ids) count = 0 # Iterate over all cells @@ -332,7 +359,7 @@ end # Count the number of mortars that need to be created -function count_required_mortars(mesh::TreeMesh{3}, cell_ids) +function count_required_mortars(mesh::TreeMesh3D, cell_ids) count = 0 # Iterate over all cells and count mortars from perspective of coarse cells @@ -361,7 +388,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_elements(cell_ids, mesh::TreeMesh{3}, ::Val{NVARS}, ::Val{POLYDEG}) where {NVARS, POLYDEG} +function init_elements(cell_ids, mesh::TreeMesh3D, ::Val{NVARS}, ::Val{POLYDEG}) where {NVARS, POLYDEG} # Initialize container n_elements = length(cell_ids) elements = ElementContainer3D{NVARS, POLYDEG}(n_elements) @@ -403,7 +430,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_interfaces(cell_ids, mesh::TreeMesh{3}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} +function init_interfaces(cell_ids, mesh::TreeMesh3D, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} # Initialize container n_interfaces = count_required_interfaces(mesh, cell_ids) interfaces = InterfaceContainer3D{NVARS, POLYDEG}(n_interfaces) @@ -419,7 +446,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_boundaries(cell_ids, mesh::TreeMesh{3}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} +function 
init_boundaries(cell_ids, mesh::TreeMesh3D, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} # Initialize container n_boundaries = count_required_boundaries(mesh, cell_ids) boundaries = BoundaryContainer3D{NVARS, POLYDEG}(n_boundaries) @@ -435,7 +462,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_mortars(cell_ids, mesh::TreeMesh{3}, ::Val{NVARS}, ::Val{POLYDEG}, elements, mortar_type) where {NVARS, POLYDEG} +function init_mortars(cell_ids, mesh::TreeMesh3D, ::Val{NVARS}, ::Val{POLYDEG}, elements, mortar_type) where {NVARS, POLYDEG} # Initialize containers n_mortars = count_required_mortars(mesh, cell_ids) if mortar_type === Val(:l2) @@ -457,7 +484,7 @@ end # Initialize connectivity between elements and interfaces -function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh{3}) +function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh3D) # Construct cell -> element mapping for easier algorithm implementation tree = mesh.tree c2e = zeros(Int, length(tree)) @@ -513,7 +540,7 @@ end # Initialize connectivity between elements and boundaries -function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh{3}) +function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh3D) # Reset boundaries count count = 0 @@ -592,7 +619,7 @@ end # Initialize connectivity between elements and mortars -function init_mortar_connectivity!(elements, mortars, mesh::TreeMesh{3}) +function init_mortar_connectivity!(elements, mortars, mesh::TreeMesh3D) # Construct cell -> element mapping for easier algorithm implementation tree = mesh.tree c2e = zeros(Int, length(tree)) @@ -705,7 +732,7 @@ function init_mortar_connectivity!(elements, mortars, mesh::TreeMesh{3}) end -function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh{3}) +function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh3D) # "eval is evil" # This is a temporary hack until we have switched to a library based approach # with pure Julia code instead of parameter files. @@ -743,7 +770,7 @@ function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh{3}) end end - return boundary_conditions + return Tuple(boundary_conditions) end diff --git a/src/solvers/dg/dg.jl b/src/solvers/dg/dg.jl index 817e5fb98ca..a926dd5301f 100644 --- a/src/solvers/dg/dg.jl +++ b/src/solvers/dg/dg.jl @@ -1,6 +1,6 @@ # Abstract supertype for DG-type solvers # `POLYDEG` corresponds to `N` in the school of Kopriva -abstract type AbstractDg{NDIMS, POLYDEG} <: AbstractSolver{NDIMS} end +abstract type AbstractDg{NDIMS, POLYDEG, MeshType} <: AbstractSolver{NDIMS} end @inline Base.ndims(dg::AbstractDg) = ndims(equations(dg)) @@ -19,6 +19,9 @@ abstract type AbstractDg{NDIMS, POLYDEG} <: AbstractSolver{NDIMS} end # Return number of degrees of freedom @inline ndofs(dg::AbstractDg) = dg.n_elements * nnodes(dg)^ndims(dg) +@inline uses_mpi(::AbstractDg{NDIMS, POLYDEG, TreeMesh{ParallelTree{NDIMS}}}) where {NDIMS, POLYDEG}= Val(true) +@inline uses_mpi(::AbstractDg{NDIMS, POLYDEG, TreeMesh{SerialTree{NDIMS}}}) where {NDIMS, POLYDEG} = Val(false) + """ get_node_coords(x, dg::AbstractDg, indices...) 
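A few hunks above, the `Dg3D` constructor now converts the nodes, weights, and operator matrices to StaticArrays types once, before the fully parameterized solver struct is created. The sketch below illustrates what that conversion provides; the variable names and sizes are made up for this example and assume only that StaticArrays is available.

```julia
# Illustrative sketch of the StaticArrays conversion performed in the Dg3D
# constructor above; names and sizes are made up for this example.
using StaticArrays

polydeg = 3
n_nodes = polydeg + 1

# The setup routines produce ordinary Arrays ...
dhat = rand(n_nodes, n_nodes)

# ... which are converted once, so that the matrix size becomes part of the type
# and loops over the operator can be unrolled by the compiler
dhat_static = SMatrix{n_nodes, n_nodes}(dhat)

@assert dhat_static isa SMatrix{4, 4, Float64}
@assert dhat_static == dhat
```

Baking `typeof(dhat_static)` into the struct's type parameters keeps the field concretely typed, which is why the constructor call now spells out all type parameters explicitly.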
@@ -67,6 +70,7 @@ include("1d/amr.jl") include("2d/containers.jl") include("2d/dg.jl") include("2d/amr.jl") +include("2d/parallel.jl") # Include 3D implementation include("3d/containers.jl") diff --git a/test/Project.toml b/test/Project.toml index a2cd2f8d848..1a807f83808 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,3 +1,4 @@ [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/runtests.jl b/test/runtests.jl index 9d9d4c3926e..1430011a95a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,8 +1,10 @@ using Test +using MPI: mpiexec # run tests on Travis CI in parallel const TRIXI_TEST = get(ENV, "TRIXI_TEST", "all") const ON_APPVEYOR = lowercase(get(ENV, "APPVEYOR", "false")) == "true" +const TRIXI_MPI_NPROCS = clamp(Sys.CPU_THREADS, 2, 3) @time @testset "Trixi.jl tests" begin @time if TRIXI_TEST == "all" || TRIXI_TEST == "1D" @@ -25,4 +27,12 @@ const ON_APPVEYOR = lowercase(get(ENV, "APPVEYOR", "false")) == "true" @time if (TRIXI_TEST == "all" && !ON_APPVEYOR) || TRIXI_TEST == "paper-self-gravitating-gas-dynamics" include("test_paper-self-gravitating-gas-dynamics.jl") end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "parallel_2d" + # Based on `runtests.jl` from `MPI.jl` and `PencilArrays.jl` + # Precompilation disabled to prevent race conditions when loading packages + mpiexec() do cmd + run(`$cmd -n $TRIXI_MPI_NPROCS $(Base.julia_cmd()) --compiled-modules=no test_examples_parallel_2d.jl`) + end + end end diff --git a/test/test_examples_parallel_2d.jl b/test/test_examples_parallel_2d.jl new file mode 100644 index 00000000000..013be763820 --- /dev/null +++ b/test/test_examples_parallel_2d.jl @@ -0,0 +1,93 @@ +module TestExamplesParallel2D + +using Test +using Trixi + +include("test_trixi.jl") + +# Start with a clean environment: remove Trixi output directory if it exists +outdir = "out" +Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) + +# pathof(Trixi) returns /path/to/Trixi/src/Trixi.jl, dirname gives the parent directory +const EXAMPLES_DIR = joinpath(pathof(Trixi) |> dirname |> dirname, "examples", "2d") + +# Run basic tests +@testset "Examples 2D" begin + @testset "parameters.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters.toml"), + l2 = [9.144681765639205e-6], + linf = [6.437440532547356e-5]) + end + @testset "parameters.toml with polydeg=1" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters.toml"), + l2 = [0.05264106093598111], + linf = [0.08754218386076518], + polydeg=1) + end + @testset "parameters_ec.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_ec.toml"), + l2 = [0.06159341742582756, 0.05012484425381723, 0.05013298724507752, 0.22537740506116724], + linf = [0.29912627861573327, 0.30886767304359375, 0.3088108573487326, 1.0657556075017878]) + end + @testset "parameters_density_wave.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_density_wave.toml"), + l2 = [0.001060077845747576, 0.00010600778457107525, 0.00021201556914875742, 2.6501946139091318e-5], + linf = [0.0065356386867677085, 0.0006535638688170142, 0.0013071277374487877, 0.0001633909674296774], + extra_analysis_quantities=["l2_error_primitive", "linf_error_primitive"], t_end=0.5) + end + @testset "parameters_vortex.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_vortex.toml"), + l2 = [3.6343138447409784e-6, 0.0032111379843728876, 0.0032111482778261658, 0.004545715889714643], + linf = 
[7.901869034399045e-5, 0.030511158864742205, 0.030451936462313256, 0.04361908901631395]) + end + # MHD + MPI not yet implemented + # @testset "parameters_ec_mhd.toml" begin + # test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_ec_mhd.toml"), + # l2 = [0.03607862694368351, 0.04281395008247395, 0.04280207686965749, 0.025746770192645763, 0.1611518499414067, 0.017455917249117023, 0.017456981264942977, 0.02688321120361229, 0.00015024027267648003], + # linf = [0.23502083666166018, 0.3156846367743936, 0.31227895161037256, 0.2118146956106238, 0.9743049414302711, 0.09050624115026618, 0.09131633488909774, 0.15693063355520998, 0.0038394720095667593]) + # end + @testset "parameters_hyp_diff_harmonic_nonperiodic.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_hyp_diff_harmonic_nonperiodic.toml"), + l2 = [8.618132353932638e-8, 5.619399844708813e-7, 5.619399845476024e-7], + linf = [1.124861862326869e-6, 8.622436471483752e-6, 8.622436469707395e-6]) + end + @testset "parameters_hyp_diff_llf.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_hyp_diff_llf.toml"), + l2 = [0.00015687751088073104, 0.0010259867353397119, 0.0010259867353398994], + linf = [0.001198695640053704, 0.006423873515701395, 0.006423873515686296]) + end + @testset "parameters_hyp_diff_nonperiodic.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_hyp_diff_nonperiodic.toml"), + l2 = [8.523077654037775e-6, 2.877932365308637e-5, 5.454942769137812e-5], + linf = [5.484978959957587e-5, 0.00014544895979200218, 0.000324491268921534]) + end + @testset "parameters_hyp_diff_upwind.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_hyp_diff_upwind.toml"), + l2 = [5.868147556488962e-6, 3.8051792732628014e-5, 3.8051792732620214e-5], + linf = [3.70196549871471e-5, 0.0002072058411455302, 0.00020720584114464202]) + end + @testset "parameters_nonperiodic.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_nonperiodic.toml"), + l2 = [2.3652137675654753e-6, 2.1386731303685556e-6, 2.138673130413185e-6, 6.009920290578574e-6], + linf = [1.4080448659026246e-5, 1.7581818010814487e-5, 1.758181801525538e-5, 5.9568540361709665e-5]) + end + @testset "parameters_source_terms.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_source_terms.toml"), + l2 = [8.517783186497567e-7, 1.2350199409361865e-6, 1.2350199409828616e-6, 4.277884398786315e-6], + linf = [8.357934254688004e-6, 1.0326389653148027e-5, 1.0326389654924384e-5, 4.4961900057316484e-5]) + end + @testset "parameters.toml with restart and t_end=2" begin + Trixi.run(joinpath(EXAMPLES_DIR, "parameters.toml")) + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters.toml"), + l2 = [1.2148032444677485e-5], + linf = [6.495644794757283e-5], + t_end = 2, + restart = true, restart_filename = "out/restart_000040.h5") + end +end + +# Clean up afterwards: delete Trixi output directory +Trixi.mpi_isroot() && @test_nowarn rm(outdir, recursive=true) + +end #module diff --git a/test/test_manual.jl b/test/test_manual.jl index 1ad79bdd9ee..bedbf0be8f7 100644 --- a/test/test_manual.jl +++ b/test/test_manual.jl @@ -9,16 +9,15 @@ isdir(outdir) && rm(outdir, recursive=true) # Run various manual (= non-parameter-file-triggered tests) @testset "Manual tests" begin - @testset "Tree" begin + @testset "SerialTree" begin @testset "constructors" begin - @test_nowarn Trixi.Tree(Val(1), 10, 0.0, 1.0) + @test_nowarn Trixi.SerialTree(Val(1), 10, 0.0, 1.0) end @testset "helper functions" begin - t = Trixi.Tree(Val(1), 10, 0.0, 1.0) - @test_nowarn show(t) + t = 
Trixi.SerialTree(Val(1), 10, 0.0, 1.0) + @test isnothing(display(t)) @test Trixi.ndims(t) == 1 - @test Trixi.ndims(Trixi.Tree{1}) == 1 @test Trixi.has_any_neighbor(t, 1, 1) == true @test Trixi.isperiodic(t, 1) == true @test Trixi.n_children_per_cell(t) == 2 @@ -27,7 +26,7 @@ isdir(outdir) && rm(outdir, recursive=true) end @testset "refine!/coarsen!" begin - t = Trixi.Tree(Val(1), 10, 0.0, 1.0) + t = Trixi.SerialTree(Val(1), 10, 0.0, 1.0) @test Trixi.refine!(t) == [1] @test Trixi.coarsen!(t) == [1] @test Trixi.refine!(t) == [1] @@ -41,6 +40,24 @@ isdir(outdir) && rm(outdir, recursive=true) end end + @testset "ParallelTree" begin + @testset "constructors" begin + @test_nowarn Trixi.ParallelTree(Val(1), 10, 0.0, 1.0) + end + + @testset "helper functions" begin + t = Trixi.ParallelTree(Val(1), 10, 0.0, 1.0) + @test isnothing(display(t)) + @test isnothing(Trixi.reset_data_structures!(t)) + end + end + + @testset "TreeMesh" begin + @testset "constructors" begin + @test Trixi.TreeMesh{Trixi.SerialTree{1}}(1, 5.0, 2.0) isa Trixi.TreeMesh + end + end + @testset "interpolation" begin @testset "nodes and weights" begin @test Trixi.gauss_nodes_weights(1) == ([0.0], [2.0]) @@ -188,9 +205,9 @@ isdir(outdir) && rm(outdir, recursive=true) @testset "DG L2 mortar container debug output" begin c2d = Trixi.L2MortarContainer2D{1, 1}(1) - @test isnothing(show(c2d)) + @test isnothing(display(c2d)) c3d = Trixi.L2MortarContainer3D{1, 1}(1) - @test isnothing(show(c3d)) + @test isnothing(display(c3d)) end end
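The `test_manual.jl` changes above replace `show(c)` with `display(c)` because the container types now define `Base.show(io, ::MIME"text/plain", c)` for their verbose debug output. The self-contained sketch below, using a made-up `DebugContainer` type, shows why this matters: `display` routes through the `text/plain` MIME method, while the two-argument `show` keeps Julia's default compact form.

```julia
# Sketch of why the tests above switch from show(c) to display(c) once the
# containers define Base.show(io, ::MIME"text/plain", c). DebugContainer is made up.
struct DebugContainer
    data::Vector{Float64}
end

# Verbose, human-readable output is attached to the text/plain MIME type ...
function Base.show(io::IO, ::MIME"text/plain", c::DebugContainer)
    println(io, "DebugContainer with ", length(c.data), " entries:")
    for (i, x) in enumerate(c.data)
        println(io, "  data[$i] = $x")
    end
end

c = DebugContainer([1.0, 2.0])

# ... so display (and the REPL) picks up the verbose method, while the
# two-argument show falls back to Julia's default compact representation.
display(c)       # uses the MIME"text/plain" method defined above
show(stdout, c)  # prints the default DebugContainer([1.0, 2.0])
println()
```

`display` returns `nothing`, which is what the `@test isnothing(display(t))` assertions above rely on.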