From d0e85dab6fb1553e830c410bd4abb4d26fc85470 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Thu, 3 Sep 2020 10:02:10 +0200 Subject: [PATCH 01/81] Add MPI as a dependency --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index 81a58a7057c..764b7a90ef4 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "0.2.2-pre" [deps] HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" From fb633ac9e089cd9467bc3c47e66b2301be87a86b Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Thu, 3 Sep 2020 11:28:18 +0200 Subject: [PATCH 02/81] Create parallel mesh struct --- src/mesh/mesh.jl | 1 + src/mesh/parallel_tree.jl | 822 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 823 insertions(+) create mode 100644 src/mesh/parallel_tree.jl diff --git a/src/mesh/mesh.jl b/src/mesh/mesh.jl index db377ff2b24..a75c8ae9473 100644 --- a/src/mesh/mesh.jl +++ b/src/mesh/mesh.jl @@ -1,5 +1,6 @@ include("tree.jl") +include("parallel_tree.jl") # Composite type to hold the actual tree in addition to other mesh-related data # that is not strictly part of the tree. diff --git a/src/mesh/parallel_tree.jl b/src/mesh/parallel_tree.jl new file mode 100644 index 00000000000..81ad1861597 --- /dev/null +++ b/src/mesh/parallel_tree.jl @@ -0,0 +1,822 @@ + +# Composite type that represents a NDIMS-dimensional tree. +# +# Implements everything required for AbstractContainer. +# +# Note: The way the data structures are set up and the way most algorithms +# work, it is *always* assumed that +# a) we have a balanced tree (= at most one level difference between +# neighboring cells, or 2:1 rule) +# b) we may not have all children (= some children may not exist) +# c) the tree is stored depth-first +# +# However, the way the refinement/coarsening algorithms are currently +# implemented, we only have fully refined cells. That is, a cell either has 2^NDIMS children or +# no children at all (= leaf cell). This restriction is also assumed at +# multiple positions in the refinement/coarsening algorithms. +# +# An exception to the 2:1 rule exists for the low-level `refine_unbalanced!` +# function, which is required for implementing level-wise refinement in a sane +# way. Also, depth-first ordering *might* not by guaranteed during +# refinement/coarsening operations. 
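To make the depth-first storage described above concrete, here is a minimal usage sketch (an illustration, not part of the patch; it assumes the constructors, refine!, leaf_cells, and length defined further below behave like their counterparts in the serial Tree):

    t = ParallelTree{2}(1000, [0.0, 0.0], 2.0)  # capacity 1000, centered at the origin, edge length 2
    refine!(t)                                  # refine the root: its 2^2 = 4 children are stored right behind it
    length(t)                                   # 5 (root + 4 children, depth-first)
    leaf_cells(t)                               # [2, 3, 4, 5]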
+mutable struct ParallelTree{NDIMS} <: AbstractContainer + parent_ids::Vector{Int} + child_ids::Matrix{Int} + neighbor_ids::Matrix{Int} + levels::Vector{Int} + coordinates::Matrix{Float64} + original_cell_ids::Vector{Int} + + capacity::Int + length::Int + dummy::Int + + center_level_0::MVector{NDIMS, Float64} + length_level_0::Float64 + periodicity::NTuple{NDIMS, Bool} + + function ParallelTree{NDIMS}(capacity::Integer) where NDIMS + # Verify that NDIMS is an integer + @assert NDIMS isa Integer + + # Create instance + t = new() + + # Initialize fields with defaults + # Note: length as capacity + 1 is to use `capacity + 1` as temporary storage for swap operations + t.parent_ids = fill(typemin(Int), capacity + 1) + t.child_ids = fill(typemin(Int), 2^NDIMS, capacity + 1) + t.neighbor_ids = fill(typemin(Int), 2*NDIMS, capacity + 1) + t.levels = fill(typemin(Int), capacity + 1) + t.coordinates = fill(NaN, NDIMS, capacity + 1) + t.original_cell_ids = fill(typemin(Int), capacity + 1) + + t.capacity = capacity + t.length = 0 + t.dummy = capacity + 1 + + t.center_level_0 = @MVector fill(NaN, NDIMS) + t.length_level_0 = NaN + + return t + end +end + + +# Constructor for passing the dimension as an argument +ParallelTree(::Val{NDIMS}, args...) where NDIMS = ParallelTree{NDIMS}(args...) + +# Create and initialize tree +function ParallelTree{NDIMS}(capacity::Int, center::AbstractArray{Float64}, + length::Real, periodicity=true) where NDIMS + # Create instance + t = ParallelTree{NDIMS}(capacity) + + # Initialize root cell + init!(t, center, length, periodicity) + + return t +end + +# Constructor accepting a single number as center (as opposed to an array) for 1D +ParallelTree{1}(cap::Int, center::Real, len::Real, periodicity=true) = ParallelTree{1}(cap, [convert(Float64, center)], len, periodicity) + + +# Clear tree with deleting data structures, store center and length, and create root cell +function init!(t::ParallelTree, center::AbstractArray{Float64}, length::Real, periodicity=true) + clear!(t) + + # Set domain information + t.center_level_0 = center + t.length_level_0 = length + + # Create root cell + t.length += 1 + t.parent_ids[1] = 0 + t.child_ids[:, 1] .= 0 + t.levels[1] = 0 + t.coordinates[:, 1] .= t.center_level_0 + t.original_cell_ids[1] = 0 + + # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor + if all(periodicity) + # Also catches case where periodicity = true + t.neighbor_ids[:, 1] .= 1 + t.periodicity = ntuple(x->true, ndims(t)) + elseif !any(periodicity) + # Also catches case where periodicity = false + t.neighbor_ids[:, 1] .= 0 + t.periodicity = ntuple(x->false, ndims(t)) + else + # Default case if periodicity is an iterable + for dimension in 1:ndims(t) + if periodicity[dimension] + t.neighbor_ids[2 * dimension - 1, 1] = 1 + t.neighbor_ids[2 * dimension - 0, 1] = 1 + else + t.neighbor_ids[2 * dimension - 1, 1] = 0 + t.neighbor_ids[2 * dimension - 0, 1] = 0 + end + end + + t.periodicity = Tuple(periodicity) + end +end + + +# Convenience output for debugging +function Base.show(io::IO, t::ParallelTree{NDIMS}) where NDIMS + l = t.length + println(io, '*'^20) + println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])") + println(io, "transpose(t.child_ids[:, 1:l]) = $(transpose(t.child_ids[:, 1:l]))") + println(io, "transpose(t.neighbor_ids[:, 1:l]) = $(transpose(t.neighbor_ids[:, 1:l]))") + println(io, "t.levels[1:l] = $(t.levels[1:l])") + println(io, "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))") + println(io, 
"t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])") + println(io, "t.capacity = $(t.capacity)") + println(io, "t.length = $(t.length)") + println(io, "t.dummy = $(t.dummy)") + println(io, "t.center_level_0 = $(t.center_level_0)") + println(io, "t.length_level_0 = $(t.length_level_0)") + println(io, '*'^20) +end + +# Type traits to obtain dimension +@inline Base.ndims(t::Type{ParallelTree{NDIMS}}) where NDIMS = NDIMS +@inline Base.ndims(t::ParallelTree) = ndims(typeof(t)) + + +# Auxiliary methods to allow semantic queries on the tree +# Check whether cell has parent cell +has_parent(t::ParallelTree, cell_id::Int) = t.parent_ids[cell_id] > 0 + +# Count number of children for a given cell +n_children(t::ParallelTree, cell_id::Int) = count(x -> (x > 0), @view t.child_ids[:, cell_id]) + +# Check whether cell has any child cell +has_children(t::ParallelTree, cell_id::Int) = n_children(t, cell_id) > 0 + +# Check whether cell is leaf cell +is_leaf(t::ParallelTree, cell_id::Int) = !has_children(t, cell_id) + +# Check whether cell has specific child cell +has_child(t::ParallelTree, cell_id::Int, child::Int) = t.child_ids[child, cell_id] > 0 + +# Check if cell has a neighbor at the same refinement level in the given direction +has_neighbor(t::ParallelTree, cell_id::Int, direction::Int) = t.neighbor_ids[direction, cell_id] > 0 + +# Check if cell has a coarse neighbor, i.e., with one refinement level lower +function has_coarse_neighbor(t::ParallelTree, cell_id::Int, direction::Int) + return has_parent(t, cell_id) && has_neighbor(t, t.parent_ids[cell_id], direction) +end + +# Check if cell has any neighbor (same-level or lower-level) +function has_any_neighbor(t::ParallelTree, cell_id::Int, direction::Int) + return has_neighbor(t, cell_id, direction) || has_coarse_neighbor(t, cell_id, direction) +end + +# Return cell length for a given level +length_at_level(t::ParallelTree, level::Int) = t.length_level_0 / 2^level + +# Return cell length for a given cell +length_at_cell(t::ParallelTree, cell_id::Int) = length_at_level(t, t.levels[cell_id]) + +# Return minimum level of any leaf cell +minimum_level(t::ParallelTree) = minimum(t.levels[leaf_cells(t)]) + +# Return maximum level of any leaf cell +maximum_level(t::ParallelTree) = maximum(t.levels[leaf_cells(t)]) + +# Check if tree is periodic +isperiodic(t::ParallelTree) = all(t.periodicity) +isperiodic(t::ParallelTree, dimension) = t.periodicity[dimension] + + +# Auxiliary methods for often-required calculations +# Number of potential child cells +n_children_per_cell(::ParallelTree{NDIMS}) where NDIMS = 2^NDIMS +n_children_per_cell(dims::Integer) = 2^dims + +# Number of directions +# +# Directions are indicated by numbers from 1 to 2*ndims: +# 1 -> -x +# 2 -> +x +# 3 -> -y +# 4 -> +y +# 5 -> -z +# 6 -> +z +n_directions(::ParallelTree{NDIMS}) where NDIMS = 2 * NDIMS + +# For a given direction, return its opposite direction +# +# dir -> opp +# 1 -> 2 +# 2 -> 1 +# 3 -> 4 +# 4 -> 3 +# 5 -> 6 +# 6 -> 5 +opposite_direction(direction::Int) = direction + 1 - 2 * ((direction + 1) % 2) + +# For a given child position (from 1 to 8) and dimension (from 1 to 3), +# calculate a child cell's position relative to its parent cell. 
+# +# Essentially calculates the following +# dim=1 dim=2 dim=3 +# child x y z +# 1 - - - +# 2 + - - +# 3 - + - +# 4 + + - +# 5 - - + +# 6 + - + +# 7 - + + +# 8 + + + +child_sign(child::Int, dim::Int) = 1 - 2 * (div(child + 2^(dim - 1) - 1, 2^(dim-1)) % 2) + + +# For each child position (1 to 8) and a given direction (from 1 to 6), return +# neighboring child position. +adjacent_child(child::Int, direction::Int) = [2 2 3 3 5 5; + 1 1 4 4 6 6; + 4 4 1 1 7 7; + 3 3 2 2 8 8; + 6 6 7 7 1 1; + 5 5 8 8 2 2; + 8 8 5 5 3 3; + 7 7 6 6 4 4][child, direction] + + +# For each child position (1 to 8) and a given direction (from 1 to 6), return +# if neighbor is a sibling +function has_sibling(child::Int, direction::Int) + return (child_sign(child, div(direction + 1, 2)) * (-1)^(direction - 1)) > 0 +end + + +# Obtain leaf cells that fulfill a given criterion. +# +# The function `f` is passed the cell id of each leaf cell +# as an argument. +function filter_leaf_cells(f, t::ParallelTree) + filtered = Vector{Int}(undef, length(t)) + count = 0 + for cell_id in 1:length(t) + if is_leaf(t, cell_id) && f(cell_id) + count += 1 + filtered[count] = cell_id + end + end + + return filtered[1:count] +end + + +# Return an array with the ids of all leaf cells +leaf_cells(t::ParallelTree) = filter_leaf_cells((cell_id)->true, t) + + +# Count the number of leaf cells. +count_leaf_cells(t::ParallelTree) = length(leaf_cells(t)) + + +# Store cell id in each cell to use for post-AMR analysis +function reset_original_cell_ids!(t::ParallelTree) + t.original_cell_ids[1:length(t)] .= 1:length(t) +end + + +# Refine entire tree by one level +refine!(t::ParallelTree) = refine!(t, leaf_cells(t)) + + +# Refine given cells and rebalance tree. +# +# Note 1: Rebalancing is iterative, i.e., neighboring cells are refined if +# otherwise the 2:1 rule would be violated, which can cause more +# refinements. +# Note 2: Rebalancing currently only considers *Cartesian* neighbors, not diagonal neighbors! +function refine!(t::ParallelTree, cell_ids) + # Reset original cell ids such that each cell knows its current id + reset_original_cell_ids!(t) + + # Refine all requested cells + refined = refine_unbalanced!(t, cell_ids) + refinement_count = length(refined) + + # Iteratively rebalance the tree until it does not change anymore + while length(refined) > 0 + refined = rebalance!(t, refined) + refinement_count += length(refined) + end + + # Determine list of *original* cell ids that were refined + # Note: original_cell_ids contains the cell_id *before* refinement. At + # refinement, the refined cell's original_cell_ids value has its sign flipped + # to easily find it now. + @views refined_original_cells = ( + -t.original_cell_ids[1:length(t)][t.original_cell_ids[1:length(t)] .< 0]) + + # Check if count of refinement cells matches information in original_cell_ids + @assert refinement_count == length(refined_original_cells) ( + "Mismatch in number of refined cells") + + return refined_original_cells +end + + +# Refine all leaf cells with coordinates in a given rectangular box +function refine_box!(t::ParallelTree{NDIMS}, coordinates_min::AbstractArray{Float64}, + coordinates_max::AbstractArray{Float64}) where NDIMS + for dim in 1:NDIMS + @assert coordinates_min[dim] < coordinates_max[dim] "Minimum coordinates are not minimum." 
+ end + + # Find all leaf cells within box + cells = filter_leaf_cells(t) do cell_id + return (all(coordinates_min .< t.coordinates[:, cell_id]) && + all(coordinates_max .> t.coordinates[:, cell_id])) + end + + # Refine cells + refine!(t, cells) +end + +# Convenience method for 1D +function refine_box!(t::ParallelTree{1}, coordinates_min::Real, coordinates_max::Real) + return refine_box!(t, [convert(Float64, coordinates_min)], [convert(Float64, coordinates_max)]) +end + + +# For the given cell ids, check if neighbors need to be refined to restore a rebalanced tree. +# +# Note 1: Rebalancing currently only considers *Cartesian* neighbors, not diagonal neighbors! +# Note 2: The current algorithm assumes that a previous refinement step has +# created level differences of at most 2. That is, before the previous +# refinement step, the tree was balanced. +function rebalance!(t::ParallelTree, refined_cell_ids) + # Create buffer for newly refined cells + to_refine = zeros(Int, n_directions(t) * length(refined_cell_ids)) + count = 0 + + # Iterate over cell ids that have previously been refined + for cell_id in refined_cell_ids + # Go over all potential neighbors of child cell + for direction in 1:n_directions(t) + # Continue if refined cell has a neighbor in that direction + if has_neighbor(t, cell_id, direction) + continue + end + + # Continue if refined cell has no coarse neighbor, since that would + # mean it there is no neighbor in that direction at all (domain + # boundary) + if !has_coarse_neighbor(t, cell_id, direction) + continue + end + + # Otherwise, the coarse neighbor exists and is not refined, thus it must + # be marked for refinement + coarse_neighbor_id = t.neighbor_ids[direction, t.parent_ids[cell_id]] + count += 1 + to_refine[count] = coarse_neighbor_id + end + end + + # Finally, refine all marked cells... + refined = refine_unbalanced!(t, unique(to_refine[1:count])) + + # ...and return list of refined cells + return refined +end + + +# Refine given cells without rebalancing tree. +# +# Note: After a call to this method the tree may be unbalanced! 
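Before the implementation, a short worked example of the id bookkeeping used below (illustrative only): when several cells are refined in one call, each earlier insertion shifts the ids of the remaining requested cells by one full set of children.

    # 2D, i.e. 4 children per cell; refinement requested for cells 2 and 3
    n_children = 4
    for (count, original_cell_id) in enumerate([2, 3])
        cell_id = original_cell_id + (count - 1) * n_children
        println(cell_id)   # prints 2, then 7: cell 3 moved behind the 4 children inserted after cell 2
    end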
+function refine_unbalanced!(t::ParallelTree, cell_ids) + # Store actual ids refined cells (shifted due to previous insertions) + refined = zeros(Int, length(cell_ids)) + + # Loop over all cells that are to be refined + for (count, original_cell_id) in enumerate(sort(unique(cell_ids))) + # Determine actual cell id, taking into account previously inserted cells + n_children = n_children_per_cell(t) + cell_id = original_cell_id + (count - 1) * n_children + refined[count] = cell_id + + @assert !has_children(t, cell_id) "Non-leaf cell $cell_id cannot be refined" + + # Insert new cells directly behind parent (depth-first) + insert!(t, cell_id + 1, n_children) + + # Flip sign of refined cell such that we can easily find it later + t.original_cell_ids[cell_id] = -t.original_cell_ids[cell_id] + + # Initialize child cells + for child in 1:n_children + # Set child information based on parent + child_id = cell_id + child + t.parent_ids[child_id] = cell_id + t.child_ids[child, cell_id] = child_id + t.neighbor_ids[:, child_id] .= 0 + t.child_ids[:, child_id] .= 0 + t.levels[child_id] = t.levels[cell_id] + 1 + t.coordinates[:, child_id] .= child_coordinates( + t, t.coordinates[:, cell_id], length_at_cell(t, cell_id), child) + t.original_cell_ids[child_id] = 0 + + # For determining neighbors, use neighbor connections of parent cell + for direction in 1:n_directions(t) + # If neighbor is a sibling, establish one-sided connectivity + # Note: two-sided is not necessary, as each sibling will do this + if has_sibling(child, direction) + adjacent = adjacent_child(child, direction) + neighbor_id = cell_id + adjacent + + t.neighbor_ids[direction, child_id] = neighbor_id + continue + end + + # Skip if original cell does have no neighbor in direction + if !has_neighbor(t, cell_id, direction) + continue + end + + # Otherwise, check if neighbor has children - if not, skip again + neighbor_id = t.neighbor_ids[direction, cell_id] + if !has_children(t, neighbor_id) + continue + end + + # Check if neighbor has corresponding child and if yes, establish connectivity + adjacent = adjacent_child(child, direction) + if has_child(t, neighbor_id, adjacent) + neighbor_child_id = t.child_ids[adjacent, neighbor_id] + opposite = opposite_direction(direction) + + t.neighbor_ids[direction, child_id] = neighbor_child_id + t.neighbor_ids[opposite, neighbor_child_id] = child_id + end + end + end + end + + return refined +end + +# Wrap single-cell refinements such that `sort(...)` does not complain +refine_unbalanced!(t::ParallelTree, cell_id::Int) = refine_unbalanced!(t, [cell_id]) + + +# Coarsen entire tree by one level +function coarsen!(t::ParallelTree) + # Special case: if there is only one cell (root), there is nothing to do + if length(t) == 1 + return Int[] + end + + # Get list of unique parent ids for all leaf cells + parent_ids = unique(t.parent_ids[leaf_cells(t)]) + coarsen!(t, parent_ids) +end + + +# Coarsen given *parent* cells (= these cells must have children who are all +# leaf cells) while retaining a balanced tree. +# +# A cell to be coarsened might cause an unbalanced tree if the neighboring cell +# was already refined. Since it is generally not desired that cells are +# coarsened without specifically asking for it, these cells will then *not* be +# coarsened. 
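One detail of the function below that is easy to miss is the id adjustment after each removal: deleting the 2^NDIMS children of a coarsened cell shifts every later cell id down by that count. A small illustrative sketch (not part of the patch):

    # 2D: coarsening cell 6 removes its children (cells 7-10);
    # pending candidates with larger ids must shift down by 4
    pending = [3, 12, 20]
    coarsened_cell_id, count = 6, 4
    for (i, cell_id) in enumerate(pending)
        if cell_id > coarsened_cell_id
            pending[i] = cell_id - count
        end
    end
    # pending == [3, 8, 16]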
+function coarsen!(t::ParallelTree, cell_ids::AbstractArray{Int}) + # Return early if array is empty + if length(cell_ids) == 0 + return Int[] + end + + # Reset original cell ids such that each cell knows its current id + reset_original_cell_ids!(t) + + # To maximize the number of cells that may be coarsened, start with the cells at the highest level + sorted_by_level = sort(cell_ids, by = i -> t.levels[i]) + + # Keep track of number of cells that were actually coarsened + n_coarsened = 0 + + # Local function to adjust cell ids after some cells have been removed + function adjust_cell_ids!(cell_ids, coarsened_cell_id, count) + for (id, cell_id) in enumerate(cell_ids) + if cell_id > coarsened_cell_id + cell_ids[id] = cell_id - count + end + end + end + + # Iterate backwards over cells to coarsen + while true + # Retrieve next cell or quit + if length(sorted_by_level) > 0 + coarse_cell_id = pop!(sorted_by_level) + else + break + end + + # Ensure that cell has children (violation is an error) + if !has_children(t, coarse_cell_id) + error("cell is leaf and cannot be coarsened to: $coarse_cell_id") + end + + # Ensure that all child cells are leaf cells (violation is an error) + for child in 1:n_children_per_cell(t) + if has_child(t, coarse_cell_id, child) + if !is_leaf(t, t.child_ids[child, coarse_cell_id]) + error("cell $coarse_cell_id has child cell at position $child that is not a leaf cell") + end + end + end + + # Check if coarse cell has refined neighbors that would prevent coarsening + skip = false + # Iterate over all children (which are to be removed) + for child in 1:n_children_per_cell(t) + # Continue if child does not exist + if !has_child(t, coarse_cell_id, child) + continue + end + child_id = t.child_ids[child, coarse_cell_id] + + # Go over all neighbors of child cell. If it has a neighbor that is *not* + # a sibling and that is not a leaf cell, we cannot coarsen its parent + # without creating an unbalanced tree. + for direction in 1:n_directions(t) + # Continue if neighbor would be a sibling + if has_sibling(child, direction) + continue + end + + # Continue if child cell has no neighbor in that direction + if !has_neighbor(t, child_id, direction) + continue + end + neighbor_id = t.neighbor_ids[direction, child_id] + + if !has_children(t, neighbor_id) + continue + end + + # If neighbor is not a sibling, is existing, and has children, do not coarsen + skip = true + break + end + end + # Skip if a neighboring cell prevents coarsening + if skip + continue + end + + # Flip sign of cell to be coarsened to such that we can easily find it + t.original_cell_ids[coarse_cell_id] = -t.original_cell_ids[coarse_cell_id] + + # If a coarse cell has children that are all leaf cells, they must follow + # immediately due to depth-first ordering of the tree + count = n_children(t, coarse_cell_id) + @assert count == n_children_per_cell(t) "cell $coarse_cell_id does not have all child cells" + remove_shift!(t, coarse_cell_id + 1, coarse_cell_id + count) + + # Take into account shifts in tree that alters cell ids + adjust_cell_ids!(sorted_by_level, coarse_cell_id, count) + + # Keep track of number of coarsened cells + n_coarsened += 1 + end + + # Determine list of *original* cell ids that were coarsened to + # Note: original_cell_ids contains the cell_id *before* coarsening. At + # coarsening, the coarsened parent cell's original_cell_ids value has its sign flipped + # to easily find it now. 
+ @views coarsened_original_cells = ( + -t.original_cell_ids[1:length(t)][t.original_cell_ids[1:length(t)] .< 0]) + + # Check if count of coarsened cells matches information in original_cell_ids + @assert n_coarsened == length(coarsened_original_cells) ( + "Mismatch in number of coarsened cells") + + return coarsened_original_cells +end + +# Wrap single-cell coarsening such that `sort(...)` does not complain +coarsen!(t::ParallelTree, cell_id::Int) = coarsen!(t::ParallelTree, [cell_id]) + + +# Coarsen all viable parent cells with coordinates in a given rectangular box +function coarsen_box!(t::ParallelTree{NDIMS}, coordinates_min::AbstractArray{Float64}, + coordinates_max::AbstractArray{Float64}) where NDIMS + for dim in 1:NDIMS + @assert coordinates_min[dim] < coordinates_max[dim] "Minimum coordinates are not minimum." + end + + # Find all leaf cells within box + leaves = filter_leaf_cells(t) do cell_id + return (all(coordinates_min .< t.coordinates[:, cell_id]) && + all(coordinates_max .> t.coordinates[:, cell_id])) + end + + # Get list of unique parent ids for all leaf cells + parent_ids = unique(t.parent_ids[leaves]) + + # Filter parent ids to be within box + parents = filter(parent_ids) do cell_id + return (all(coordinates_min .< t.coordinates[:, cell_id]) && + all(coordinates_max .> t.coordinates[:, cell_id])) + end + + # Coarsen cells + coarsen!(t, parents) +end + +# Convenience method for 1D +function coarsen_box!(t::ParallelTree{1}, coordinates_min::Real, coordinates_max::Real) + return coarsen_box!(t, [convert(Float64, coordinates_min)], [convert(Float64, coordinates_max)]) +end + + +# Return coordinates of a child cell based on its relative position to the parent. +function child_coordinates(::ParallelTree{NDIMS}, parent_coordinates, parent_length::Number, child::Int) where NDIMS + # Calculate length of child cells and set up data structure + child_length = parent_length / 2 + coordinates = MVector{NDIMS, Float64}(undef) + + # For each dimension, calculate coordinate as parent coordinate + relative position x length/2 + for d in 1:NDIMS + coordinates[d] = parent_coordinates[d] + child_sign(child, d) * child_length / 2 + end + + return coordinates +end + + +# Reset range of cells to values that are prone to cause errors as soon as they are used. +# +# Rationale: If an invalid cell is accidentally used, we want to know it as soon as possible. 
+function invalidate!(t::ParallelTree, first::Int, last::Int) + @assert first > 0 + @assert last <= t.capacity + 1 + + # Integer values are set to smallest negative value, floating point values to NaN + t.parent_ids[first:last] .= typemin(Int) + t.child_ids[:, first:last] .= typemin(Int) + t.neighbor_ids[:, first:last] .= typemin(Int) + t.levels[first:last] .= typemin(Int) + t.coordinates[:, first:last] .= NaN + t.original_cell_ids[first:last] .= typemin(Int) + + return nothing +end +invalidate!(t::ParallelTree, id::Int) = invalidate!(t, id, id) +invalidate!(t::ParallelTree) = invalidate!(t, 1, length(t)) + + +# Delete connectivity with parents/children/neighbors before cells are erased +function delete_connectivity!(t::ParallelTree, first::Int, last::Int) + @assert first > 0 + @assert first <= last + @assert last <= t.capacity + 1 + + # Iterate over all cells + for cell_id in first:last + # Delete connectivity from parent cell + if has_parent(t, cell_id) + parent_id = t.parent_ids[cell_id] + for child in 1:n_children_per_cell(t) + if t.child_ids[child, parent_id] == cell_id + t.child_ids[child, parent_id] = 0 + break + end + end + end + + # Delete connectivity from child cells + for child in 1:n_children_per_cell(t) + if has_child(t, cell_id, child) + t.parent_ids[t._child_ids[child, cell_id]] = 0 + end + end + + # Delete connectivity from neighboring cells + for direction in 1:n_directions(t) + if has_neighbor(t, cell_id, direction) + t.neighbor_ids[opposite_direction(direction), t.neighbor_ids[direction, cell_id]] = 0 + end + end + end +end + + +# Move connectivity with parents/children/neighbors after cells have been moved +function move_connectivity!(t::ParallelTree, first::Int, last::Int, destination::Int) + @assert first > 0 + @assert first <= last + @assert last <= t.capacity + 1 + @assert destination > 0 + @assert destination <= t.capacity + 1 + + # Strategy + # 1) Loop over moved cells (at target location) + # 2) Check if parent/children/neighbors connections are to a cell that was moved + # a) if cell was moved: apply offset to current cell + # b) if cell was not moved: go to connected cell and update connectivity there + + offset = destination - first + has_moved(n) = (first <= n <= last) + + for source in first:last + target = source + offset + + # Update parent + if has_parent(t, target) + # Get parent cell + parent_id = t.parent_ids[target] + if has_moved(parent_id) + # If parent itself was moved, just update parent id accordingly + t.parent_ids[target] += offset + else + # If parent was not moved, update its corresponding child id + for child in 1:n_children_per_cell(t) + if t.child_ids[child, parent_id] == source + t.child_ids[child, parent_id] = target + end + end + end + end + + # Update children + for child in 1:n_children_per_cell(t) + if has_child(t, target, child) + # Get child cell + child_id = t.child_ids[child, target] + if has_moved(child_id) + # If child itself was moved, just update child id accordingly + t.child_ids[child, target] += offset + else + # If child was not moved, update its parent id + t.parent_ids[child_id] = target + end + end + end + + # Update neighbors + for direction in 1:n_directions(t) + if has_neighbor(t, target, direction) + # Get neighbor cell + neighbor_id = t.neighbor_ids[direction, target] + if has_moved(neighbor_id) + # If neighbor itself was moved, just update neighbor id accordingly + t.neighbor_ids[direction, target] += offset + else + # If neighbor was not moved, update its opposing neighbor id + 
t.neighbor_ids[opposite_direction(direction), neighbor_id] = target + end + end + end + end +end + + +# Raw copy operation for ranges of cells. +# +# This method is used by the higher-level copy operations for AbstractContainer +function raw_copy!(target::ParallelTree, source::ParallelTree, first::Int, last::Int, destination::Int) + copy_data!(target.parent_ids, source.parent_ids, first, last, destination) + copy_data!(target.child_ids, source.child_ids, first, last, destination, + n_children_per_cell(target)) + copy_data!(target.neighbor_ids, source.neighbor_ids, first, last, + destination, n_directions(target)) + copy_data!(target.levels, source.levels, first, last, destination) + copy_data!(target.coordinates, source.coordinates, first, last, destination, ndims(target)) + copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, destination) +end +function raw_copy!(c::AbstractContainer, first::Int, last::Int, destination::Int) + raw_copy!(c, c, first, last, destination) +end +function raw_copy!(target::AbstractContainer, source::AbstractContainer, from::Int, destination::Int) + raw_copy!(target, source, from, from, destination) +end +function raw_copy!(c::AbstractContainer, from::Int, destination::Int) + raw_copy!(c, c, from, from, destination) +end + + +# Reset data structures by recreating all internal storage containers and invalidating all elements +function reset_data_structures!(t::ParallelTree{NDIMS}) where NDIMS + t.parent_ids = Vector{Int}(undef, t.capacity + 1) + t.child_ids = Matrix{Int}(undef, 2^NDIMS, t.capacity + 1) + t.neighbor_ids = Matrix{Int}(undef, 2*NDIMS, t.capacity + 1) + t.levels = Vector{Int}(undef, t.capacity + 1) + t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) + t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) + + invalidate!(t, 1, capacity(t) + 1) +end From 8f81b0b5c5f97a9e33dc53a827deb5ab5e6bab60 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 4 Sep 2020 11:57:52 +0200 Subject: [PATCH 03/81] Add "domain_ids" to parallel mesh and comment out duplicate methods --- src/auxiliary/containers.jl | 12 ++++++++++ src/mesh/parallel_tree.jl | 45 ++++++++++++++++++------------------- src/mesh/tree.jl | 9 -------- 3 files changed, 34 insertions(+), 32 deletions(-) diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index d513040f1a0..4d6befe9635 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -307,3 +307,15 @@ function clear!(c::AbstractContainer) return c end + + +# Helpful overloads for `raw_copy` +function raw_copy!(c::AbstractContainer, first::Int, last::Int, destination::Int) + raw_copy!(c, c, first, last, destination) +end +function raw_copy!(target::AbstractContainer, source::AbstractContainer, from::Int, destination::Int) + raw_copy!(target, source, from, from, destination) +end +function raw_copy!(c::AbstractContainer, from::Int, destination::Int) + raw_copy!(c, c, from, from, destination) +end diff --git a/src/mesh/parallel_tree.jl b/src/mesh/parallel_tree.jl index 81ad1861597..42584108007 100644 --- a/src/mesh/parallel_tree.jl +++ b/src/mesh/parallel_tree.jl @@ -26,6 +26,7 @@ mutable struct ParallelTree{NDIMS} <: AbstractContainer levels::Vector{Int} coordinates::Matrix{Float64} original_cell_ids::Vector{Int} + domain_ids::Vector{Int} capacity::Int length::Int @@ -50,6 +51,7 @@ mutable struct ParallelTree{NDIMS} <: AbstractContainer t.levels = fill(typemin(Int), capacity + 1) t.coordinates = fill(NaN, NDIMS, capacity + 1) t.original_cell_ids = 
fill(typemin(Int), capacity + 1) + t.domain_ids = fill(typemin(Int), capacity + 1) t.capacity = capacity t.length = 0 @@ -97,6 +99,7 @@ function init!(t::ParallelTree, center::AbstractArray{Float64}, length::Real, pe t.levels[1] = 0 t.coordinates[:, 1] .= t.center_level_0 t.original_cell_ids[1] = 0 + t.domain_ids[1] = 0 # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor if all(periodicity) @@ -134,6 +137,7 @@ function Base.show(io::IO, t::ParallelTree{NDIMS}) where NDIMS println(io, "t.levels[1:l] = $(t.levels[1:l])") println(io, "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))") println(io, "t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])") + println(io, "t.domain_ids[1:l] = $(t.domain_ids[1:l])") println(io, "t.capacity = $(t.capacity)") println(io, "t.length = $(t.length)") println(io, "t.dummy = $(t.dummy)") @@ -196,7 +200,7 @@ isperiodic(t::ParallelTree, dimension) = t.periodicity[dimension] # Auxiliary methods for often-required calculations # Number of potential child cells n_children_per_cell(::ParallelTree{NDIMS}) where NDIMS = 2^NDIMS -n_children_per_cell(dims::Integer) = 2^dims +# n_children_per_cell(dims::Integer) = 2^dims # Number of directions # @@ -218,7 +222,7 @@ n_directions(::ParallelTree{NDIMS}) where NDIMS = 2 * NDIMS # 4 -> 3 # 5 -> 6 # 6 -> 5 -opposite_direction(direction::Int) = direction + 1 - 2 * ((direction + 1) % 2) +# opposite_direction(direction::Int) = direction + 1 - 2 * ((direction + 1) % 2) # For a given child position (from 1 to 8) and dimension (from 1 to 3), # calculate a child cell's position relative to its parent cell. @@ -234,26 +238,26 @@ opposite_direction(direction::Int) = direction + 1 - 2 * ((direction + 1) % 2) # 6 + - + # 7 - + + # 8 + + + -child_sign(child::Int, dim::Int) = 1 - 2 * (div(child + 2^(dim - 1) - 1, 2^(dim-1)) % 2) +# child_sign(child::Int, dim::Int) = 1 - 2 * (div(child + 2^(dim - 1) - 1, 2^(dim-1)) % 2) # For each child position (1 to 8) and a given direction (from 1 to 6), return # neighboring child position. -adjacent_child(child::Int, direction::Int) = [2 2 3 3 5 5; - 1 1 4 4 6 6; - 4 4 1 1 7 7; - 3 3 2 2 8 8; - 6 6 7 7 1 1; - 5 5 8 8 2 2; - 8 8 5 5 3 3; - 7 7 6 6 4 4][child, direction] +# adjacent_child(child::Int, direction::Int) = [2 2 3 3 5 5; +# 1 1 4 4 6 6; +# 4 4 1 1 7 7; +# 3 3 2 2 8 8; +# 6 6 7 7 1 1; +# 5 5 8 8 2 2; +# 8 8 5 5 3 3; +# 7 7 6 6 4 4][child, direction] # For each child position (1 to 8) and a given direction (from 1 to 6), return # if neighbor is a sibling -function has_sibling(child::Int, direction::Int) - return (child_sign(child, div(direction + 1, 2)) * (-1)^(direction - 1)) > 0 -end +# function has_sibling(child::Int, direction::Int) +# return (child_sign(child, div(direction + 1, 2)) * (-1)^(direction - 1)) > 0 +# end # Obtain leaf cells that fulfill a given criterion. 
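The helpers above are commented out because ParallelTree reuses the definitions already provided for Tree in tree.jl. As a quick sanity check of those shared formulas (illustrative, not part of the patch):

    opposite_direction(1) == 2   # -x <-> +x
    opposite_direction(4) == 3   # +y <-> -y
    child_sign(1, 1) == -1       # child 1 lies on the -x side of its parent
    child_sign(4, 2) == +1       # child 4 lies on the +y side
    adjacent_child(1, 2) == 2    # stepping from child 1 in the +x direction reaches child 2
    has_sibling(1, 2)            # true: that +x neighbor is a sibling of child 1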
@@ -427,6 +431,7 @@ function refine_unbalanced!(t::ParallelTree, cell_ids) t.coordinates[:, child_id] .= child_coordinates( t, t.coordinates[:, cell_id], length_at_cell(t, cell_id), child) t.original_cell_ids[child_id] = 0 + t.domain_ids[child_id] = t.domain_ids[cell_id] # For determining neighbors, use neighbor connections of parent cell for direction in 1:n_directions(t) @@ -672,6 +677,7 @@ function invalidate!(t::ParallelTree, first::Int, last::Int) t.levels[first:last] .= typemin(Int) t.coordinates[:, first:last] .= NaN t.original_cell_ids[first:last] .= typemin(Int) + t.domain_ids[first:last] .= typemin(Int) return nothing end @@ -797,15 +803,7 @@ function raw_copy!(target::ParallelTree, source::ParallelTree, first::Int, last: copy_data!(target.levels, source.levels, first, last, destination) copy_data!(target.coordinates, source.coordinates, first, last, destination, ndims(target)) copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, destination) -end -function raw_copy!(c::AbstractContainer, first::Int, last::Int, destination::Int) - raw_copy!(c, c, first, last, destination) -end -function raw_copy!(target::AbstractContainer, source::AbstractContainer, from::Int, destination::Int) - raw_copy!(target, source, from, from, destination) -end -function raw_copy!(c::AbstractContainer, from::Int, destination::Int) - raw_copy!(c, c, from, from, destination) + copy_data!(target.domain_ids, source.domain_ids, first, last, destination) end @@ -817,6 +815,7 @@ function reset_data_structures!(t::ParallelTree{NDIMS}) where NDIMS t.levels = Vector{Int}(undef, t.capacity + 1) t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) + t.domain_ids = Vector{Int}(undef, t.capacity + 1) invalidate!(t, 1, capacity(t) + 1) end diff --git a/src/mesh/tree.jl b/src/mesh/tree.jl index 209ff05abd9..beb27bea3f7 100644 --- a/src/mesh/tree.jl +++ b/src/mesh/tree.jl @@ -798,15 +798,6 @@ function raw_copy!(target::Tree, source::Tree, first::Int, last::Int, destinatio copy_data!(target.coordinates, source.coordinates, first, last, destination, ndims(target)) copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, destination) end -function raw_copy!(c::AbstractContainer, first::Int, last::Int, destination::Int) - raw_copy!(c, c, first, last, destination) -end -function raw_copy!(target::AbstractContainer, source::AbstractContainer, from::Int, destination::Int) - raw_copy!(target, source, from, from, destination) -end -function raw_copy!(c::AbstractContainer, from::Int, destination::Int) - raw_copy!(c, c, from, from, destination) -end # Reset data structures by recreating all internal storage containers and invalidating all elements From 8b7c9b4f093da0eb723a09677ad69b4420db3b59 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 4 Sep 2020 12:22:22 +0200 Subject: [PATCH 04/81] Add some very basic parallelization methods --- src/Trixi.jl | 2 ++ src/parallel/parallel.jl | 14 ++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 src/parallel/parallel.jl diff --git a/src/Trixi.jl b/src/Trixi.jl index 5604aeee53f..fbfc5578f85 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -20,6 +20,7 @@ using Profile: clear_malloc_data using Random: seed! 
using HDF5: h5open, attrs +using MPI # We use all symbols, but for now we always prefix with `MPI.`, e.g., `MPI.Init()` using StaticArrays: @MVector, @SVector, MVector, MMatrix, MArray, SVector, SMatrix, SArray using TimerOutputs: @notimeit, @timeit, TimerOutput, print_timer, reset_timer! using UnPack: @unpack @@ -30,6 +31,7 @@ export globals # Include all top-level source files include("auxiliary/auxiliary.jl") +include("parallel/parallel.jl") include("equations/equations.jl") include("mesh/mesh.jl") include("solvers/solvers.jl") diff --git a/src/parallel/parallel.jl b/src/parallel/parallel.jl new file mode 100644 index 00000000000..0d821b2b624 --- /dev/null +++ b/src/parallel/parallel.jl @@ -0,0 +1,14 @@ +domain_id(comm) = MPI.Comm_rank(comm) +domain_id() = MPI.Comm_rank(MPI.COMM_WORLD) + +n_domains(comm) = MPI.Comm_size(comm) +n_domains() = MPI.Comm_size(MPI.COMM_WORLD) + +is_parallel(comm) = n_domains(comm) > 1 +is_parallel() = is_parallel(MPI.COMM_WORLD) > 1 + +is_serial(comm) = !is_parallel(comm) +is_serial() = is_serial(MPI.COMM_WORLD) + +is_mpi_root(comm) = domain_id(comm) == 0 +is_mpi_root() = is_mpi_root(MPI.COMM_WORLD) From 3151f4b739a3173e68ec00068580eff163b90822 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 4 Sep 2020 12:22:41 +0200 Subject: [PATCH 05/81] Initialize MPI (if not yet done) at the beginning of `run` --- src/run.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/run.jl b/src/run.jl index 21c0934fcb2..5dfdaa02043 100644 --- a/src/run.jl +++ b/src/run.jl @@ -29,6 +29,11 @@ function run(parameters_file; verbose=false, refinement_level_increment=0, param # Reset timer reset_timer!(timer()) + # Initialize MPI + if !MPI.Initialized() + MPI.Init() + end + # Read command line or keyword arguments and parse parameters file init_parameters(parameters_file; verbose=verbose, refinement_level_increment=refinement_level_increment, parameters...) From 98c0339317cc14deb7b45ddf787337fd55ca45ec Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 4 Sep 2020 15:05:21 +0200 Subject: [PATCH 06/81] Create mesh with parallel tree if running in parallel --- src/mesh/mesh.jl | 42 +++++++++++++++++++++++++--------------- src/parallel/parallel.jl | 4 ++-- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/src/mesh/mesh.jl b/src/mesh/mesh.jl index a75c8ae9473..19afcfa7eb8 100644 --- a/src/mesh/mesh.jl +++ b/src/mesh/mesh.jl @@ -4,32 +4,32 @@ include("parallel_tree.jl") # Composite type to hold the actual tree in addition to other mesh-related data # that is not strictly part of the tree. 
-mutable struct TreeMesh{D} - tree::Tree{D} +mutable struct TreeMesh{NDIMS, TreeType} + tree::TreeType current_filename::String unsaved_changes::Bool - function TreeMesh{D}(n_cells_max::Integer) where D - # Verify that D is an integer - @assert D isa Integer + function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer) where {NDIMS, TreeType} + # Verify that NDIMS is an integer + @assert NDIMS == ndims(TreeType) # Create mesh m = new() - m.tree = Tree{D}(n_cells_max) + m.tree = TreeType{NDIMS}(n_cells_max) m.current_filename = "" m.unsaved_changes = false return m end - function TreeMesh{D}(n_cells_max::Integer, domain_center::AbstractArray{Float64}, - domain_length, periodicity=true) where D - # Verify that D is an integer - @assert D isa Integer + function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer, domain_center::AbstractArray{Float64}, + domain_length, periodicity=true) where{NDIMS, TreeType} + # Verify that NDIMS matches the tree + @assert NDIMS == ndims(TreeType) # Create mesh m = new() - m.tree = Tree{D}(n_cells_max, domain_center, domain_length, periodicity) + m.tree = TreeType(n_cells_max, domain_center, domain_length, periodicity) m.current_filename = "" m.unsaved_changes = false @@ -37,11 +37,15 @@ mutable struct TreeMesh{D} end end -# Constructor for passing the dimension as an argument -TreeMesh(::Val{D}, args...) where D = TreeMesh{D}(args...) +# Constructor for passing the dimension and mesh type as an argument +function TreeMesh(::Val{NDIMS}, ::Val{TreeType}, args...) where {NDIMS, TreeType} + return TreeMesh{NDIMS, TreeType}(args...) +end # Constructor accepting a single number as center (as opposed to an array) for 1D -TreeMesh{1}(n::Int, center::Real, len::Real, periodicity=true) = TreeMesh{1}(n, [convert(Float64, center)], len, periodicity) +function TreeMesh{1, TreeType}(n::Int, center::Real, len::Real, periodicity=true) where TreeType + return TreeMesh{1, TreeType}(n, [convert(Float64, center)], len, periodicity) +end @inline Base.ndims(mesh::TreeMesh) = ndims(mesh.tree) @@ -67,8 +71,14 @@ function generate_mesh() periodicity = parameter("periodicity", true) # Create mesh - @timeit timer() "creation" mesh = TreeMesh(Val{ndims_}(), n_cells_max, domain_center, - domain_length, periodicity) + if is_parallel() + @timeit timer() "creation" mesh = TreeMesh(Val{ndims_}(), Val{ParallelTree{ndims_}}(), + n_cells_max, + domain_center, domain_length, periodicity) + else + @timeit timer() "creation" mesh = TreeMesh(Val{ndims_}(), Val{Tree{ndims_}}(), n_cells_max, + domain_center, domain_length, periodicity) + end # Create initial refinement initial_refinement_level = parameter("initial_refinement_level") diff --git a/src/parallel/parallel.jl b/src/parallel/parallel.jl index 0d821b2b624..2d9def63237 100644 --- a/src/parallel/parallel.jl +++ b/src/parallel/parallel.jl @@ -5,10 +5,10 @@ n_domains(comm) = MPI.Comm_size(comm) n_domains() = MPI.Comm_size(MPI.COMM_WORLD) is_parallel(comm) = n_domains(comm) > 1 -is_parallel() = is_parallel(MPI.COMM_WORLD) > 1 +is_parallel() = is_parallel(MPI.COMM_WORLD) is_serial(comm) = !is_parallel(comm) is_serial() = is_serial(MPI.COMM_WORLD) -is_mpi_root(comm) = domain_id(comm) == 0 +is_mpi_root(comm) = is_serial() || domain_id(comm) == 0 is_mpi_root() = is_mpi_root(MPI.COMM_WORLD) From 177c55950b454b3cff9a7c7148145c7345dd441f Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 4 Sep 2020 15:05:48 +0200 Subject: [PATCH 07/81] Serialize startup message --- src/run.jl | 5 ++++- src/run_euler_gravity.jl | 7 ++++++- 2 files changed, 10 
insertions(+), 2 deletions(-) diff --git a/src/run.jl b/src/run.jl index 5dfdaa02043..9ad2f499cb2 100644 --- a/src/run.jl +++ b/src/run.jl @@ -75,7 +75,9 @@ end function init_simulation() # Print starup message - print_startup_message() + if is_mpi_root() + print_startup_message() + end # Get number of dimensions ndims_ = parameter("ndims")::Int @@ -88,6 +90,7 @@ function init_simulation() # Initialize mesh if restart + if_parallel() && error("restarting not yet implemented in parallel") # TODO print("Loading mesh... ") @timeit timer() "mesh loading" mesh = load_mesh(restart_filename) println("done") diff --git a/src/run_euler_gravity.jl b/src/run_euler_gravity.jl index aad2c3cd226..d4b84fa2f88 100644 --- a/src/run_euler_gravity.jl +++ b/src/run_euler_gravity.jl @@ -1,5 +1,10 @@ function init_simulation_euler_gravity() - # Print starup message + # TODO: Coupled simulations are not yet tested for parallel runs + if is_parallel() + error("coupled simulations are not yet tested for parallel runs") + end + + # Print startup message print_startup_message() # Get number of dimensions From f82d34dffac5955c245e60ba900e4fa124f6ca83 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 4 Sep 2020 15:43:03 +0200 Subject: [PATCH 08/81] Read in parameters in parallel --- src/Trixi.jl | 2 +- src/auxiliary/auxiliary.jl | 18 +++++++++++++++++- src/parallel/parallel.jl | 24 ++++++++++++++---------- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index fbfc5578f85..a45a8f80298 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -14,7 +14,7 @@ module Trixi # Include other packages that are used in Trixi # (standard library packages first, other packages next, all of them sorted alphabetically) -using Pkg.TOML: parsefile +using Pkg.TOML: parsefile, parse using Printf: @printf, @sprintf, println using Profile: clear_malloc_data using Random: seed! 
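The auxiliary.jl hunk below reads the parameters file only on the MPI root and broadcasts it to all other ranks, sending the byte count first so the receivers can allocate a matching buffer. A self-contained sketch of that pattern (file name and setup are placeholders, not taken from the patch):

    using MPI
    MPI.Init()
    comm = MPI.COMM_WORLD
    if MPI.Comm_rank(comm) == 0
        buffer = read("parameters.toml")         # hypothetical file name
        buffer_length = Int[length(buffer)]
        MPI.Bcast!(buffer_length, 0, comm)       # 1) announce the size
        MPI.Bcast!(buffer, 0, comm)              # 2) send the payload
    else
        buffer_length = Int[0]
        MPI.Bcast!(buffer_length, 0, comm)
        buffer = Vector{UInt8}(undef, buffer_length[1])
        MPI.Bcast!(buffer, 0, comm)
    end
    parameters_text = String(buffer)             # every rank now holds the same TOML source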
diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index 1d7108936ab..dc37b65bddf 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -14,7 +14,23 @@ const parameters = Dict{Symbol,Any}() # Parse parameters file into global dict function parse_parameters_file(filename) - parameters[:default] = parsefile(filename) + if is_parallel() + # If parallel, read in file on root domain and distribute to other domains + if is_mpi_root() + buffer = read(filename) + buffer_length = Int[length(buffer)] + MPI.Bcast!(buffer_length, mpi_root(), mpi_comm()) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) + else + buffer_length = Int[0] + MPI.Bcast!(buffer_length, mpi_root(), mpi_comm()) + buffer = Vector{UInt8}(undef, buffer_length[1]) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) + end + parameters[:default] = parse(String(buffer)) + else + parameters[:default] = parsefile(filename) + end parameters[:default]["parameters_file"] = filename end diff --git a/src/parallel/parallel.jl b/src/parallel/parallel.jl index 2d9def63237..40510a428a9 100644 --- a/src/parallel/parallel.jl +++ b/src/parallel/parallel.jl @@ -1,14 +1,18 @@ -domain_id(comm) = MPI.Comm_rank(comm) -domain_id() = MPI.Comm_rank(MPI.COMM_WORLD) +@inline mpi_comm() = MPI.COMM_WORLD -n_domains(comm) = MPI.Comm_size(comm) -n_domains() = MPI.Comm_size(MPI.COMM_WORLD) +@inline domain_id(comm) = MPI.Comm_rank(comm) +@inline domain_id() = MPI.Comm_rank(mpi_comm()) -is_parallel(comm) = n_domains(comm) > 1 -is_parallel() = is_parallel(MPI.COMM_WORLD) +@inline n_domains(comm) = MPI.Comm_size(comm) +@inline n_domains() = MPI.Comm_size(mpi_comm()) -is_serial(comm) = !is_parallel(comm) -is_serial() = is_serial(MPI.COMM_WORLD) +@inline is_parallel(comm) = n_domains(comm) > 1 +@inline is_parallel() = is_parallel(mpi_comm()) -is_mpi_root(comm) = is_serial() || domain_id(comm) == 0 -is_mpi_root() = is_mpi_root(MPI.COMM_WORLD) +@inline is_serial(comm) = !is_parallel(comm) +@inline is_serial() = is_serial(mpi_comm()) + +@inline is_mpi_root(comm) = is_serial() || domain_id(comm) == 0 +@inline is_mpi_root() = is_mpi_root(mpi_comm()) + +@inline mpi_root() = 0 From 4bb691407586b140172a36ff567ac3e25d32a2d4 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 4 Sep 2020 20:39:02 +0200 Subject: [PATCH 09/81] Safe-guard non-parallelized code paths --- src/run.jl | 2 +- src/run_euler_gravity.jl | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/run.jl b/src/run.jl index 9ad2f499cb2..e389501c583 100644 --- a/src/run.jl +++ b/src/run.jl @@ -90,7 +90,7 @@ function init_simulation() # Initialize mesh if restart - if_parallel() && error("restarting not yet implemented in parallel") # TODO + is_parallel() && error("restarting not yet implemented in parallel") # TODO parallel print("Loading mesh... 
") @timeit timer() "mesh loading" mesh = load_mesh(restart_filename) println("done") diff --git a/src/run_euler_gravity.jl b/src/run_euler_gravity.jl index d4b84fa2f88..11aae83f16c 100644 --- a/src/run_euler_gravity.jl +++ b/src/run_euler_gravity.jl @@ -1,8 +1,5 @@ function init_simulation_euler_gravity() - # TODO: Coupled simulations are not yet tested for parallel runs - if is_parallel() - error("coupled simulations are not yet tested for parallel runs") - end + is_parallel() && error("coupled simulations are not yet tested for parallel runs") # TODO parallel # Print startup message print_startup_message() @@ -210,6 +207,8 @@ end function run_simulation_euler_gravity(mesh, solvers, time_parameters, time_integration_function) + is_parallel() && error("coupled simulations are not yet tested for parallel runs") # TODO parallel + @unpack time, step, t_end, cfl, n_steps_max, save_final_solution, save_final_restart, analysis_interval, alive_interval, From 6d6eba7b7b7675ad67740b927aad727103abcf6a Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sat, 5 Sep 2020 06:22:14 +0200 Subject: [PATCH 10/81] Partition mesh statically by leaf cell count --- src/mesh/mesh.jl | 49 +++++++++++++++++++++++++++++++++++++++ src/mesh/parallel_tree.jl | 2 +- 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/mesh/mesh.jl b/src/mesh/mesh.jl index 19afcfa7eb8..ebcb62712a3 100644 --- a/src/mesh/mesh.jl +++ b/src/mesh/mesh.jl @@ -8,6 +8,8 @@ mutable struct TreeMesh{NDIMS, TreeType} tree::TreeType current_filename::String unsaved_changes::Bool + first_cell_by_domain::Vector{Int} + n_cells_by_domain::Vector{Int} function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer) where {NDIMS, TreeType} # Verify that NDIMS is an integer @@ -18,6 +20,8 @@ mutable struct TreeMesh{NDIMS, TreeType} m.tree = TreeType{NDIMS}(n_cells_max) m.current_filename = "" m.unsaved_changes = false + m.first_cell_by_domain = Int[] + m.n_cells_by_domain = Int[] return m end @@ -32,6 +36,8 @@ mutable struct TreeMesh{NDIMS, TreeType} m.tree = TreeType(n_cells_max, domain_center, domain_length, periodicity) m.current_filename = "" m.unsaved_changes = false + m.first_cell_by_domain = Int[] + m.n_cells_by_domain = Int[] return m end @@ -86,8 +92,14 @@ function generate_mesh() refine!(mesh.tree) end + # Partition mesh + if is_parallel() + partition(mesh) + end + # Apply refinement patches @timeit timer() "refinement patches" for patch in parameter("refinement_patches", []) + is_parallel() && error("non-uniform meshes not supported in parallel") if patch["type"] == "box" refine_box!(mesh.tree, patch["coordinates_min"], patch["coordinates_max"]) else @@ -97,6 +109,7 @@ function generate_mesh() # Apply coarsening patches @timeit timer() "coarsening patches" for patch in parameter("coarsening_patches", []) + is_parallel() && error("non-uniform meshes not supported in parallel") if patch["type"] == "box" coarsen_box!(mesh.tree, patch["coordinates_min"], patch["coordinates_max"]) else @@ -161,3 +174,39 @@ function get_restart_mesh_filename(restart_filename) # Construct and return filename return joinpath(dirname, mesh_file) end + + +# Partition mesh using a static domain decomposition algorithm based on leaf cell count alone +# Return first cell id for each domain +function partition(mesh) + # Determine number of leaf cells per domain + leaves = leaf_cells(mesh.tree) + n_leaves_per_domain = fill(div(length(leaves), n_domains()), n_domains()) + for d in 1:rem(length(leaves), n_domains()) + n_leaves_per_domain[d] += 1 + end + @assert 
sum(n_leaves_per_domain) == length(leaves) + + # Assign domain ids to all cells such that all ancestors of each cell - if not yet assigned to a + # domain - belong to the same domain + mesh.first_cell_by_domain = similar(n_leaves_per_domain) + mesh.n_cells_by_domain = similar(n_leaves_per_domain) + + leaf_count = 0 + last_id = leaves[n_leaves_per_domain[1]] + mesh.first_cell_by_domain[1] = 1 + mesh.n_cells_by_domain[1] = last_id + mesh.tree.domain_ids[1:last_id] .= 0 + for d in 2:length(n_leaves_per_domain) + leaf_count += n_leaves_per_domain[d-1] + last_id = leaves[leaf_count + n_leaves_per_domain[d]] + mesh.first_cell_by_domain[d] = mesh.first_cell_by_domain[d-1] + mesh.n_cells_by_domain[d-1] + mesh.n_cells_by_domain[d] = last_id - mesh.first_cell_by_domain[d] + 1 + mesh.tree.domain_ids[mesh.first_cell_by_domain[d]:last_id] .= d-1 + end + + @assert all(x->x >= 0, mesh.tree.domain_ids[1:length(mesh.tree)]) + @assert sum(mesh.n_cells_by_domain) == length(mesh.tree) + + return nothing +end diff --git a/src/mesh/parallel_tree.jl b/src/mesh/parallel_tree.jl index 42584108007..cab8a76f970 100644 --- a/src/mesh/parallel_tree.jl +++ b/src/mesh/parallel_tree.jl @@ -99,7 +99,7 @@ function init!(t::ParallelTree, center::AbstractArray{Float64}, length::Real, pe t.levels[1] = 0 t.coordinates[:, 1] .= t.center_level_0 t.original_cell_ids[1] = 0 - t.domain_ids[1] = 0 + t.domain_ids[1] = typemin(Int) # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor if all(periodicity) From 82751ba7f4d69c0b277dc0e287fc200a0d75b40f Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sat, 5 Sep 2020 12:34:14 +0200 Subject: [PATCH 11/81] Use OffsetArrays to store data by domain id with 0-based indices --- Project.toml | 1 + src/Trixi.jl | 1 + src/mesh/mesh.jl | 27 ++++++++++++++------------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/Project.toml b/Project.toml index 764b7a90ef4..cd43ae70c91 100644 --- a/Project.toml +++ b/Project.toml @@ -7,6 +7,7 @@ version = "0.2.2-pre" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" diff --git a/src/Trixi.jl b/src/Trixi.jl index a45a8f80298..9565d0079a0 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -21,6 +21,7 @@ using Random: seed! using HDF5: h5open, attrs using MPI # We use all symbols, but for now we always prefix with `MPI.`, e.g., `MPI.Init()` +using OffsetArrays: OffsetArray, OffsetVector using StaticArrays: @MVector, @SVector, MVector, MMatrix, MArray, SVector, SMatrix, SArray using TimerOutputs: @notimeit, @timeit, TimerOutput, print_timer, reset_timer! 
using UnPack: @unpack diff --git a/src/mesh/mesh.jl b/src/mesh/mesh.jl index ebcb62712a3..6f7e4ed8a38 100644 --- a/src/mesh/mesh.jl +++ b/src/mesh/mesh.jl @@ -8,8 +8,8 @@ mutable struct TreeMesh{NDIMS, TreeType} tree::TreeType current_filename::String unsaved_changes::Bool - first_cell_by_domain::Vector{Int} - n_cells_by_domain::Vector{Int} + first_cell_by_domain::OffsetVector{Int, Vector{Int}} + n_cells_by_domain::OffsetVector{Int, Vector{Int}} function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer) where {NDIMS, TreeType} # Verify that NDIMS is an integer @@ -20,8 +20,8 @@ mutable struct TreeMesh{NDIMS, TreeType} m.tree = TreeType{NDIMS}(n_cells_max) m.current_filename = "" m.unsaved_changes = false - m.first_cell_by_domain = Int[] - m.n_cells_by_domain = Int[] + m.first_cell_by_domain = OffsetVector(Int[], 0) + m.n_cells_by_domain = OffsetVector(Int[], 0) return m end @@ -36,8 +36,8 @@ mutable struct TreeMesh{NDIMS, TreeType} m.tree = TreeType(n_cells_max, domain_center, domain_length, periodicity) m.current_filename = "" m.unsaved_changes = false - m.first_cell_by_domain = Int[] - m.n_cells_by_domain = Int[] + m.first_cell_by_domain = OffsetVector(Int[], 0) + m.n_cells_by_domain = OffsetVector(Int[], 0) return m end @@ -181,8 +181,9 @@ end function partition(mesh) # Determine number of leaf cells per domain leaves = leaf_cells(mesh.tree) - n_leaves_per_domain = fill(div(length(leaves), n_domains()), n_domains()) - for d in 1:rem(length(leaves), n_domains()) + n_leaves_per_domain = OffsetArray(fill(div(length(leaves), n_domains()), n_domains()), + 0:(n_domains() - 1)) + for d in 0:(rem(length(leaves), n_domains()) - 1) n_leaves_per_domain[d] += 1 end @assert sum(n_leaves_per_domain) == length(leaves) @@ -193,16 +194,16 @@ function partition(mesh) mesh.n_cells_by_domain = similar(n_leaves_per_domain) leaf_count = 0 - last_id = leaves[n_leaves_per_domain[1]] - mesh.first_cell_by_domain[1] = 1 - mesh.n_cells_by_domain[1] = last_id + last_id = leaves[n_leaves_per_domain[0]] + mesh.first_cell_by_domain[0] = 1 + mesh.n_cells_by_domain[0] = last_id mesh.tree.domain_ids[1:last_id] .= 0 - for d in 2:length(n_leaves_per_domain) + for d in 1:(length(n_leaves_per_domain)-1) leaf_count += n_leaves_per_domain[d-1] last_id = leaves[leaf_count + n_leaves_per_domain[d]] mesh.first_cell_by_domain[d] = mesh.first_cell_by_domain[d-1] + mesh.n_cells_by_domain[d-1] mesh.n_cells_by_domain[d] = last_id - mesh.first_cell_by_domain[d] + 1 - mesh.tree.domain_ids[mesh.first_cell_by_domain[d]:last_id] .= d-1 + mesh.tree.domain_ids[mesh.first_cell_by_domain[d]:last_id] .= d end @assert all(x->x >= 0, mesh.tree.domain_ids[1:length(mesh.tree)]) From 05abed556219318d7ee51a7352cc098259a7cfb4 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 7 Sep 2020 09:27:45 +0200 Subject: [PATCH 12/81] Add initial setup for MPI exchange in Dg2D --- src/mesh/parallel_tree.jl | 12 ++ src/solvers/dg/2d/containers.jl | 28 ++++ src/solvers/dg/2d/dg.jl | 233 +++++++++++++++++++++++++++++++- 3 files changed, 268 insertions(+), 5 deletions(-) diff --git a/src/mesh/parallel_tree.jl b/src/mesh/parallel_tree.jl index cab8a76f970..8f3a781c6ad 100644 --- a/src/mesh/parallel_tree.jl +++ b/src/mesh/parallel_tree.jl @@ -170,6 +170,9 @@ has_child(t::ParallelTree, cell_id::Int, child::Int) = t.child_ids[child, cell_i # Check if cell has a neighbor at the same refinement level in the given direction has_neighbor(t::ParallelTree, cell_id::Int, direction::Int) = t.neighbor_ids[direction, cell_id] > 0 +# Check if cell is own cell, 
i.e., belongs to this MPI domain +is_own_cell(t::ParallelTree, cell_id) = t.domain_ids[cell_id] == domain_id() + # Check if cell has a coarse neighbor, i.e., with one refinement level lower function has_coarse_neighbor(t::ParallelTree, cell_id::Int, direction::Int) return has_parent(t, cell_id) && has_neighbor(t, t.parent_ids[cell_id], direction) @@ -282,6 +285,15 @@ end leaf_cells(t::ParallelTree) = filter_leaf_cells((cell_id)->true, t) +# Return an array with the ids of all leaf cells for a given domain +leaf_cells_by_domain(t::ParallelTree, domain_id) = filter_leaf_cells(t) do cell_id + t.domain_ids[cell_id] == domain_id + end + +# Return an array with the ids of all local leaf cells +local_leaf_cells(t::ParallelTree) = leaf_cells_by_domain(t, domain_id()) + + # Count the number of leaf cells. count_leaf_cells(t::ParallelTree) = length(leaf_cells(t)) diff --git a/src/solvers/dg/2d/containers.jl b/src/solvers/dg/2d/containers.jl index 75a87c8440e..95d83de3a61 100644 --- a/src/solvers/dg/2d/containers.jl +++ b/src/solvers/dg/2d/containers.jl @@ -63,6 +63,34 @@ end ninterfaces(interfaces::InterfaceContainer2D) = length(interfaces.orientations) +# Container data structure (structure-of-arrays style) for DG MPI interfaces +struct MpiInterfaceContainer2D{NVARS, POLYDEG} <: AbstractContainer + u::Array{Float64, 4} # [leftright, variables, i, interfaces] + local_element_ids::Vector{Int} # [interfaces] + orientations::Vector{Int} # [interfaces] + remote_sides::Vector{Int} # [interfaces] +end + + +function MpiInterfaceContainer2D{NVARS, POLYDEG}(capacity::Integer) where {NVARS, POLYDEG} + # Initialize fields with defaults + n_nodes = POLYDEG + 1 + u = fill(NaN, 2, NVARS, n_nodes, capacity) + local_element_ids = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) + remote_sides = fill(typemin(Int), capacity) + + mpi_interfaces = MpiInterfaceContainer2D{NVARS, POLYDEG}(u, local_element_ids, orientations, + remote_sides) + + return mpi_interfaces +end + + +# Return number of interfaces +nmpiinterfaces(mpi_interfaces::MpiInterfaceContainer2D) = length(mpi_interfaces.orientations) + + # Container data structure (structure-of-arrays style) for DG boundaries struct BoundaryContainer2D{NVARS, POLYDEG} <: AbstractContainer u::Array{Float64, 4} # [leftright, variables, i, boundaries] diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index 5daefba630c..d985eea8801 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -18,6 +18,9 @@ mutable struct Dg2D{Eqn<:AbstractEquation, NVARS, POLYDEG, interfaces::InterfaceContainer2D{NVARS, POLYDEG} n_interfaces::Int + mpi_interfaces::MpiInterfaceContainer2D{NVARS, POLYDEG} + n_mpi_interfaces::Int + boundaries::BoundaryContainer2D{NVARS, POLYDEG} n_boundaries::Int @@ -63,6 +66,13 @@ mutable struct Dg2D{Eqn<:AbstractEquation, NVARS, POLYDEG, amr_alpha_min::Float64 amr_alpha_smooth::Bool + mpi_neighbor_domain_ids::Vector{Int} + mpi_neighbor_interfaces::Vector{Vector{Int}} + mpi_send_buffers::Vector{Vector{Float64}} + mpi_recv_buffers::Vector{Vector{Float64}} + mpi_send_requests::Vector{MPI.Request} + mpi_recv_requests::Vector{MPI.Request} + element_variables::Dict{Symbol, Union{Vector{Float64}, Vector{Int}}} cache::Dict{Symbol, Any} thread_cache::Any # to make fully-typed output more readable @@ -73,8 +83,12 @@ end # Convenience constructor to create DG solver instance function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, volume_flux_function, initial_conditions, source_terms, 
mesh::TreeMesh{NDIMS}, POLYDEG) where {NDIMS, NVARS} - # Get cells for which an element needs to be created (i.e., all leaf cells) - leaf_cell_ids = leaf_cells(mesh.tree) + # Get local cells for which an element needs to be created (i.e., all leaf cells) + if is_parallel() + leaf_cell_ids = local_leaf_cells(mesh.tree) + else + leaf_cell_ids = leaf_cells(mesh.tree) + end # Initialize element container elements = init_elements(leaf_cell_ids, mesh, Val(NVARS), Val(POLYDEG)) @@ -84,6 +98,10 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v interfaces = init_interfaces(leaf_cell_ids, mesh, Val(NVARS), Val(POLYDEG), elements) n_interfaces = ninterfaces(interfaces) + # Initialize MPI interface container + mpi_interfaces = init_mpi_interfaces(leaf_cell_ids, mesh, Val(NVARS), Val(POLYDEG), elements) + n_mpi_interfaces = nmpiinterfaces(mpi_interfaces) + # Initialize boundaries boundaries = init_boundaries(leaf_cell_ids, mesh, Val(NVARS), Val(POLYDEG), elements) n_boundaries = nboundaries(boundaries) @@ -95,7 +113,7 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v n_ecmortars = nmortars(ecmortars) # Sanity checks - if isperiodic(mesh.tree) && n_l2mortars == 0 && n_ecmortars == 0 + if isperiodic(mesh.tree) && n_l2mortars == 0 && n_ecmortars == 0 && is_serial() @assert n_interfaces == 2*n_elements ("For 2D and periodic domains and conforming elements, " * "n_surf must be the same as 2*n_elem") end @@ -184,6 +202,24 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v amr_alpha_min = parameter("amr_alpha_min", 0.001) amr_alpha_smooth = parameter("amr_alpha_smooth", false) + # Set up MPI neighbor connectivity and communication data structures + if is_parallel() + (mpi_neighbor_domain_ids, + mpi_neighbor_interfaces) = init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh) + (mpi_send_buffers, + mpi_recv_buffers, + mpi_send_requests, + mpi_recv_requests) = init_mpi_data_structures(mpi_neighbor_interfaces, + Val(NDIMS), Val(NVARS), Val(POLYDEG)) + else + mpi_neighbor_domain_ids = Int[] + mpi_neighbor_interfaces = Vector{Int}[] + mpi_send_buffers = Vector{Float64}[] + mpi_recv_buffers = Vector{Float64}[] + mpi_send_requests = MPI.Request[] + mpi_recv_requests = MPI.Request[] + end + # Initialize element variables such that they are available in the first solution file if volume_integral_type === Val(:shock_capturing) element_variables[:blending_factor] = zeros(n_elements) @@ -203,6 +239,7 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v initial_conditions, source_terms, elements, n_elements, interfaces, n_interfaces, + mpi_interfaces, n_mpi_interfaces, boundaries, n_boundaries, mortar_type, l2mortars, n_l2mortars, @@ -219,6 +256,8 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v analysis_quantities, save_analysis, analysis_filename, shock_indicator_variable, shock_alpha_max, shock_alpha_min, shock_alpha_smooth, amr_indicator, amr_alpha_max, amr_alpha_min, amr_alpha_smooth, + mpi_neighbor_domain_ids, mpi_neighbor_interfaces, + mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests, element_variables, cache, thread_cache, initial_state_integrals) @@ -271,8 +310,44 @@ function count_required_interfaces(mesh::TreeMesh{2}, cell_ids) end # Skip if neighbor has children - neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_id) + neighbor_cell_id = mesh.tree.neighbor_ids[direction, 
cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on different domain -> create MPI interface instead + if is_parallel() && !is_own_cell(mesh.tree, neighbor_cell_id) + continue + end + + count += 1 + end + end + + return count +end + + +# Count the number of MPI interfaces that need to be created +function count_required_mpi_interfaces(mesh::TreeMesh{2}, cell_ids) + count = 0 + + # Iterate over all cells + for cell_id in cell_ids + for direction in 1:n_directions(mesh.tree) + # If no neighbor exists, current cell is small or at boundary and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on this domain -> create regular interface instead + if is_parallel() && is_own_cell(mesh.tree, neighbor_cell_id) continue end @@ -394,6 +469,19 @@ function init_interfaces(cell_ids, mesh::TreeMesh{2}, ::Val{NVARS}, ::Val{POLYDE end +# Create MPI interface container, initialize interface data, and return interface container for further use +function init_mpi_interfaces(cell_ids, mesh::TreeMesh{2}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} + # Initialize container + n_mpi_interfaces = count_required_mpi_interfaces(mesh, cell_ids) + mpi_interfaces = MpiInterfaceContainer2D{NVARS, POLYDEG}(n_mpi_interfaces) + + # Connect elements with interfaces + init_mpi_interface_connectivity!(elements, mpi_interfaces, mesh) + + return mpi_interfaces +end + + # Create boundaries container, initialize boundary data, and return boundaries container # # NVARS: number of variables @@ -477,6 +565,11 @@ function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh{2}) continue end + # Skip if neighbor is on different domain -> create MPI interface instead + if is_parallel() && !is_own_cell(mesh.tree, neighbor_cell_id) + continue + end + # Create interface between elements (1 -> "left" of interface, 2 -> "right" of interface) count += 1 interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] @@ -492,6 +585,54 @@ function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh{2}) end +# Initialize connectivity between elements and interfaces +function init_mpi_interface_connectivity!(elements, mpi_interfaces, mesh::TreeMesh{2}) + # Reset interface count + count = 0 + + # Iterate over all elements to find neighbors and to connect via mpi_interfaces + for element_id in 1:nelements(elements) + # Get cell id + cell_id = elements.cell_ids[element_id] + + # Loop over directions + for direction in 1:n_directions(mesh.tree) + # If no neighbor exists, current cell is small and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on this domain -> create regular interface instead + if is_parallel() && is_own_cell(mesh.tree, neighbor_cell_id) + continue + end + + # Create interface between elements + count += 1 + mpi_interfaces.local_element_ids[count] = element_id + + if direction in (2, 4) # element is "left" of interface, remote cell is "right" of interface + mpi_interfaces.remote_sides[count] = 2 + else + mpi_interfaces.remote_sides[count] = 1 + end + + # Set orientation (x -> 1, y -> 2) + 
mpi_interfaces.orientations[count] = div(direction, 2) + end + end + + @assert count == nmpiinterfaces(mpi_interfaces) ("Actual interface count ($count) does not match " + * "expectations $(nmpiinterfaces(mpi_interfaces))") +end + + # Initialize connectivity between elements and boundaries function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh{2}) # Reset boundaries count @@ -627,6 +768,70 @@ function init_mortar_connectivity!(elements, mortars, mesh::TreeMesh{2}) end +# Initialize connectivity between MPI neighbor domains +function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh::TreeMesh{2}) + tree = mesh.tree + + # Determine neighbor domains and sides for MPI interfaces + neighbor_domain_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) + my_domain_id = domain_id() + for interface_id in 1:nmpiinterfaces(mpi_interfaces) + orientation = mpi_interfaces.orientations[interface_id] + remote_side = mpi_interfaces.remote_sides[interface_id] + if orientation == 1 # MPI interface in x-direction + if remote_side == 1 # remote cell on the "left" of MPI interface + direction = 1 + else # remote cell on the "right" of MPI interface + direction = 2 + end + else # MPI interface in y-direction + if remote_side == 1 # remote cell on the "left" of MPI interface + direction = 3 + else # remote cell on the "right" of MPI interface + direction = 4 + end + end + local_element_id = mpi_interfaces.local_element_ids[interface_id] + local_cell_id = elements.cell_ids[local_element_id] + remote_cell_id = tree.neighbor_ids[direction, local_cell_id] + neighbor_domain_ids[interface_id] = tree.domain_ids[remote_cell_id] + end + + # Get sorted, unique neighbor domain ids + mpi_neighbor_domain_ids = unique(sort(neighbor_domain_ids)) + + # For each neighbor domain id, init connectivity data structures + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_domain_ids)) + for (index, d) in enumerate(mpi_neighbor_domain_ids) + count_ = count(x->(x == d), neighbor_domain_ids) + mpi_neighbor_interfaces[index] = findall(x->(x == d), neighbor_domain_ids) + end + + # Sanity check that we counted all interfaces exactly once + @assert sum(length(v) for v in mpi_neighbor_interfaces) == nmpiinterfaces(mpi_interfaces) + + return mpi_neighbor_domain_ids, mpi_neighbor_interfaces +end + + +# Initialize MPI data structures +function init_mpi_data_structures(mpi_neighbor_interfaces, ::Val{NDIMS}, ::Val{NVARS}, + ::Val{POLYDEG}) where {NDIMS, NVARS, POLYDEG} + data_size = NVARS * (POLYDEG + 1)^(NDIMS - 1) + mpi_send_buffers = Vector{Vector{Float64}}(undef, length(mpi_neighbor_interfaces)) + mpi_recv_buffers = Vector{Vector{Float64}}(undef, length(mpi_neighbor_interfaces)) + for index in 1:length(mpi_neighbor_interfaces) + mpi_send_buffers[index] = Vector{Float64}(undef, length(mpi_neighbor_interfaces[index]) * data_size) + mpi_recv_buffers[index] = Vector{Float64}(undef, length(mpi_neighbor_interfaces[index]) * data_size) + end + + mpi_send_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) + mpi_recv_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) + + return mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests +end + + """ integrate(func, dg::Dg2D, args...; normalize=true) @@ -1138,9 +1343,18 @@ end # Calculate time derivative function rhs!(dg::Dg2D, t_stage) + # Start to receive MPI data + is_parallel() && @timeit timer() "start MPI receive" start_mpi_receive!(dg) + # Reset u_t @timeit timer() "reset ∂u/∂t" dg.elements.u_t .= 0 
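# --- Sketch only, not a hunk of this patch: the non-blocking exchange pattern that the calls
# added below implement. Receives are posted first, interface data is packed into per-neighbor
# buffers and sent with `MPI.Isend`, local volume/interface work runs while messages are in
# flight, and remote data is unpacked as soon as `MPI.Waitany!` returns a completed receive.
# The names `send_buffers`, `recv_buffers`, and `neighbor_ranks` are placeholders, not fields
# of `Dg2D`; MPI is assumed to be initialized already.
using MPI

function sketch_nonblocking_exchange!(send_buffers, recv_buffers, neighbor_ranks, comm)
  # Post all receives before any send (tag = sending rank, as in start_mpi_receive!)
  recv_requests = [MPI.Irecv!(recv_buffers[i], rank, rank, comm)
                   for (i, rank) in enumerate(neighbor_ranks)]
  send_requests = [MPI.Isend(send_buffers[i], rank, MPI.Comm_rank(comm), comm)
                   for (i, rank) in enumerate(neighbor_ranks)]

  # ... purely local work that needs no remote data would run here ...

  # Unpack each receive buffer as soon as its message has arrived
  for _ in eachindex(recv_requests)
    i, _ = MPI.Waitany!(recv_requests)
    # ... copy recv_buffers[i] into the "remote" side of the MPI interfaces ...
  end

  # Do not reuse the send buffers before all sends have completed
  MPI.Waitall!(send_requests)

  return nothing
end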
+ # Prolong solution to MPI interfaces + is_parallel() && @timeit timer() "prolong2mpiinterfaces" prolong2mpiinterfaces!(dg) + + # Start to send MPI data + is_parallel() && @timeit timer() "start MPI send" start_mpi_send!(dg) + # Calculate volume integral @timeit timer() "volume integral" calc_volume_integral!(dg) @@ -1162,6 +1376,12 @@ function rhs!(dg::Dg2D, t_stage) # Calculate mortar fluxes @timeit timer() "mortar flux" calc_mortar_flux!(dg) + # Finish to receive MPI data + is_parallel() && @timeit timer() "finish MPI receive" finish_mpi_receive!(dg) + + # Calculate MPI interface fluxes + is_parallel() && @timeit timer() "MPI interface flux" calc_mpi_interface_flux!(dg) + # Calculate surface integrals @timeit timer() "surface integral" calc_surface_integral!(dg) @@ -1170,6 +1390,9 @@ function rhs!(dg::Dg2D, t_stage) # Calculate source terms @timeit timer() "source terms" calc_sources!(dg, dg.source_terms, t_stage) + + # Finish to send MPI data + is_parallel() && @timeit timer() "finish MPI send" finish_mpi_send!(dg) end From 6d8792a5b99cda35551c2c3931412c8db845596b Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 7 Sep 2020 09:51:03 +0200 Subject: [PATCH 13/81] Move MPI-related methods to `parallel.jl` --- src/solvers/dg/2d/dg.jl | 156 ---------------------------- src/solvers/dg/2d/parallel.jl | 185 ++++++++++++++++++++++++++++++++++ src/solvers/dg/dg.jl | 1 + 3 files changed, 186 insertions(+), 156 deletions(-) create mode 100644 src/solvers/dg/2d/parallel.jl diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index d985eea8801..f88ce60ad7e 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -328,37 +328,6 @@ function count_required_interfaces(mesh::TreeMesh{2}, cell_ids) end -# Count the number of MPI interfaces that need to be created -function count_required_mpi_interfaces(mesh::TreeMesh{2}, cell_ids) - count = 0 - - # Iterate over all cells - for cell_id in cell_ids - for direction in 1:n_directions(mesh.tree) - # If no neighbor exists, current cell is small or at boundary and thus we need a mortar - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if neighbor has children - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Skip if neighbor is on this domain -> create regular interface instead - if is_parallel() && is_own_cell(mesh.tree, neighbor_cell_id) - continue - end - - count += 1 - end - end - - return count -end - - # Count the number of boundaries that need to be created function count_required_boundaries(mesh::TreeMesh{2}, cell_ids) count = 0 @@ -469,19 +438,6 @@ function init_interfaces(cell_ids, mesh::TreeMesh{2}, ::Val{NVARS}, ::Val{POLYDE end -# Create MPI interface container, initialize interface data, and return interface container for further use -function init_mpi_interfaces(cell_ids, mesh::TreeMesh{2}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} - # Initialize container - n_mpi_interfaces = count_required_mpi_interfaces(mesh, cell_ids) - mpi_interfaces = MpiInterfaceContainer2D{NVARS, POLYDEG}(n_mpi_interfaces) - - # Connect elements with interfaces - init_mpi_interface_connectivity!(elements, mpi_interfaces, mesh) - - return mpi_interfaces -end - - # Create boundaries container, initialize boundary data, and return boundaries container # # NVARS: number of variables @@ -585,54 +541,6 @@ function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh{2}) end -# Initialize 
connectivity between elements and interfaces -function init_mpi_interface_connectivity!(elements, mpi_interfaces, mesh::TreeMesh{2}) - # Reset interface count - count = 0 - - # Iterate over all elements to find neighbors and to connect via mpi_interfaces - for element_id in 1:nelements(elements) - # Get cell id - cell_id = elements.cell_ids[element_id] - - # Loop over directions - for direction in 1:n_directions(mesh.tree) - # If no neighbor exists, current cell is small and thus we need a mortar - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if neighbor has children - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Skip if neighbor is on this domain -> create regular interface instead - if is_parallel() && is_own_cell(mesh.tree, neighbor_cell_id) - continue - end - - # Create interface between elements - count += 1 - mpi_interfaces.local_element_ids[count] = element_id - - if direction in (2, 4) # element is "left" of interface, remote cell is "right" of interface - mpi_interfaces.remote_sides[count] = 2 - else - mpi_interfaces.remote_sides[count] = 1 - end - - # Set orientation (x -> 1, y -> 2) - mpi_interfaces.orientations[count] = div(direction, 2) - end - end - - @assert count == nmpiinterfaces(mpi_interfaces) ("Actual interface count ($count) does not match " - * "expectations $(nmpiinterfaces(mpi_interfaces))") -end - - # Initialize connectivity between elements and boundaries function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh{2}) # Reset boundaries count @@ -768,70 +676,6 @@ function init_mortar_connectivity!(elements, mortars, mesh::TreeMesh{2}) end -# Initialize connectivity between MPI neighbor domains -function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh::TreeMesh{2}) - tree = mesh.tree - - # Determine neighbor domains and sides for MPI interfaces - neighbor_domain_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) - my_domain_id = domain_id() - for interface_id in 1:nmpiinterfaces(mpi_interfaces) - orientation = mpi_interfaces.orientations[interface_id] - remote_side = mpi_interfaces.remote_sides[interface_id] - if orientation == 1 # MPI interface in x-direction - if remote_side == 1 # remote cell on the "left" of MPI interface - direction = 1 - else # remote cell on the "right" of MPI interface - direction = 2 - end - else # MPI interface in y-direction - if remote_side == 1 # remote cell on the "left" of MPI interface - direction = 3 - else # remote cell on the "right" of MPI interface - direction = 4 - end - end - local_element_id = mpi_interfaces.local_element_ids[interface_id] - local_cell_id = elements.cell_ids[local_element_id] - remote_cell_id = tree.neighbor_ids[direction, local_cell_id] - neighbor_domain_ids[interface_id] = tree.domain_ids[remote_cell_id] - end - - # Get sorted, unique neighbor domain ids - mpi_neighbor_domain_ids = unique(sort(neighbor_domain_ids)) - - # For each neighbor domain id, init connectivity data structures - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_domain_ids)) - for (index, d) in enumerate(mpi_neighbor_domain_ids) - count_ = count(x->(x == d), neighbor_domain_ids) - mpi_neighbor_interfaces[index] = findall(x->(x == d), neighbor_domain_ids) - end - - # Sanity check that we counted all interfaces exactly once - @assert sum(length(v) for v in mpi_neighbor_interfaces) == nmpiinterfaces(mpi_interfaces) - - return mpi_neighbor_domain_ids, mpi_neighbor_interfaces 
-end - - -# Initialize MPI data structures -function init_mpi_data_structures(mpi_neighbor_interfaces, ::Val{NDIMS}, ::Val{NVARS}, - ::Val{POLYDEG}) where {NDIMS, NVARS, POLYDEG} - data_size = NVARS * (POLYDEG + 1)^(NDIMS - 1) - mpi_send_buffers = Vector{Vector{Float64}}(undef, length(mpi_neighbor_interfaces)) - mpi_recv_buffers = Vector{Vector{Float64}}(undef, length(mpi_neighbor_interfaces)) - for index in 1:length(mpi_neighbor_interfaces) - mpi_send_buffers[index] = Vector{Float64}(undef, length(mpi_neighbor_interfaces[index]) * data_size) - mpi_recv_buffers[index] = Vector{Float64}(undef, length(mpi_neighbor_interfaces[index]) * data_size) - end - - mpi_send_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) - mpi_recv_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) - - return mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests -end - - """ integrate(func, dg::Dg2D, args...; normalize=true) diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl new file mode 100644 index 00000000000..8415289c547 --- /dev/null +++ b/src/solvers/dg/2d/parallel.jl @@ -0,0 +1,185 @@ +# Count the number of MPI interfaces that need to be created +function count_required_mpi_interfaces(mesh::TreeMesh{2}, cell_ids) + count = 0 + + # Iterate over all cells + for cell_id in cell_ids + for direction in 1:n_directions(mesh.tree) + # If no neighbor exists, current cell is small or at boundary and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on this domain -> create regular interface instead + if is_parallel() && is_own_cell(mesh.tree, neighbor_cell_id) + continue + end + + count += 1 + end + end + + return count +end + + +# Create MPI interface container, initialize interface data, and return interface container for further use +function init_mpi_interfaces(cell_ids, mesh::TreeMesh{2}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} + # Initialize container + n_mpi_interfaces = count_required_mpi_interfaces(mesh, cell_ids) + mpi_interfaces = MpiInterfaceContainer2D{NVARS, POLYDEG}(n_mpi_interfaces) + + # Connect elements with interfaces + init_mpi_interface_connectivity!(elements, mpi_interfaces, mesh) + + return mpi_interfaces +end + + +function start_mpi_receive!(dg::Dg2D) + for (index, d) in enumerate(dg.mpi_neighbor_domain_ids) + mpi_recv_requests[index] = MPI.Irecv!(dg.mpi_recv_buffers[index], d, d, mpi_comm()) + end +end + + +# Initialize connectivity between elements and interfaces +function init_mpi_interface_connectivity!(elements, mpi_interfaces, mesh::TreeMesh{2}) + # Reset interface count + count = 0 + + # Iterate over all elements to find neighbors and to connect via mpi_interfaces + for element_id in 1:nelements(elements) + # Get cell id + cell_id = elements.cell_ids[element_id] + + # Loop over directions + for direction in 1:n_directions(mesh.tree) + # If no neighbor exists, current cell is small and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on this domain -> create regular interface instead + if is_parallel() && is_own_cell(mesh.tree, 
neighbor_cell_id) + continue + end + + # Create interface between elements + count += 1 + mpi_interfaces.local_element_ids[count] = element_id + + if direction in (2, 4) # element is "left" of interface, remote cell is "right" of interface + mpi_interfaces.remote_sides[count] = 2 + else + mpi_interfaces.remote_sides[count] = 1 + end + + # Set orientation (x -> 1, y -> 2) + mpi_interfaces.orientations[count] = div(direction, 2) + end + end + + @assert count == nmpiinterfaces(mpi_interfaces) ("Actual interface count ($count) does not match " + * "expectations $(nmpiinterfaces(mpi_interfaces))") +end + + +# Initialize connectivity between MPI neighbor domains +function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh::TreeMesh{2}) + tree = mesh.tree + + # Determine neighbor domains and sides for MPI interfaces + neighbor_domain_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) + my_domain_id = domain_id() + for interface_id in 1:nmpiinterfaces(mpi_interfaces) + orientation = mpi_interfaces.orientations[interface_id] + remote_side = mpi_interfaces.remote_sides[interface_id] + if orientation == 1 # MPI interface in x-direction + if remote_side == 1 # remote cell on the "left" of MPI interface + direction = 1 + else # remote cell on the "right" of MPI interface + direction = 2 + end + else # MPI interface in y-direction + if remote_side == 1 # remote cell on the "left" of MPI interface + direction = 3 + else # remote cell on the "right" of MPI interface + direction = 4 + end + end + local_element_id = mpi_interfaces.local_element_ids[interface_id] + local_cell_id = elements.cell_ids[local_element_id] + remote_cell_id = tree.neighbor_ids[direction, local_cell_id] + neighbor_domain_ids[interface_id] = tree.domain_ids[remote_cell_id] + end + + # Get sorted, unique neighbor domain ids + mpi_neighbor_domain_ids = unique(sort(neighbor_domain_ids)) + + # For each neighbor domain id, init connectivity data structures + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_domain_ids)) + for (index, d) in enumerate(mpi_neighbor_domain_ids) + mpi_neighbor_interfaces[index] = findall(x->(x == d), neighbor_domain_ids) + end + + # Sanity check that we counted all interfaces exactly once + @assert sum(length(v) for v in mpi_neighbor_interfaces) == nmpiinterfaces(mpi_interfaces) + + return mpi_neighbor_domain_ids, mpi_neighbor_interfaces +end + + +# Initialize MPI data structures +function init_mpi_data_structures(mpi_neighbor_interfaces, ::Val{NDIMS}, ::Val{NVARS}, + ::Val{POLYDEG}) where {NDIMS, NVARS, POLYDEG} + data_size = NVARS * (POLYDEG + 1)^(NDIMS - 1) + mpi_send_buffers = Vector{Vector{Float64}}(undef, length(mpi_neighbor_interfaces)) + mpi_recv_buffers = Vector{Vector{Float64}}(undef, length(mpi_neighbor_interfaces)) + for index in 1:length(mpi_neighbor_interfaces) + mpi_send_buffers[index] = Vector{Float64}(undef, length(mpi_neighbor_interfaces[index]) * data_size) + mpi_recv_buffers[index] = Vector{Float64}(undef, length(mpi_neighbor_interfaces[index]) * data_size) + end + + mpi_send_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) + mpi_recv_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) + + return mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests +end + + +function prolong2mpiinterfaces!(dg::Dg2D) +end + + +function start_mpi_send!(dg::Dg2D) + error("pack buffers") + for (index, d) in enumerate(dg.mpi_neighbor_domain_ids) + mpi_send_requests[index] = MPI.Isend(dg.mpi_send_buffers[index], d, 
domain_id(), mpi_comm()) + end +end + + +function finish_mpi_receive!(dg::Dg2D) +end + + +function calc_mpi_interface_flux!(dg::Dg2D) +end + + +function finish_mpi_send!(dg::Dg2D) + MPI.Waitall!(dg.mpi_send_requests) +end diff --git a/src/solvers/dg/dg.jl b/src/solvers/dg/dg.jl index 40a02c56567..a381462045e 100644 --- a/src/solvers/dg/dg.jl +++ b/src/solvers/dg/dg.jl @@ -49,6 +49,7 @@ include("l2projection.jl") include("2d/containers.jl") include("2d/dg.jl") include("2d/amr.jl") +include("2d/parallel.jl") # Include 3D implementation include("3d/containers.jl") From aaffd56d20b52d145cf387899699a9a63225c18b Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 7 Sep 2020 11:11:40 +0200 Subject: [PATCH 14/81] Sort interface by global interface id --- src/solvers/dg/2d/parallel.jl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl index 8415289c547..ebd96e808b5 100644 --- a/src/solvers/dg/2d/parallel.jl +++ b/src/solvers/dg/2d/parallel.jl @@ -44,7 +44,7 @@ end function start_mpi_receive!(dg::Dg2D) for (index, d) in enumerate(dg.mpi_neighbor_domain_ids) - mpi_recv_requests[index] = MPI.Irecv!(dg.mpi_recv_buffers[index], d, d, mpi_comm()) + dg.mpi_recv_requests[index] = MPI.Irecv!(dg.mpi_recv_buffers[index], d, d, mpi_comm()) end end @@ -103,6 +103,8 @@ function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh::TreeMesh # Determine neighbor domains and sides for MPI interfaces neighbor_domain_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) + # The global interface id is the smaller of the (globally unique) neighbor cell ids + global_interface_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) my_domain_id = domain_id() for interface_id in 1:nmpiinterfaces(mpi_interfaces) orientation = mpi_interfaces.orientations[interface_id] @@ -124,15 +126,21 @@ function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh::TreeMesh local_cell_id = elements.cell_ids[local_element_id] remote_cell_id = tree.neighbor_ids[direction, local_cell_id] neighbor_domain_ids[interface_id] = tree.domain_ids[remote_cell_id] + global_interface_ids[interface_id] = min(local_cell_id, remote_cell_id) end # Get sorted, unique neighbor domain ids mpi_neighbor_domain_ids = unique(sort(neighbor_domain_ids)) + # Sort interfaces by global interface id + p = sortperm(global_interface_ids) + neighbor_domain_ids .= neighbor_domain_ids[p] + interface_ids = collect(1:nmpiinterfaces(mpi_interfaces))[p] + # For each neighbor domain id, init connectivity data structures mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_domain_ids)) for (index, d) in enumerate(mpi_neighbor_domain_ids) - mpi_neighbor_interfaces[index] = findall(x->(x == d), neighbor_domain_ids) + mpi_neighbor_interfaces[index] = interface_ids[findall(x->(x == d), neighbor_domain_ids)] end # Sanity check that we counted all interfaces exactly once From 641c15ae632ed793517acd971c1f0b9e5cd60ef1 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 7 Sep 2020 14:42:12 +0200 Subject: [PATCH 15/81] First working parallel DG computations on 2 domains (L2/Linf values and EOC tests for scalar advection and compressible Euler are matching) --- src/run.jl | 157 ++++++++++++++++++++-------------- src/solvers/dg/2d/dg.jl | 102 +++++++++++++--------- src/solvers/dg/2d/parallel.jl | 135 +++++++++++++++++++++++++++-- 3 files changed, 283 insertions(+), 111 deletions(-) diff --git a/src/run.jl b/src/run.jl index 
e389501c583..348f1baa243 100644 --- a/src/run.jl +++ b/src/run.jl @@ -30,9 +30,7 @@ function run(parameters_file; verbose=false, refinement_level_increment=0, param reset_timer!(timer()) # Initialize MPI - if !MPI.Initialized() - MPI.Init() - end + init_mpi() # Read command line or keyword arguments and parse parameters file init_parameters(parameters_file; verbose=verbose, @@ -91,28 +89,32 @@ function init_simulation() # Initialize mesh if restart is_parallel() && error("restarting not yet implemented in parallel") # TODO parallel - print("Loading mesh... ") + is_mpi_root() && print("Loading mesh... ") @timeit timer() "mesh loading" mesh = load_mesh(restart_filename) - println("done") + is_parallel() && MPI.Barrier(mpi_comm()) + is_mpi_root() && println("done") else - print("Creating mesh... ") + is_mpi_root() && print("Creating mesh... ") @timeit timer() "mesh creation" mesh = generate_mesh() mesh.current_filename = save_mesh_file(mesh) mesh.unsaved_changes = false - println("done") + is_parallel() && MPI.Barrier(mpi_comm()) + is_mpi_root() && println("done") end # Initialize system of equations - print("Initializing system of equations... ") + is_mpi_root() && print("Initializing system of equations... ") equations_name = parameter("equations") equations = make_equations(equations_name, ndims_) - println("done") + is_parallel() && MPI.Barrier(mpi_comm()) + is_mpi_root() && println("done") # Initialize solver - print("Initializing solver... ") + is_mpi_root() && print("Initializing solver... ") solver_name = parameter("solver", valid=["dg"]) solver = make_solver(solver_name, equations, mesh) - println("done") + is_parallel() && MPI.Barrier(mpi_comm()) + is_mpi_root() && println("done") # Sanity checks # If DG volume integral type is weak form, volume flux type must be flux_central, @@ -130,16 +132,18 @@ function init_simulation() adapt_initial_conditions = parameter("adapt_initial_conditions", true) adapt_initial_conditions_only_refine = parameter("adapt_initial_conditions_only_refine", true) if restart - print("Loading restart file...") + is_mpi_root() && print("Loading restart file...") time, step = load_restart_file!(solver, restart_filename) - println("done") + is_parallel() && MPI.Barrier(mpi_comm()) + is_mpi_root() && println("done") else - print("Applying initial conditions... ") + is_mpi_root() && print("Applying initial conditions... 
") t_start = parameter("t_start") time = t_start step = 0 set_initial_conditions!(solver, time) - println("done") + is_parallel() && MPI.Barrier(mpi_comm()) + is_mpi_root() && println("done") # If AMR is enabled, adapt mesh and re-apply ICs if amr_interval > 0 && adapt_initial_conditions @@ -232,8 +236,8 @@ function init_simulation() | | minimum dx: $min_dx | | maximum dx: $max_dx """ - println() - println(s) + is_mpi_root() && println() + is_mpi_root() && println(s) # Set up main loop save_final_solution = parameter("save_final_solution", true) @@ -317,21 +321,26 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function end # Check steady-state integration residual - if solver.equations isa HyperbolicDiffusionEquations2D - if maximum(abs, view(solver.elements.u_t, 1, :, :, :)) <= solver.equations.resid_tol - println() - println("-"^80) - println(" Steady state tolerance of ",solver.equations.resid_tol," reached at time ",time) - println("-"^80) - println() - finalstep = true + if solver.equations isa AbstractHyperbolicDiffusionEquations + if solver.equations isa HyperbolicDiffusionEquations2D + resid = maximum(abs, view(solver.elements.u_t, 1, :, :, :)) + elseif solver.equations isa HyperbolicDiffusionEquations3D + resid = maximum(abs, view(solver.elements.u_t, 1, :, :, :, :)) + else + error("unsupported system of equations") end - end - if solver.equations isa HyperbolicDiffusionEquations3D - if maximum(abs, view(solver.elements.u_t, 1, :, :, :, :)) <= solver.equations.resid_tol + + if is_parallel() + resid_buffer = [resid] + MPI.Allreduce!(resid_buffer, max, mpi_comm()) + resid = resid_buffer[1] + end + + if resid <= solver.equations.resid_tol println() println("-"^80) - println(" Steady state tolerance of ",solver.equations.resid_tol," reached at time ",time) + println(" Steady state tolerance of ", solver.equations.resid_tol, + " reached at time ", time) println("-"^80) println() finalstep = true @@ -341,9 +350,16 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function # Analyze solution errors if analysis_interval > 0 && (step % analysis_interval == 0 || finalstep) # Calculate absolute and relative runtime + if is_parallel() + total_dofs = ndofs(solver) + else + dofs_buffer = [ndofs(solver)] + MPI.Reduce!(dofs_buffer, +, mpi_root(), mpi_comm()) + total_dofs = dofs_buffer[1] + end runtime_absolute = (time_ns() - loop_start_time) / 10^9 runtime_relative = ((time_ns() - analysis_start_time - output_time) / 10^9 / - (n_analysis_timesteps * ndofs(solver))) + (n_analysis_timesteps * total_dofs)) # Analyze solution l2_error, linf_error = @timeit timer() "analyze solution" analyze_solution( @@ -353,13 +369,13 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function analysis_start_time = time_ns() output_time = 0.0 n_analysis_timesteps = 0 - if finalstep + if finalstep && is_mpi_root() println("-"^80) println("Trixi simulation run finished. Final time: $time Time steps: $step") println("-"^80) println() end - elseif alive_interval > 0 && step % alive_interval == 0 + elseif alive_interval > 0 && step % alive_interval == 0 && is_mpi_root() runtime_absolute = (time_ns() - loop_start_time) / 10^9 @printf("#t/s: %6d | dt: %.4e | Sim. 
time: %.4e | Run time: %.4e s\n", step, dt, time, runtime_absolute) @@ -425,8 +441,10 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function end # Print timer information - print_timer(timer(), title="Trixi.jl", allocations=true, linechars=:ascii, compact=false) - println() + if is_mpi_root() + print_timer(timer(), title="Trixi.jl", allocations=true, linechars=:ascii, compact=false) + println() + end # Return error norms for EOC calculation return l2_error, linf_error, varnames_cons(solver.equations) @@ -443,7 +461,12 @@ refinement level will be increased by 1. Parameters can be overriden by specifyi additional keyword arguments, which are passed to the respective call to `run`.. """ function convtest(parameters_file, iterations; parameters...) - @assert(iterations > 1, "Number of iterations must be bigger than 1 for a convergence analysis") + # Initialize MPI + init_mpi() + + if is_mpi_root() + @assert(iterations > 1, "Number of iterations must be bigger than 1 for a convergence analysis") + end # Types of errors to be calcuated errors = Dict(:L2 => Float64[], :Linf => Float64[]) @@ -453,7 +476,7 @@ function convtest(parameters_file, iterations; parameters...) # Run trixi and extract errors for i = 1:iterations - println(string("Running convtest iteration ", i, "/", iterations)) + is_mpi_root() && println(string("Running convtest iteration ", i, "/", iterations)) l2_error, linf_error, variablenames = run(parameters_file; refinement_level_increment = i - 1, parameters...) @@ -474,44 +497,46 @@ function convtest(parameters_file, iterations; parameters...) eocs = Dict(kind => log.(error[2:end, :] ./ error[1:end-1, :]) ./ log(1 / 2) for (kind, error) in errorsmatrix) - for (kind, error) in errorsmatrix - println(kind) - - for v in variablenames - @printf("%-20s", v) - end - println("") + if is_mpi_root() + for (kind, error) in errorsmatrix + println(kind) - for k = 1:nvariables - @printf("%-10s", "error") - @printf("%-10s", "EOC") - end - println("") + for v in variablenames + @printf("%-20s", v) + end + println("") - # Print errors for the first iteration - for k = 1:nvariables - @printf("%-10.2e", error[1, k]) - @printf("%-10s", "-") - end - println("") + for k = 1:nvariables + @printf("%-10s", "error") + @printf("%-10s", "EOC") + end + println("") - # For the following iterations print errors and EOCs - for j = 2:iterations + # Print errors for the first iteration for k = 1:nvariables - @printf("%-10.2e", error[j, k]) - @printf("%-10.2f", eocs[kind][j-1, k]) + @printf("%-10.2e", error[1, k]) + @printf("%-10s", "-") + end + println("") + + # For the following iterations print errors and EOCs + for j = 2:iterations + for k = 1:nvariables + @printf("%-10.2e", error[j, k]) + @printf("%-10.2f", eocs[kind][j-1, k]) + end + println("") end println("") - end - println("") - # Print mean EOCs - for k = 1:nvariables - @printf("%-10s", "mean") - @printf("%-10.2f", sum(eocs[kind][:, k]) ./ length(eocs[kind][:, k])) + # Print mean EOCs + for k = 1:nvariables + @printf("%-10s", "mean") + @printf("%-10.2f", sum(eocs[kind][:, k]) ./ length(eocs[kind][:, k])) + end + println("") + println("-"^80) end - println("") - println("-"^80) end end diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index f88ce60ad7e..9fd2c940c0c 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -784,6 +784,10 @@ function calc_error_norms(dg::Dg2D, t::Float64) end # For L2 error, divide by total volume + if is_parallel() + MPI.Reduce!(l2_error, +, mpi_root(), mpi_comm()) + 
MPI.Reduce!(linf_error, max, mpi_root(), mpi_comm()) + end @. l2_error = sqrt(l2_error / dg.analysis_total_volume) return l2_error, linf_error @@ -859,17 +863,19 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step:: equation = equations(dg) # General information - println() - println("-"^80) - println(" Simulation running '", get_name(equation), "' with POLYDEG = ", polydeg(dg)) - println("-"^80) - println(" #timesteps: " * @sprintf("% 14d", step) * - " " * - " run time: " * @sprintf("%10.8e s", runtime_absolute)) - println(" dt: " * @sprintf("%10.8e", dt) * - " " * - " Time/DOF/step: " * @sprintf("%10.8e s", runtime_relative)) - println(" sim. time: " * @sprintf("%10.8e", time)) + if is_mpi_root() + println() + println("-"^80) + println(" Simulation running '", get_name(equation), "' with POLYDEG = ", polydeg(dg)) + println("-"^80) + println(" #timesteps: " * @sprintf("% 14d", step) * + " " * + " run time: " * @sprintf("%10.8e s", runtime_absolute)) + println(" dt: " * @sprintf("%10.8e", dt) * + " " * + " Time/DOF/step: " * @sprintf("%10.8e s", runtime_relative)) + println(" sim. time: " * @sprintf("%10.8e", time)) + end # Level information (only show for AMR) if parameter("amr_interval", 0) > 0 @@ -898,13 +904,15 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step:: # Calculate and print derived quantities (error norms, entropy etc.) # Variable names required for L2 error, Linf error, and conservation error - if any(q in dg.analysis_quantities for q in - (:l2_error, :linf_error, :conservation_error, :residual)) - print(" Variable: ") - for v in 1:nvariables(equation) - @printf(" %-14s", varnames_cons(equation)[v]) + if is_mpi_root() + if any(q in dg.analysis_quantities for q in + (:l2_error, :linf_error, :conservation_error, :residual)) + print(" Variable: ") + for v in 1:nvariables(equation) + @printf(" %-14s", varnames_cons(equation)[v]) + end + println() end - println() end # Calculate L2/Linf errors @@ -914,24 +922,26 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step:: error("Since `analyze_solution` returns L2/Linf errors, it is an error to not calculate them") end - # L2 error - if :l2_error in dg.analysis_quantities - print(" L2 error: ") - for v in 1:nvariables(equation) - @printf(" % 10.8e", l2_error[v]) - dg.save_analysis && @printf(f, " % 10.8e", l2_error[v]) + if is_mpi_root() + # L2 error + if :l2_error in dg.analysis_quantities + print(" L2 error: ") + for v in 1:nvariables(equation) + @printf(" % 10.8e", l2_error[v]) + dg.save_analysis && @printf(f, " % 10.8e", l2_error[v]) + end + println() end - println() - end - # Linf error - if :linf_error in dg.analysis_quantities - print(" Linf error: ") - for v in 1:nvariables(equation) - @printf(" % 10.8e", linf_error[v]) - dg.save_analysis && @printf(f, " % 10.8e", linf_error[v]) + # Linf error + if :linf_error in dg.analysis_quantities + print(" Linf error: ") + for v in 1:nvariables(equation) + @printf(" % 10.8e", linf_error[v]) + dg.save_analysis && @printf(f, " % 10.8e", linf_error[v]) + end + println() end - println() end # Conservation errror @@ -968,11 +978,17 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step:: # Entropy time derivative if :dsdu_ut in dg.analysis_quantities - duds_ut = calc_entropy_timederivative(dg, time) - print(" ∑∂S/∂U ⋅ Uₜ: ") - @printf(" % 10.8e", duds_ut) - dg.save_analysis && @printf(f, " % 10.8e", duds_ut) - println() + dsdu_ut = calc_entropy_timederivative(dg, time) + if 
is_parallel() + dsdu_ut_buffer = [dsdu_ut] + MPI.Reduce!(dsdu_ut_buffer, +, mpi_root(), mpi_comm()) + end + if is_mpi_root() + print(" ∑∂S/∂U ⋅ Uₜ: ") + @printf(" % 10.8e", dsdu_ut) + dg.save_analysis && @printf(f, " % 10.8e", dsdu_ut) + println() + end end # Entropy @@ -1084,8 +1100,10 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step:: println() end - println("-"^80) - println() + if is_mpi_root() + println("-"^80) + println() + end # Add line break and close analysis file if it was opened if dg.save_analysis @@ -2305,6 +2323,12 @@ function calc_dt(dg::Dg2D, cfl) min_dt = min(min_dt, dt) end + if is_parallel() + min_dt_buffer = [min_dt] + MPI.Allreduce!(min_dt_buffer, min, mpi_comm()) + min_dt = min_dt_buffer[1] + end + return min_dt end diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl index ebd96e808b5..624f0e4dfe3 100644 --- a/src/solvers/dg/2d/parallel.jl +++ b/src/solvers/dg/2d/parallel.jl @@ -1,3 +1,12 @@ +function init_mpi() + if !MPI.Initialized() + # MPI.THREAD_FUNNELED: Only main thread makes MPI calls + provided = MPI.Init_thread(MPI.THREAD_FUNNELED) + @assert provided >= MPI.THREAD_FUNNELED "MPI library with insufficient threading support" + end +end + + # Count the number of MPI interfaces that need to be created function count_required_mpi_interfaces(mesh::TreeMesh{2}, cell_ids) count = 0 @@ -88,7 +97,11 @@ function init_mpi_interface_connectivity!(elements, mpi_interfaces, mesh::TreeMe end # Set orientation (x -> 1, y -> 2) - mpi_interfaces.orientations[count] = div(direction, 2) + if direction in (1, 2) # x-direction + mpi_interfaces.orientations[count] = 1 + else # y-direction + mpi_interfaces.orientations[count] = 2 + end end end @@ -103,12 +116,14 @@ function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh::TreeMesh # Determine neighbor domains and sides for MPI interfaces neighbor_domain_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) - # The global interface id is the smaller of the (globally unique) neighbor cell ids + # The global interface id is the smaller of the (globally unique) neighbor cell ids, multiplied by + # number of directions (2 * ndims) plus direction minus one global_interface_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) my_domain_id = domain_id() for interface_id in 1:nmpiinterfaces(mpi_interfaces) orientation = mpi_interfaces.orientations[interface_id] remote_side = mpi_interfaces.remote_sides[interface_id] + # Direction is from local cell to remote cell if orientation == 1 # MPI interface in x-direction if remote_side == 1 # remote cell on the "left" of MPI interface direction = 1 @@ -126,7 +141,12 @@ function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh::TreeMesh local_cell_id = elements.cell_ids[local_element_id] remote_cell_id = tree.neighbor_ids[direction, local_cell_id] neighbor_domain_ids[interface_id] = tree.domain_ids[remote_cell_id] - global_interface_ids[interface_id] = min(local_cell_id, remote_cell_id) + if local_cell_id < remote_cell_id + global_interface_ids[interface_id] = 2 * ndims(tree) * local_cell_id + direction - 1 + else + global_interface_ids[interface_id] = (2 * ndims(tree) * remote_cell_id + + opposite_direction(direction) - 1) + end end # Get sorted, unique neighbor domain ids @@ -169,22 +189,125 @@ end function prolong2mpiinterfaces!(dg::Dg2D) + equation = equations(dg) + + Threads.@threads for s in 1:dg.n_mpi_interfaces + local_element_id = dg.mpi_interfaces.local_element_ids[s] + if dg.mpi_interfaces.orientations[s] == 1 # 
interface in x-direction + if dg.mpi_interfaces.remote_sides[s] == 1 # local element in positive direction + for j in 1:nnodes(dg), v in 1:nvariables(dg) + dg.mpi_interfaces.u[2, v, j, s] = dg.elements.u[v, 1, j, local_element_id] + end + else # local element in negative direction + for j in 1:nnodes(dg), v in 1:nvariables(dg) + dg.mpi_interfaces.u[1, v, j, s] = dg.elements.u[v, nnodes(dg), j, local_element_id] + end + end + else # interface in y-direction + if dg.mpi_interfaces.remote_sides[s] == 1 # local element in positive direction + for i in 1:nnodes(dg), v in 1:nvariables(dg) + dg.mpi_interfaces.u[2, v, i, s] = dg.elements.u[v, i, 1, local_element_id] + end + else # local element in negative direction + for i in 1:nnodes(dg), v in 1:nvariables(dg) + dg.mpi_interfaces.u[1, v, i, s] = dg.elements.u[v, i, nnodes(dg), local_element_id] + end + end + end + end end function start_mpi_send!(dg::Dg2D) - error("pack buffers") + data_size = nvariables(dg) * nnodes(dg)^(ndims(dg) - 1) + + for d in 1:length(dg.mpi_neighbor_domain_ids) + send_buffer = dg.mpi_send_buffers[d] + + for (index, s) in enumerate(dg.mpi_neighbor_interfaces[d]) + first = (index - 1) * data_size + 1 + last = (index - 1) * data_size + data_size + + if dg.mpi_interfaces.remote_sides[s] == 1 # local element in positive direction + @views send_buffer[first:last] .= vec(dg.mpi_interfaces.u[2, :, :, s]) + else # local element in negative direction + @views send_buffer[first:last] .= vec(dg.mpi_interfaces.u[1, :, :, s]) + end + end + end + + # Start sending for (index, d) in enumerate(dg.mpi_neighbor_domain_ids) - mpi_send_requests[index] = MPI.Isend(dg.mpi_send_buffers[index], d, domain_id(), mpi_comm()) + dg.mpi_send_requests[index] = MPI.Isend(dg.mpi_send_buffers[index], d, domain_id(), mpi_comm()) end end function finish_mpi_receive!(dg::Dg2D) + data_size = nvariables(dg) * nnodes(dg)^(ndims(dg) - 1) + + # Start receiving and unpack received data until all communication is finished + d, _ = MPI.Waitany!(dg.mpi_recv_requests) + while d != 0 + recv_buffer = dg.mpi_recv_buffers[d] + + for (index, s) in enumerate(dg.mpi_neighbor_interfaces[d]) + first = (index - 1) * data_size + 1 + last = (index - 1) * data_size + data_size + + if dg.mpi_interfaces.remote_sides[s] == 1 # local element in positive direction + @views vec(dg.mpi_interfaces.u[1, :, :, s]) .= recv_buffer[first:last] + else # local element in negative direction + @views vec(dg.mpi_interfaces.u[2, :, :, s]) .= recv_buffer[first:last] + end + end + + d, _ = MPI.Waitany!(dg.mpi_recv_requests) + end end -function calc_mpi_interface_flux!(dg::Dg2D) +# Calculate and store the surface fluxes (standard Riemann and nonconservative parts) at an MPI interface +# OBS! 
Regarding the nonconservative terms: 1) currently only needed for the MHD equations +# 2) not implemented for MPI +calc_mpi_interface_flux!(dg::Dg2D) = calc_mpi_interface_flux!(dg.elements.surface_flux_values, + have_nonconservative_terms(dg.equations), + dg) + +function calc_mpi_interface_flux!(surface_flux_values, nonconservative_terms::Val{false}, dg::Dg2D) + @unpack surface_flux_function = dg + @unpack u, local_element_ids, orientations, remote_sides = dg.mpi_interfaces + + Threads.@threads for s in 1:dg.n_mpi_interfaces + # Get local neighboring element + element_id = local_element_ids[s] + + # Determine interface direction with respect to element: + if orientations[s] == 1 # interface in x-direction + if remote_sides[s] == 1 # local element in positive direction + direction = 1 + else # local element in negative direction + direction = 2 + end + else # interface in y-direction + if remote_sides[s] == 1 # local element in positive direction + direction = 3 + else # local element in negative direction + direction = 4 + end + end + + for i in 1:nnodes(dg) + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(u, dg, i, s) + flux = surface_flux_function(u_ll, u_rr, orientations[s], equations(dg)) + + # Copy flux to local element storage + for v in 1:nvariables(dg) + surface_flux_values[v, i, direction, element_id] = flux[v] + end + end + end end From 1ca6441d16613b42b2f43b18f0ab50cbc75dd114 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 7 Sep 2020 14:58:51 +0200 Subject: [PATCH 16/81] Add total and serial performance index --- src/run.jl | 7 ++++--- src/solvers/dg/2d/dg.jl | 6 ++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/run.jl b/src/run.jl index 348f1baa243..355e87141f2 100644 --- a/src/run.jl +++ b/src/run.jl @@ -211,9 +211,10 @@ function init_simulation() | time integration: $(get_name(time_integration_function)) | restart interval: $restart_interval | solution interval: $solution_interval - | #parallel threads: $(Threads.nthreads()) + | #MPI domains: $(n_domains()) + | #threads/domain: $(Threads.nthreads()) | - | Solver + | Solver (local) | | solver: $solver_name | | polydeg: $polydeg | | CFL: $cfl @@ -226,7 +227,7 @@ function init_simulation() | | #l2mortars: $(solver.n_l2mortars) | | #DOFs: $(ndofs(solver)) | - | Mesh + | Mesh (global) | | #cells: $(length(mesh.tree)) | | #leaf cells: $n_leaf_cells | | minimum level: $min_level diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index 9fd2c940c0c..37d8471c0ad 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -873,8 +873,10 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step:: " run time: " * @sprintf("%10.8e s", runtime_absolute)) println(" dt: " * @sprintf("%10.8e", dt) * " " * - " Time/DOF/step: " * @sprintf("%10.8e s", runtime_relative)) - println(" sim. time: " * @sprintf("%10.8e", time)) + " PID (total): " * @sprintf("%10.8e s", runtime_relative)) + println(" sim. 
time: " * @sprintf("%10.8e", time) * + " " * + " PID (serial): " * @sprintf("%10.8e s", runtime_relative * n_domains())) end # Level information (only show for AMR) From a271e533ee91ea759d7805fc741eec7140bf17e7 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 7 Sep 2020 16:35:41 +0200 Subject: [PATCH 17/81] Move init_mpi() to its proper place --- src/parallel/parallel.jl | 17 +++++++++++++++++ src/solvers/dg/2d/parallel.jl | 9 --------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/parallel/parallel.jl b/src/parallel/parallel.jl index 40510a428a9..57e93ab154e 100644 --- a/src/parallel/parallel.jl +++ b/src/parallel/parallel.jl @@ -1,3 +1,20 @@ +""" + init_mpi + +Initialize MPI by calling `MPI.Initialized()`. The function will check if MPI is already initialized +and if yes, do nothing, thus it is safe to call it multiple times. +""" +function init_mpi() + if !MPI.Initialized() + # MPI.THREAD_FUNNELED: Only main thread makes MPI calls + provided = MPI.Init_thread(MPI.THREAD_FUNNELED) + @assert provided >= MPI.THREAD_FUNNELED "MPI library with insufficient threading support" + end + + return nothing +end + + @inline mpi_comm() = MPI.COMM_WORLD @inline domain_id(comm) = MPI.Comm_rank(comm) diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl index 624f0e4dfe3..56f50c7db22 100644 --- a/src/solvers/dg/2d/parallel.jl +++ b/src/solvers/dg/2d/parallel.jl @@ -1,12 +1,3 @@ -function init_mpi() - if !MPI.Initialized() - # MPI.THREAD_FUNNELED: Only main thread makes MPI calls - provided = MPI.Init_thread(MPI.THREAD_FUNNELED) - @assert provided >= MPI.THREAD_FUNNELED "MPI library with insufficient threading support" - end -end - - # Count the number of MPI interfaces that need to be created function count_required_mpi_interfaces(mesh::TreeMesh{2}, cell_ids) count = 0 From 0c75f91e17f65c304a13f08d20213c591dcb5a97 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Tue, 8 Sep 2020 11:59:18 +0200 Subject: [PATCH 18/81] Fix parallel output --- src/mesh/mesh.jl | 1 + src/solvers/dg/2d/dg.jl | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mesh/mesh.jl b/src/mesh/mesh.jl index 6f7e4ed8a38..d67127b915e 100644 --- a/src/mesh/mesh.jl +++ b/src/mesh/mesh.jl @@ -181,6 +181,7 @@ end function partition(mesh) # Determine number of leaf cells per domain leaves = leaf_cells(mesh.tree) + @assert length(leaves) > n_domains() n_leaves_per_domain = OffsetArray(fill(div(length(leaves), n_domains()), n_domains()), 0:(n_domains() - 1)) for d in 0:(rem(length(leaves), n_domains()) - 1) diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index 37d8471c0ad..0e12896308e 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -880,7 +880,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step:: end # Level information (only show for AMR) - if parameter("amr_interval", 0) > 0 + if parameter("amr_interval", 0) > 0 && is_mpi_root() levels = Vector{Int}(undef, dg.n_elements) for element_id in 1:dg.n_elements levels[element_id] = mesh.tree.levels[dg.elements.cell_ids[element_id]] @@ -894,7 +894,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step:: end println(" └── level $min_level: " * @sprintf("% 14d", count(x->x==min_level, levels))) end - println() + is_mpi_root() && println() # Open file for appending and store time step and time information if dg.save_analysis From a0bb0987aa5ec35a65457c15b2f5e540edf8fb0f Mon Sep 17 00:00:00 2001 From: Michael 
Schlottke-Lakemper Date: Wed, 9 Sep 2020 10:01:08 +0200 Subject: [PATCH 19/81] Calculate total number of elements and local offset --- src/solvers/dg/2d/dg.jl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index 0a37cb536a2..b6e4506623a 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -73,6 +73,8 @@ mutable struct Dg2D{Eqn<:AbstractEquation, NVARS, POLYDEG, mpi_recv_buffers::Vector{Vector{Float64}} mpi_send_requests::Vector{MPI.Request} mpi_recv_requests::Vector{MPI.Request} + n_elements_global::Int + first_element_global_id::Int element_variables::Dict{Symbol, Union{Vector{Float64}, Vector{Int}}} cache::Dict{Symbol, Any} @@ -212,6 +214,17 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v mpi_send_requests, mpi_recv_requests) = init_mpi_data_structures(mpi_neighbor_interfaces, Val(NDIMS), Val(NVARS), Val(POLYDEG)) + + # Determine total number of elements and the global element id of the first element + n_elements_global = MPI.Allreduce(n_elements, +, mpi_comm()) + first_element_global_id = MPI.Exscan(n_elements, +, mpi_comm()) + if is_mpi_root() + # With Exscan, the result on the first rank is undefined + first_element_global_id = 1 + else + # On all other ranks we need to add one, since Julia has one-based indices + first_element_global_id += 1 + end else mpi_neighbor_domain_ids = Int[] mpi_neighbor_interfaces = Vector{Int}[] @@ -219,6 +232,8 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v mpi_recv_buffers = Vector{Float64}[] mpi_send_requests = MPI.Request[] mpi_recv_requests = MPI.Request[] + n_elements_global = n_elements + first_element_global_id = 1 end # Initialize element variables such that they are available in the first solution file @@ -259,6 +274,7 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v amr_indicator, amr_alpha_max, amr_alpha_min, amr_alpha_smooth, mpi_neighbor_domain_ids, mpi_neighbor_interfaces, mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests, + n_elements_global, first_element_global_id, element_variables, cache, thread_cache, initial_state_integrals) From 623041a0b3b750e489d06500dad9ae11f9e286a7 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Wed, 9 Sep 2020 10:29:57 +0200 Subject: [PATCH 20/81] Fix L2/Linf error calculation for MPI --- src/solvers/dg/2d/dg.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index b6e4506623a..88a8e83a5a5 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -809,8 +809,12 @@ function calc_error_norms(func, dg::Dg2D, t) # For L2 error, divide by total volume if is_parallel() - MPI.Reduce!(l2_error, +, mpi_root(), mpi_comm()) - MPI.Reduce!(linf_error, max, mpi_root(), mpi_comm()) + global_l2_error = Vector(l2_error) + global_linf_error = Vector(linf_error) + MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm()) + MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm()) + l2_error = convert(typeof(l2_error), global_l2_error) + linf_error = convert(typeof(linf_error), global_linf_error) end l2_error = @. 
sqrt(l2_error / dg.analysis_total_volume) From 9a3b277541cef056a884abb9549b8067cdd6c307 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Wed, 9 Sep 2020 12:38:28 +0200 Subject: [PATCH 21/81] MVector -> SVector for `center_level_0` --- src/mesh/parallel_tree.jl | 4 ++-- src/mesh/tree.jl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesh/parallel_tree.jl b/src/mesh/parallel_tree.jl index 8f3a781c6ad..dc7d492216e 100644 --- a/src/mesh/parallel_tree.jl +++ b/src/mesh/parallel_tree.jl @@ -32,7 +32,7 @@ mutable struct ParallelTree{NDIMS} <: AbstractContainer length::Int dummy::Int - center_level_0::MVector{NDIMS, Float64} + center_level_0::SVector{NDIMS, Float64} length_level_0::Float64 periodicity::NTuple{NDIMS, Bool} @@ -57,7 +57,7 @@ mutable struct ParallelTree{NDIMS} <: AbstractContainer t.length = 0 t.dummy = capacity + 1 - t.center_level_0 = @MVector fill(NaN, NDIMS) + t.center_level_0 = @SVector fill(NaN, NDIMS) t.length_level_0 = NaN return t diff --git a/src/mesh/tree.jl b/src/mesh/tree.jl index beb27bea3f7..09fd2071e34 100644 --- a/src/mesh/tree.jl +++ b/src/mesh/tree.jl @@ -31,7 +31,7 @@ mutable struct Tree{NDIMS} <: AbstractContainer length::Int dummy::Int - center_level_0::MVector{NDIMS, Float64} + center_level_0::SVector{NDIMS, Float64} length_level_0::Float64 periodicity::NTuple{NDIMS, Bool} @@ -55,7 +55,7 @@ mutable struct Tree{NDIMS} <: AbstractContainer t.length = 0 t.dummy = capacity + 1 - t.center_level_0 = @MVector fill(NaN, NDIMS) + t.center_level_0 = @SVector fill(NaN, NDIMS) t.length_level_0 = NaN return t From 3566b475cbb5c9f2b0ff792b7ad3fd3de66454d7 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Wed, 9 Sep 2020 15:11:47 +0200 Subject: [PATCH 22/81] Add MeshType to Dg2D/Dg3D parameters --- src/mesh/mesh.jl | 41 +++++++++++------------ src/mesh/parallel_tree.jl | 2 +- src/mesh/tree.jl | 2 +- src/solvers/dg/2d/dg.jl | 52 +++++++++++------------------ src/solvers/dg/2d/parallel.jl | 63 ++++++++++++++++++++++++++++++++--- src/solvers/dg/3d/dg.jl | 29 ++++++++-------- src/solvers/dg/dg.jl | 5 ++- 7 files changed, 118 insertions(+), 76 deletions(-) diff --git a/src/mesh/mesh.jl b/src/mesh/mesh.jl index d67127b915e..b0175400052 100644 --- a/src/mesh/mesh.jl +++ b/src/mesh/mesh.jl @@ -1,23 +1,22 @@ +abstract type AbstractTree{NDIMS} <: AbstractContainer end +@inline Base.ndims(::AbstractTree{NDIMS}) where NDIMS = NDIMS include("tree.jl") include("parallel_tree.jl") # Composite type to hold the actual tree in addition to other mesh-related data # that is not strictly part of the tree. 
-mutable struct TreeMesh{NDIMS, TreeType} +mutable struct TreeMesh{TreeType<:AbstractTree{NDIMS} where NDIMS} tree::TreeType current_filename::String unsaved_changes::Bool first_cell_by_domain::OffsetVector{Int, Vector{Int}} n_cells_by_domain::OffsetVector{Int, Vector{Int}} - function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer) where {NDIMS, TreeType} - # Verify that NDIMS is an integer - @assert NDIMS == ndims(TreeType) - + function TreeMesh{TreeType}(n_cells_max::Integer) where TreeType # Create mesh m = new() - m.tree = TreeType{NDIMS}(n_cells_max) + m.tree = TreeType(n_cells_max) m.current_filename = "" m.unsaved_changes = false m.first_cell_by_domain = OffsetVector(Int[], 0) @@ -26,11 +25,8 @@ mutable struct TreeMesh{NDIMS, TreeType} return m end - function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer, domain_center::AbstractArray{Float64}, - domain_length, periodicity=true) where{NDIMS, TreeType} - # Verify that NDIMS matches the tree - @assert NDIMS == ndims(TreeType) - + function TreeMesh{TreeType}(n_cells_max::Integer, domain_center::AbstractArray{Float64}, + domain_length, periodicity=true) where TreeType # Create mesh m = new() m.tree = TreeType(n_cells_max, domain_center, domain_length, periodicity) @@ -43,14 +39,16 @@ mutable struct TreeMesh{NDIMS, TreeType} end end +const TreeMesh1D = TreeMesh{TreeType} where {TreeType <: AbstractTree{1}} +const TreeMesh2D = TreeMesh{TreeType} where {TreeType <: AbstractTree{2}} +const TreeMesh3D = TreeMesh{TreeType} where {TreeType <: AbstractTree{3}} + # Constructor for passing the dimension and mesh type as an argument -function TreeMesh(::Val{NDIMS}, ::Val{TreeType}, args...) where {NDIMS, TreeType} - return TreeMesh{NDIMS, TreeType}(args...) -end +TreeMesh(::Type{TreeType}, args...) where TreeType = TreeMesh{TreeType}(args...) 
# Constructor accepting a single number as center (as opposed to an array) for 1D -function TreeMesh{1, TreeType}(n::Int, center::Real, len::Real, periodicity=true) where TreeType - return TreeMesh{1, TreeType}(n, [convert(Float64, center)], len, periodicity) +function TreeMesh{TreeType}(n::Int, center::Real, len::Real, periodicity=true) where {TreeType<:AbstractTree{1}} + return TreeMesh{TreeType}(n, [convert(Float64, center)], len, periodicity) end @@ -78,12 +76,11 @@ function generate_mesh() # Create mesh if is_parallel() - @timeit timer() "creation" mesh = TreeMesh(Val{ndims_}(), Val{ParallelTree{ndims_}}(), - n_cells_max, + @timeit timer() "creation" mesh = TreeMesh(ParallelTree{ndims_}, n_cells_max, domain_center, domain_length, periodicity) else - @timeit timer() "creation" mesh = TreeMesh(Val{ndims_}(), Val{Tree{ndims_}}(), n_cells_max, - domain_center, domain_length, periodicity) + @timeit timer() "creation" mesh = TreeMesh(Tree{ndims_}, n_cells_max, domain_center, + domain_length, periodicity) end # Create initial refinement @@ -94,7 +91,7 @@ function generate_mesh() # Partition mesh if is_parallel() - partition(mesh) + partition!(mesh) end # Apply refinement patches @@ -178,7 +175,7 @@ end # Partition mesh using a static domain decomposition algorithm based on leaf cell count alone # Return first cell id for each domain -function partition(mesh) +function partition!(mesh) # Determine number of leaf cells per domain leaves = leaf_cells(mesh.tree) @assert length(leaves) > n_domains() diff --git a/src/mesh/parallel_tree.jl b/src/mesh/parallel_tree.jl index dc7d492216e..978da841bc9 100644 --- a/src/mesh/parallel_tree.jl +++ b/src/mesh/parallel_tree.jl @@ -19,7 +19,7 @@ # function, which is required for implementing level-wise refinement in a sane # way. Also, depth-first ordering *might* not by guaranteed during # refinement/coarsening operations. -mutable struct ParallelTree{NDIMS} <: AbstractContainer +mutable struct ParallelTree{NDIMS} <: AbstractTree{NDIMS} parent_ids::Vector{Int} child_ids::Matrix{Int} neighbor_ids::Matrix{Int} diff --git a/src/mesh/tree.jl b/src/mesh/tree.jl index 09fd2071e34..a9462e8df79 100644 --- a/src/mesh/tree.jl +++ b/src/mesh/tree.jl @@ -19,7 +19,7 @@ # function, which is required for implementing level-wise refinement in a sane # way. Also, depth-first ordering *might* not by guaranteed during # refinement/coarsening operations. 
-mutable struct Tree{NDIMS} <: AbstractContainer +mutable struct Tree{NDIMS} <: AbstractTree{NDIMS} parent_ids::Vector{Int} child_ids::Matrix{Int} neighbor_ids::Matrix{Int} diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index 88a8e83a5a5..fe1ce95e49b 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -1,11 +1,12 @@ # Main DG data structure that contains all relevant data for the DG solver -mutable struct Dg2D{Eqn<:AbstractEquation, NVARS, POLYDEG, +mutable struct Dg2D{Eqn<:AbstractEquation, MeshType, NVARS, POLYDEG, SurfaceFlux, VolumeFlux, InitialConditions, SourceTerms, MortarType, VolumeIntegralType, ShockIndicatorVariable, VectorNnodes, MatrixNnodes, MatrixNnodes2, InverseVandermondeLegendre, MortarMatrix, - VectorAnalysisNnodes, AnalysisVandermonde} <: AbstractDg{2, POLYDEG} + VectorAnalysisNnodes, AnalysisVandermonde} <: AbstractDg{2, POLYDEG, MeshType} equations::Eqn + mesh_that_should_not_be_used::MeshType surface_flux_function::SurfaceFlux volume_flux_function::VolumeFlux @@ -85,7 +86,7 @@ end # Convenience constructor to create DG solver instance -function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, volume_flux_function, initial_conditions, source_terms, mesh::TreeMesh{NDIMS}, POLYDEG) where {NDIMS, NVARS} +function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, volume_flux_function, initial_conditions, source_terms, mesh::TreeMesh, POLYDEG) where {NDIMS, NVARS} # Get local cells for which an element needs to be created (i.e., all leaf cells) if is_parallel() leaf_cell_ids = local_leaf_cells(mesh.tree) @@ -250,7 +251,7 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v # Create actual DG solver instance dg = Dg2D( - equation, + equation, mesh, surface_flux_function, volume_flux_function, initial_conditions, source_terms, elements, n_elements, @@ -314,7 +315,7 @@ end # Count the number of interfaces that need to be created -function count_required_interfaces(mesh::TreeMesh{2}, cell_ids) +function count_required_interfaces(mesh::TreeMesh2D, cell_ids) count = 0 # Iterate over all cells @@ -350,7 +351,7 @@ end # Count the number of boundaries that need to be created -function count_required_boundaries(mesh::TreeMesh{2}, cell_ids) +function count_required_boundaries(mesh::TreeMesh2D, cell_ids) count = 0 # Iterate over all cells @@ -376,7 +377,7 @@ end # Count the number of mortars that need to be created -function count_required_mortars(mesh::TreeMesh{2}, cell_ids) +function count_required_mortars(mesh::TreeMesh2D, cell_ids) count = 0 # Iterate over all cells and count mortars from perspective of coarse cells @@ -405,7 +406,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_elements(cell_ids, mesh::TreeMesh{2}, ::Val{NVARS}, ::Val{POLYDEG}) where {NVARS, POLYDEG} +function init_elements(cell_ids, mesh::TreeMesh2D, ::Val{NVARS}, ::Val{POLYDEG}) where {NVARS, POLYDEG} # Initialize container n_elements = length(cell_ids) elements = ElementContainer2D{NVARS, POLYDEG}(n_elements) @@ -447,7 +448,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_interfaces(cell_ids, mesh::TreeMesh{2}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} +function init_interfaces(cell_ids, mesh::TreeMesh2D, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} # Initialize container n_interfaces = count_required_interfaces(mesh, cell_ids) interfaces = InterfaceContainer2D{NVARS, POLYDEG}(n_interfaces) @@ 
-463,7 +464,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_boundaries(cell_ids, mesh::TreeMesh{2}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} +function init_boundaries(cell_ids, mesh::TreeMesh2D, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} # Initialize container n_boundaries = count_required_boundaries(mesh, cell_ids) boundaries = BoundaryContainer2D{NVARS, POLYDEG}(n_boundaries) @@ -479,7 +480,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_mortars(cell_ids, mesh::TreeMesh{2}, ::Val{NVARS}, ::Val{POLYDEG}, elements, mortar_type) where {NVARS, POLYDEG} +function init_mortars(cell_ids, mesh::TreeMesh2D, ::Val{NVARS}, ::Val{POLYDEG}, elements, mortar_type) where {NVARS, POLYDEG} # Initialize containers n_mortars = count_required_mortars(mesh, cell_ids) if mortar_type === Val(:l2) @@ -508,7 +509,7 @@ end # Initialize connectivity between elements and interfaces -function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh{2}) +function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh2D) # Construct cell -> element mapping for easier algorithm implementation tree = mesh.tree c2e = zeros(Int, length(tree)) @@ -563,7 +564,7 @@ end # Initialize connectivity between elements and boundaries -function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh{2}) +function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh2D) # Reset boundaries count count = 0 @@ -626,7 +627,7 @@ end # Initialize connectivity between elements and mortars -function init_mortar_connectivity!(elements, mortars, mesh::TreeMesh{2}) +function init_mortar_connectivity!(elements, mortars, mesh::TreeMesh2D) # Construct cell -> element mapping for easier algorithm implementation tree = mesh.tree c2e = zeros(Int, length(tree)) @@ -1269,20 +1270,14 @@ function set_initial_conditions!(dg::Dg2D, time) end -# Calculate time derivative -function rhs!(dg::Dg2D, t_stage) - # Start to receive MPI data - is_parallel() && @timeit timer() "start MPI receive" start_mpi_receive!(dg) +@inline rhs!(dg::Dg2D, t_stage) = rhs!(dg, t_stage, uses_mpi(dg)) + +# Calculate time derivative +function rhs!(dg::Dg2D, t_stage, uses_mpi::Val{false}) # Reset u_t @timeit timer() "reset ∂u/∂t" dg.elements.u_t .= 0 - # Prolong solution to MPI interfaces - is_parallel() && @timeit timer() "prolong2mpiinterfaces" prolong2mpiinterfaces!(dg) - - # Start to send MPI data - is_parallel() && @timeit timer() "start MPI send" start_mpi_send!(dg) - # Calculate volume integral @timeit timer() "volume integral" calc_volume_integral!(dg) @@ -1304,12 +1299,6 @@ function rhs!(dg::Dg2D, t_stage) # Calculate mortar fluxes @timeit timer() "mortar flux" calc_mortar_flux!(dg) - # Finish to receive MPI data - is_parallel() && @timeit timer() "finish MPI receive" finish_mpi_receive!(dg) - - # Calculate MPI interface fluxes - is_parallel() && @timeit timer() "MPI interface flux" calc_mpi_interface_flux!(dg) - # Calculate surface integrals @timeit timer() "surface integral" calc_surface_integral!(dg) @@ -1318,9 +1307,6 @@ function rhs!(dg::Dg2D, t_stage) # Calculate source terms @timeit timer() "source terms" calc_sources!(dg, dg.source_terms, t_stage) - - # Finish to send MPI data - is_parallel() && @timeit timer() "finish MPI send" finish_mpi_send!(dg) end diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl index 56f50c7db22..513ab323a2e 100644 --- a/src/solvers/dg/2d/parallel.jl +++ 
b/src/solvers/dg/2d/parallel.jl @@ -1,5 +1,60 @@ +# Calculate time derivative +function rhs!(dg::Dg2D, t_stage, uses_mpi::Val{true}) + # Start to receive MPI data + @timeit timer() "start MPI receive" start_mpi_receive!(dg) + + # Reset u_t + @timeit timer() "reset ∂u/∂t" dg.elements.u_t .= 0 + + # Prolong solution to MPI interfaces + @timeit timer() "prolong2mpiinterfaces" prolong2mpiinterfaces!(dg) + + # Start to send MPI data + @timeit timer() "start MPI send" start_mpi_send!(dg) + + # Calculate volume integral + @timeit timer() "volume integral" calc_volume_integral!(dg) + + # Prolong solution to interfaces + @timeit timer() "prolong2interfaces" prolong2interfaces!(dg) + + # Calculate interface fluxes + @timeit timer() "interface flux" calc_interface_flux!(dg) + + # Prolong solution to boundaries + @timeit timer() "prolong2boundaries" prolong2boundaries!(dg) + + # Calculate boundary fluxes + @timeit timer() "boundary flux" calc_boundary_flux!(dg, t_stage) + + # Prolong solution to mortars + @timeit timer() "prolong2mortars" prolong2mortars!(dg) + + # Calculate mortar fluxes + @timeit timer() "mortar flux" calc_mortar_flux!(dg) + + # Finish to receive MPI data + @timeit timer() "finish MPI receive" finish_mpi_receive!(dg) + + # Calculate MPI interface fluxes + @timeit timer() "MPI interface flux" calc_mpi_interface_flux!(dg) + + # Calculate surface integrals + @timeit timer() "surface integral" calc_surface_integral!(dg) + + # Apply Jacobian from mapping to reference element + @timeit timer() "Jacobian" apply_jacobian!(dg) + + # Calculate source terms + @timeit timer() "source terms" calc_sources!(dg, dg.source_terms, t_stage) + + # Finish to send MPI data + @timeit timer() "finish MPI send" finish_mpi_send!(dg) +end + + # Count the number of MPI interfaces that need to be created -function count_required_mpi_interfaces(mesh::TreeMesh{2}, cell_ids) +function count_required_mpi_interfaces(mesh::TreeMesh2D, cell_ids) count = 0 # Iterate over all cells @@ -30,7 +85,7 @@ end # Create MPI interface container, initialize interface data, and return interface container for further use -function init_mpi_interfaces(cell_ids, mesh::TreeMesh{2}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} +function init_mpi_interfaces(cell_ids, mesh::TreeMesh2D, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} # Initialize container n_mpi_interfaces = count_required_mpi_interfaces(mesh, cell_ids) mpi_interfaces = MpiInterfaceContainer2D{NVARS, POLYDEG}(n_mpi_interfaces) @@ -50,7 +105,7 @@ end # Initialize connectivity between elements and interfaces -function init_mpi_interface_connectivity!(elements, mpi_interfaces, mesh::TreeMesh{2}) +function init_mpi_interface_connectivity!(elements, mpi_interfaces, mesh::TreeMesh2D) # Reset interface count count = 0 @@ -102,7 +157,7 @@ end # Initialize connectivity between MPI neighbor domains -function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh::TreeMesh{2}) +function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh::TreeMesh2D) tree = mesh.tree # Determine neighbor domains and sides for MPI interfaces diff --git a/src/solvers/dg/3d/dg.jl b/src/solvers/dg/3d/dg.jl index 517ecd34ef1..d4dfb79a402 100644 --- a/src/solvers/dg/3d/dg.jl +++ b/src/solvers/dg/3d/dg.jl @@ -1,11 +1,12 @@ # Main DG data structure that contains all relevant data for the DG solver -mutable struct Dg3D{Eqn<:AbstractEquation, NVARS, POLYDEG, +mutable struct Dg3D{Eqn<:AbstractEquation, MeshType, NVARS, POLYDEG, SurfaceFlux, VolumeFlux, 
InitialConditions, SourceTerms, MortarType, VolumeIntegralType, ShockIndicatorVariable, VectorNnodes, MatrixNnodes, MatrixNnodes2, InverseVandermondeLegendre, MortarMatrix, - VectorAnalysisNnodes, AnalysisVandermonde} <: AbstractDg{3, POLYDEG} + VectorAnalysisNnodes, AnalysisVandermonde} <: AbstractDg{3, POLYDEG, MeshType} equations::Eqn + mesh_that_should_not_be_used::MeshType surface_flux_function::SurfaceFlux volume_flux_function::VolumeFlux @@ -69,7 +70,7 @@ end # Convenience constructor to create DG solver instance -function Dg3D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, volume_flux_function, initial_conditions, source_terms, mesh::TreeMesh{NDIMS}, POLYDEG) where {NDIMS, NVARS} +function Dg3D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, volume_flux_function, initial_conditions, source_terms, mesh::TreeMesh3D, POLYDEG) where {NDIMS, NVARS} # Get cells for which an element needs to be created (i.e., all leaf cells) leaf_cell_ids = leaf_cells(mesh.tree) @@ -189,7 +190,7 @@ function Dg3D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v # Create actual DG solver instance dg = Dg3D( - equation, + equation, mesh, surface_flux_function, volume_flux_function, initial_conditions, source_terms, elements, n_elements, @@ -258,7 +259,7 @@ end # Count the number of interfaces that need to be created -function count_required_interfaces(mesh::TreeMesh{3}, cell_ids) +function count_required_interfaces(mesh::TreeMesh3D, cell_ids) count = 0 # Iterate over all cells @@ -289,7 +290,7 @@ end # Count the number of boundaries that need to be created -function count_required_boundaries(mesh::TreeMesh{3}, cell_ids) +function count_required_boundaries(mesh::TreeMesh3D, cell_ids) count = 0 # Iterate over all cells @@ -315,7 +316,7 @@ end # Count the number of mortars that need to be created -function count_required_mortars(mesh::TreeMesh{3}, cell_ids) +function count_required_mortars(mesh::TreeMesh3D, cell_ids) count = 0 # Iterate over all cells and count mortars from perspective of coarse cells @@ -344,7 +345,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_elements(cell_ids, mesh::TreeMesh{3}, ::Val{NVARS}, ::Val{POLYDEG}) where {NVARS, POLYDEG} +function init_elements(cell_ids, mesh::TreeMesh3D, ::Val{NVARS}, ::Val{POLYDEG}) where {NVARS, POLYDEG} # Initialize container n_elements = length(cell_ids) elements = ElementContainer3D{NVARS, POLYDEG}(n_elements) @@ -386,7 +387,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_interfaces(cell_ids, mesh::TreeMesh{3}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} +function init_interfaces(cell_ids, mesh::TreeMesh3D, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} # Initialize container n_interfaces = count_required_interfaces(mesh, cell_ids) interfaces = InterfaceContainer3D{NVARS, POLYDEG}(n_interfaces) @@ -402,7 +403,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_boundaries(cell_ids, mesh::TreeMesh{3}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} +function init_boundaries(cell_ids, mesh::TreeMesh3D, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} # Initialize container n_boundaries = count_required_boundaries(mesh, cell_ids) boundaries = BoundaryContainer3D{NVARS, POLYDEG}(n_boundaries) @@ -418,7 +419,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_mortars(cell_ids, mesh::TreeMesh{3}, ::Val{NVARS}, 
::Val{POLYDEG}, elements, mortar_type) where {NVARS, POLYDEG} +function init_mortars(cell_ids, mesh::TreeMesh3D, ::Val{NVARS}, ::Val{POLYDEG}, elements, mortar_type) where {NVARS, POLYDEG} # Initialize containers n_mortars = count_required_mortars(mesh, cell_ids) if mortar_type === Val(:l2) @@ -440,7 +441,7 @@ end # Initialize connectivity between elements and interfaces -function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh{3}) +function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh3D) # Construct cell -> element mapping for easier algorithm implementation tree = mesh.tree c2e = zeros(Int, length(tree)) @@ -496,7 +497,7 @@ end # Initialize connectivity between elements and boundaries -function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh{3}) +function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh3D) # Reset boundaries count count = 0 @@ -565,7 +566,7 @@ end # Initialize connectivity between elements and mortars -function init_mortar_connectivity!(elements, mortars, mesh::TreeMesh{3}) +function init_mortar_connectivity!(elements, mortars, mesh::TreeMesh3D) # Construct cell -> element mapping for easier algorithm implementation tree = mesh.tree c2e = zeros(Int, length(tree)) diff --git a/src/solvers/dg/dg.jl b/src/solvers/dg/dg.jl index 391fed5175b..4e3d086c257 100644 --- a/src/solvers/dg/dg.jl +++ b/src/solvers/dg/dg.jl @@ -1,6 +1,6 @@ # Abstract supertype for DG-type solvers # `POLYDEG` corresponds to `N` in the school of Kopriva -abstract type AbstractDg{NDIMS, POLYDEG} <: AbstractSolver{NDIMS} end +abstract type AbstractDg{NDIMS, POLYDEG, MeshType} <: AbstractSolver{NDIMS} end @inline Base.ndims(dg::AbstractDg) = ndims(equations(dg)) @@ -19,6 +19,9 @@ abstract type AbstractDg{NDIMS, POLYDEG} <: AbstractSolver{NDIMS} end # Return number of degrees of freedom @inline ndofs(dg::AbstractDg) = dg.n_elements * nnodes(dg)^ndims(dg) +@inline uses_mpi(::AbstractDg{NDIMS, POLYDEG, TreeMesh{ParallelTree{NDIMS}}}) where {NDIMS, POLYDEG}= Val(true) +@inline uses_mpi(::AbstractDg{NDIMS, POLYDEG, TreeMesh{Tree{NDIMS}}}) where {NDIMS, POLYDEG} = Val(false) + """ get_node_coords(x, dg::AbstractDg, indices...) 
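
The `uses_mpi` trait defined just above is what the split `rhs!` methods in this patch dispatch on: because the mesh type is now part of the solver's type parameters, the choice between the serial and the MPI code path is made by the compiler rather than by a runtime `is_parallel()` check. The following minimal, self-contained sketch illustrates the pattern with hypothetical `SerialMesh`/`ParallelMesh`/`Solver` stand-ins (not Trixi's actual types):

# Minimal sketch of mesh-type-based dispatch; SerialMesh, ParallelMesh and Solver are
# illustrative stand-ins, not Trixi's actual structs
struct SerialMesh end
struct ParallelMesh end

struct Solver{MeshType}
  mesh::MeshType
end

# Trait function: the mesh type alone decides whether the MPI code path is used
uses_mpi(::Solver{SerialMesh}) = Val(false)
uses_mpi(::Solver{ParallelMesh}) = Val(true)

# Entry point forwards to the specialized method; the Val argument is resolved at compile time
rhs!(solver, t) = rhs!(solver, t, uses_mpi(solver))

rhs!(solver, t, ::Val{false}) = println("serial rhs! at t = ", t)
rhs!(solver, t, ::Val{true}) = println("MPI-parallel rhs! at t = ", t)

# Usage: the selected method depends only on the solver's mesh type parameter
rhs!(Solver(SerialMesh()), 0.0)
rhs!(Solver(ParallelMesh()), 0.0)

Since the trait returns `Val(true)`/`Val(false)` objects rather than plain booleans, the branch lives in the type domain and each specialized `rhs!` method contains only its own code path.
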
From f7d24632b2387438418e41d7fc742af8b41521fb Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Wed, 9 Sep 2020 16:49:05 +0200 Subject: [PATCH 23/81] Remove mesh from Dg2D struct again --- src/solvers/dg/2d/dg.jl | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index fe1ce95e49b..c0d66ec3570 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -6,7 +6,6 @@ mutable struct Dg2D{Eqn<:AbstractEquation, MeshType, NVARS, POLYDEG, InverseVandermondeLegendre, MortarMatrix, VectorAnalysisNnodes, AnalysisVandermonde} <: AbstractDg{2, POLYDEG, MeshType} equations::Eqn - mesh_that_should_not_be_used::MeshType surface_flux_function::SurfaceFlux volume_flux_function::VolumeFlux @@ -125,11 +124,13 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v # Initialize interpolation data structures n_nodes = POLYDEG + 1 nodes, weights = gauss_lobatto_nodes_weights(n_nodes) + nodes = SVector{POLYDEG+1}(nodes) inverse_weights = 1 ./ weights _, inverse_vandermonde_legendre = vandermonde_legendre(nodes) lhat = zeros(n_nodes, 2) lhat[:, 1] = calc_lhat(-1.0, nodes, weights) lhat[:, 2] = calc_lhat( 1.0, nodes, weights) + lhat = SMatrix{POLYDEG+1,2}(lhat) # Initialize differentiation operator volume_integral_type = Val(Symbol(parameter("volume_integral_type", "weak_form", @@ -139,6 +140,7 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v volume_integral_type = Val(:weak_form) end dhat = calc_dhat(nodes, weights) + dhat = SMatrix{POLYDEG+1,POLYDEG+1}(dhat) dsplit = calc_dsplit(nodes, weights) dsplit_transposed = transpose(calc_dsplit(nodes, weights)) @@ -149,11 +151,18 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v l2mortar_reverse_lower = calc_reverse_lower(n_nodes, Val(:gauss)) ecmortar_reverse_upper = calc_reverse_upper(n_nodes, Val(:gauss_lobatto)) ecmortar_reverse_lower = calc_reverse_lower(n_nodes, Val(:gauss_lobatto)) + mortar_forward_upper = SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_upper) + mortar_forward_lower = SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_lower) + l2mortar_reverse_upper = SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_upper) + l2mortar_reverse_lower = SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_lower) + ecmortar_reverse_upper = SMatrix{POLYDEG+1,POLYDEG+1}(ecmortar_reverse_upper) + ecmortar_reverse_lower = SMatrix{POLYDEG+1,POLYDEG+1}(ecmortar_reverse_lower) # Initialize data structures for error analysis (by default, we use twice the # number of analysis nodes as the normal solution) analysis_polydeg = 2 * (n_nodes) - 1 analysis_nodes, analysis_weights = gauss_lobatto_nodes_weights(analysis_polydeg + 1) + analysis_nodes = SVector{analysis_polydeg+1}(analysis_nodes) analysis_weights_volume = analysis_weights analysis_vandermonde = polynomial_interpolation_matrix(nodes, analysis_nodes) analysis_total_volume = mesh.tree.length_level_0^ndims(mesh) @@ -250,8 +259,11 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v initial_state_integrals = Vector{Float64}() # Create actual DG solver instance - dg = Dg2D( - equation, mesh, + dg = Dg2D{typeof(equation), typeof(mesh), NVARS, POLYDEG, typeof(surface_flux_function), typeof(volume_flux_function), + typeof(initial_conditions), typeof(source_terms), typeof(mortar_type), typeof(volume_integral_type), typeof(shock_indicator_variable), + typeof(nodes), typeof(dhat), typeof(lhat), 
typeof(inverse_vandermonde_legendre), typeof(mortar_forward_upper), + typeof(analysis_nodes), typeof(analysis_vandermonde)}( + equation, surface_flux_function, volume_flux_function, initial_conditions, source_terms, elements, n_elements, From d8212caaeb63cb8bad4d9e894e476d52cca55426 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Thu, 10 Sep 2020 05:27:32 +0200 Subject: [PATCH 24/81] Also clean up Dg3D constructor --- src/solvers/dg/2d/dg.jl | 52 ++++++++++++++++++++++++----------------- src/solvers/dg/3d/dg.jl | 40 +++++++++++++++++++++++-------- 2 files changed, 61 insertions(+), 31 deletions(-) diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index c0d66ec3570..2a7fcdbb3e8 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -124,13 +124,11 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v # Initialize interpolation data structures n_nodes = POLYDEG + 1 nodes, weights = gauss_lobatto_nodes_weights(n_nodes) - nodes = SVector{POLYDEG+1}(nodes) inverse_weights = 1 ./ weights _, inverse_vandermonde_legendre = vandermonde_legendre(nodes) lhat = zeros(n_nodes, 2) lhat[:, 1] = calc_lhat(-1.0, nodes, weights) lhat[:, 2] = calc_lhat( 1.0, nodes, weights) - lhat = SMatrix{POLYDEG+1,2}(lhat) # Initialize differentiation operator volume_integral_type = Val(Symbol(parameter("volume_integral_type", "weak_form", @@ -140,7 +138,6 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v volume_integral_type = Val(:weak_form) end dhat = calc_dhat(nodes, weights) - dhat = SMatrix{POLYDEG+1,POLYDEG+1}(dhat) dsplit = calc_dsplit(nodes, weights) dsplit_transposed = transpose(calc_dsplit(nodes, weights)) @@ -151,18 +148,11 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v l2mortar_reverse_lower = calc_reverse_lower(n_nodes, Val(:gauss)) ecmortar_reverse_upper = calc_reverse_upper(n_nodes, Val(:gauss_lobatto)) ecmortar_reverse_lower = calc_reverse_lower(n_nodes, Val(:gauss_lobatto)) - mortar_forward_upper = SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_upper) - mortar_forward_lower = SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_lower) - l2mortar_reverse_upper = SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_upper) - l2mortar_reverse_lower = SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_lower) - ecmortar_reverse_upper = SMatrix{POLYDEG+1,POLYDEG+1}(ecmortar_reverse_upper) - ecmortar_reverse_lower = SMatrix{POLYDEG+1,POLYDEG+1}(ecmortar_reverse_lower) # Initialize data structures for error analysis (by default, we use twice the # number of analysis nodes as the normal solution) analysis_polydeg = 2 * (n_nodes) - 1 analysis_nodes, analysis_weights = gauss_lobatto_nodes_weights(analysis_polydeg + 1) - analysis_nodes = SVector{analysis_polydeg+1}(analysis_nodes) analysis_weights_volume = analysis_weights analysis_vandermonde = polynomial_interpolation_matrix(nodes, analysis_nodes) analysis_total_volume = mesh.tree.length_level_0^ndims(mesh) @@ -258,11 +248,31 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v # Store initial state integrals for conservation error calculation initial_state_integrals = Vector{Float64}() + # Convert all performance-critical fields to StaticArrays types + nodes = SVector{POLYDEG+1}(nodes) + weights = SVector{POLYDEG+1}(weights) + inverse_weights = SVector{POLYDEG+1}(inverse_weights) + lhat = SMatrix{POLYDEG+1,2}(lhat) + dhat = SMatrix{POLYDEG+1,POLYDEG+1}(dhat) + dsplit = SMatrix{POLYDEG+1,POLYDEG+1}(dsplit) + 
dsplit_transposed = SMatrix{POLYDEG+1,POLYDEG+1}(dsplit_transposed) + mortar_forward_upper = SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_upper) + mortar_forward_lower = SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_lower) + l2mortar_reverse_upper = SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_upper) + l2mortar_reverse_lower = SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_lower) + ecmortar_reverse_upper = SMatrix{POLYDEG+1,POLYDEG+1}(ecmortar_reverse_upper) + ecmortar_reverse_lower = SMatrix{POLYDEG+1,POLYDEG+1}(ecmortar_reverse_lower) + analysis_nodes = SVector{analysis_polydeg+1}(analysis_nodes) + analysis_weights = SVector{analysis_polydeg+1}(analysis_weights) + analysis_weights_volume = SVector{analysis_polydeg+1}(analysis_weights_volume) + # Create actual DG solver instance - dg = Dg2D{typeof(equation), typeof(mesh), NVARS, POLYDEG, typeof(surface_flux_function), typeof(volume_flux_function), - typeof(initial_conditions), typeof(source_terms), typeof(mortar_type), typeof(volume_integral_type), typeof(shock_indicator_variable), - typeof(nodes), typeof(dhat), typeof(lhat), typeof(inverse_vandermonde_legendre), typeof(mortar_forward_upper), - typeof(analysis_nodes), typeof(analysis_vandermonde)}( + dg = Dg2D{typeof(equation), typeof(mesh), NVARS, POLYDEG, + typeof(surface_flux_function), typeof(volume_flux_function), typeof(initial_conditions), + typeof(source_terms), + typeof(mortar_type), typeof(volume_integral_type), typeof(shock_indicator_variable), + typeof(nodes), typeof(dhat), typeof(lhat), typeof(inverse_vandermonde_legendre), + typeof(mortar_forward_upper), typeof(analysis_nodes), typeof(analysis_vandermonde)}( equation, surface_flux_function, volume_flux_function, initial_conditions, source_terms, @@ -273,14 +283,14 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v mortar_type, l2mortars, n_l2mortars, ecmortars, n_ecmortars, - SVector{POLYDEG+1}(nodes), SVector{POLYDEG+1}(weights), SVector{POLYDEG+1}(inverse_weights), - inverse_vandermonde_legendre, SMatrix{POLYDEG+1,2}(lhat), + nodes, weights, inverse_weights, + inverse_vandermonde_legendre, lhat, volume_integral_type, - SMatrix{POLYDEG+1,POLYDEG+1}(dhat), SMatrix{POLYDEG+1,POLYDEG+1}(dsplit), SMatrix{POLYDEG+1,POLYDEG+1}(dsplit_transposed), - SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_upper), SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_lower), - SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_upper), SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_lower), - SMatrix{POLYDEG+1,POLYDEG+1}(ecmortar_reverse_upper), SMatrix{POLYDEG+1,POLYDEG+1}(ecmortar_reverse_lower), - SVector{analysis_polydeg+1}(analysis_nodes), SVector{analysis_polydeg+1}(analysis_weights), SVector{analysis_polydeg+1}(analysis_weights_volume), + dhat, dsplit, dsplit_transposed, + mortar_forward_upper, mortar_forward_lower, + l2mortar_reverse_upper, l2mortar_reverse_lower, + ecmortar_reverse_upper, ecmortar_reverse_lower, + analysis_nodes, analysis_weights, analysis_weights_volume, analysis_vandermonde, analysis_total_volume, analysis_quantities, save_analysis, analysis_filename, shock_indicator_variable, shock_alpha_max, shock_alpha_min, shock_alpha_smooth, diff --git a/src/solvers/dg/3d/dg.jl b/src/solvers/dg/3d/dg.jl index d4dfb79a402..0a8b623d4bd 100644 --- a/src/solvers/dg/3d/dg.jl +++ b/src/solvers/dg/3d/dg.jl @@ -6,7 +6,6 @@ mutable struct Dg3D{Eqn<:AbstractEquation, MeshType, NVARS, POLYDEG, InverseVandermondeLegendre, MortarMatrix, VectorAnalysisNnodes, AnalysisVandermonde} <: AbstractDg{3, POLYDEG, MeshType} equations::Eqn 
- mesh_that_should_not_be_used::MeshType surface_flux_function::SurfaceFlux volume_flux_function::VolumeFlux @@ -125,8 +124,8 @@ function Dg3D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v # Initialize data structures for error analysis (by default, we use twice the # number of analysis nodes as the normal solution) - NAna = 2 * (n_nodes) - 1 - analysis_nodes, analysis_weights = gauss_lobatto_nodes_weights(NAna + 1) + analysis_polydeg = 2 * (n_nodes) - 1 + analysis_nodes, analysis_weights = gauss_lobatto_nodes_weights(analysis_polydeg + 1) analysis_weights_volume = analysis_weights analysis_vandermonde = polynomial_interpolation_matrix(nodes, analysis_nodes) analysis_total_volume = mesh.tree.length_level_0^ndims(mesh) @@ -188,9 +187,30 @@ function Dg3D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v # Store initial state integrals for conservation error calculation initial_state_integrals = Vector{Float64}() + # Convert all performance-critical fields to StaticArrays types + nodes = SVector{POLYDEG+1}(nodes) + weights = SVector{POLYDEG+1}(weights) + inverse_weights = SVector{POLYDEG+1}(inverse_weights) + lhat = SMatrix{POLYDEG+1,2}(lhat) + dhat = SMatrix{POLYDEG+1,POLYDEG+1}(dhat) + dsplit = SMatrix{POLYDEG+1,POLYDEG+1}(dsplit) + dsplit_transposed = SMatrix{POLYDEG+1,POLYDEG+1}(dsplit_transposed) + mortar_forward_upper = SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_upper) + mortar_forward_lower = SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_lower) + l2mortar_reverse_upper = SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_upper) + l2mortar_reverse_lower = SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_lower) + analysis_nodes = SVector{analysis_polydeg+1}(analysis_nodes) + analysis_weights = SVector{analysis_polydeg+1}(analysis_weights) + analysis_weights_volume = SVector{analysis_polydeg+1}(analysis_weights_volume) + # Create actual DG solver instance - dg = Dg3D( - equation, mesh, + dg = Dg3D{typeof(equation), typeof(mesh), NVARS, POLYDEG, + typeof(surface_flux_function), typeof(volume_flux_function), typeof(initial_conditions), + typeof(source_terms), + typeof(mortar_type), typeof(volume_integral_type), typeof(shock_indicator_variable), + typeof(nodes), typeof(dhat), typeof(lhat), typeof(inverse_vandermonde_legendre), + typeof(mortar_forward_upper), typeof(analysis_nodes), typeof(analysis_vandermonde)}( + equation, surface_flux_function, volume_flux_function, initial_conditions, source_terms, elements, n_elements, @@ -198,13 +218,13 @@ function Dg3D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v boundaries, n_boundaries, mortar_type, l2mortars, n_l2mortars, - SVector{POLYDEG+1}(nodes), SVector{POLYDEG+1}(weights), SVector{POLYDEG+1}(inverse_weights), + nodes, weights, inverse_weights, inverse_vandermonde_legendre, SMatrix{POLYDEG+1,2}(lhat), volume_integral_type, - SMatrix{POLYDEG+1,POLYDEG+1}(dhat), SMatrix{POLYDEG+1,POLYDEG+1}(dsplit), SMatrix{POLYDEG+1,POLYDEG+1}(dsplit_transposed), - SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_upper), SMatrix{POLYDEG+1,POLYDEG+1}(mortar_forward_lower), - SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_upper), SMatrix{POLYDEG+1,POLYDEG+1}(l2mortar_reverse_lower), - SVector{NAna+1}(analysis_nodes), SVector{NAna+1}(analysis_weights), SVector{NAna+1}(analysis_weights_volume), + dhat, dsplit, dsplit_transposed, + mortar_forward_upper, mortar_forward_lower, + l2mortar_reverse_upper, l2mortar_reverse_lower, + analysis_nodes, analysis_weights, analysis_weights_volume, analysis_vandermonde, 
analysis_total_volume, analysis_quantities, save_analysis, analysis_filename, shock_indicator_variable, shock_alpha_max, shock_alpha_min, shock_alpha_smooth, From baf999dcc7709b8a9db7631a58a4fbed05074cc9 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Thu, 10 Sep 2020 12:56:12 +0200 Subject: [PATCH 25/81] Store `n_elements_by_domain` in solver for MPI Gatherv/Scatterv operations --- src/solvers/dg/2d/dg.jl | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index 2a7fcdbb3e8..220dab21010 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -73,6 +73,7 @@ mutable struct Dg2D{Eqn<:AbstractEquation, MeshType, NVARS, POLYDEG, mpi_recv_buffers::Vector{Vector{Float64}} mpi_send_requests::Vector{MPI.Request} mpi_recv_requests::Vector{MPI.Request} + n_elements_by_domain::OffsetArray{Int, 1, Array{Int, 1}} n_elements_global::Int first_element_global_id::Int @@ -215,8 +216,15 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v mpi_recv_requests) = init_mpi_data_structures(mpi_neighbor_interfaces, Val(NDIMS), Val(NVARS), Val(POLYDEG)) - # Determine total number of elements and the global element id of the first element + # Determine local and total number of elements + n_elements_by_domain = Vector{Int}(undef, n_domains()) + n_elements_by_domain[domain_id() + 1] = n_elements + MPI.Allgather!(n_elements_by_domain, 1, mpi_comm()) + n_elements_by_domain = OffsetArray(n_elements_by_domain, 0:(n_domains() - 1)) n_elements_global = MPI.Allreduce(n_elements, +, mpi_comm()) + @assert n_elements_global == sum(n_elements_by_domain) "error in total number of elements" + + # Determine the global element id of the first element first_element_global_id = MPI.Exscan(n_elements, +, mpi_comm()) if is_mpi_root() # With Exscan, the result on the first rank is undefined @@ -232,6 +240,7 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v mpi_recv_buffers = Vector{Float64}[] mpi_send_requests = MPI.Request[] mpi_recv_requests = MPI.Request[] + n_elements_by_domain = OffsetArray([n_elements], 0:0) n_elements_global = n_elements first_element_global_id = 1 end @@ -297,7 +306,7 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v amr_indicator, amr_alpha_max, amr_alpha_min, amr_alpha_smooth, mpi_neighbor_domain_ids, mpi_neighbor_interfaces, mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests, - n_elements_global, first_element_global_id, + n_elements_by_domain, n_elements_global, first_element_global_id, element_variables, cache, thread_cache, initial_state_integrals) From 67d2c73b1cd0c72794d3b1bbad35218c248fd9be Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Thu, 10 Sep 2020 16:15:16 +0200 Subject: [PATCH 26/81] Writing restart files in parallel works (it seems) --- src/io/io.jl | 86 ++++++++++++++++++++++++++++++++++------- src/solvers/dg/2d/dg.jl | 4 +- 2 files changed, 74 insertions(+), 16 deletions(-) diff --git a/src/io/io.jl b/src/io/io.jl index 7c320f0055a..1f4ee765407 100644 --- a/src/io/io.jl +++ b/src/io/io.jl @@ -75,7 +75,7 @@ function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep) attrs(file)["equations"] = get_name(equation) attrs(file)["polydeg"] = polydeg(dg) attrs(file)["n_vars"] = nvariables(dg) - attrs(file)["n_elements"] = dg.n_elements + attrs(file)["n_elements"] = dg.n_elements_global attrs(file)["mesh_file"] = 
splitdir(mesh.current_filename)[2] attrs(file)["time"] = time attrs(file)["dt"] = dt @@ -85,20 +85,78 @@ function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep) data = dg.elements.u varnames = varnames_cons(equation) - # Store each variable of the solution - for v in 1:nvariables(dg) - # Convert to 1D array - if ndims(dg) == 2 - file["variables_$v"] = vec(data[v, :, :, :]) - elseif ndims(dg) == 3 - file["variables_$v"] = vec(data[v, :, :, :, :]) - else - error("Unsupported number of spatial dimensions: ", ndims(dg)) + # If in parallel, only write from MPI root (poor man's version of parallel I/O) + if is_parallel() # Parallel I/O version + element_size = nnodes(dg)^ndims(dg) + counts = convert(Vector{Cint}, collect(dg.n_elements_by_domain)) * Cint(element_size) + + # Store data in buffer + if is_mpi_root() + first_buffer_index = (dg.first_element_global_id - 1) * element_size + 1 + local_data_size = element_size * dg.n_elements + last_buffer_index = first_buffer_index + local_data_size - 1 + + # Create buffer for global element data + buffer = Vector{eltype(data)}(undef, element_size * dg.n_elements_global) + + # Store each variable of the solution + for v in 1:nvariables(dg) + # Convert to 1D array and store in global buffer + if ndims(dg) == 2 + buffer[first_buffer_index:last_buffer_index] = vec(data[v, :, :, :]) + elseif ndims(dg) == 3 + buffer[first_buffer_index:last_buffer_index] = vec(data[v, :, :, :, :]) + else + error("Unsupported number of spatial dimensions: ", ndims(dg)) + end + + # Collect data on root domain + # Note: `collect(...)` is required since we store domain info in OffsetArrays + MPI.Gatherv!(nothing, buffer, counts, mpi_root(), mpi_comm()) + + # Write to file + file["variables_$v"] = buffer + + # Add variable name as attribute + var = file["variables_$v"] + attrs(var)["name"] = varnames[v] + end + else # On non-root domains + # Create buffer for local element data + buffer = Vector{eltype(data)}(undef, element_size * dg.n_elements) + + # Store each variable of the solution + for v in 1:nvariables(dg) + # Convert to 1D array and store in global buffer + if ndims(dg) == 2 + buffer[:] = vec(data[v, :, :, :]) + elseif ndims(dg) == 3 + buffer[:] = vec(data[v, :, :, :, :]) + else + error("Unsupported number of spatial dimensions: ", ndims(dg)) + end + + # Collect data on root domain + # Note: `collect(...)` is required since we store domain info in OffsetArrays + MPI.Gatherv!(buffer, nothing, counts, mpi_root(), mpi_comm()) + end + end + else # Serial I/O version + # Store each variable of the solution + for v in 1:nvariables(dg) + # Convert to 1D array + if ndims(dg) == 2 + file["variables_$v"] = vec(data[v, :, :, :]) + elseif ndims(dg) == 3 + file["variables_$v"] = vec(data[v, :, :, :, :]) + else + error("Unsupported number of spatial dimensions: ", ndims(dg)) + end + + # Add variable name as attribute + var = file["variables_$v"] + attrs(var)["name"] = varnames[v] end - - # Add variable name as attribute - var = file["variables_$v"] - attrs(var)["name"] = varnames[v] end end end diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index 220dab21010..33e215b2db4 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -932,10 +932,10 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step:: " run time: " * @sprintf("%10.8e s", runtime_absolute)) println(" dt: " * @sprintf("%10.8e", dt) * " " * - " PID (total): " * @sprintf("%10.8e s", runtime_relative)) + " PID : " * @sprintf("%10.8e s", 
runtime_relative)) println(" sim. time: " * @sprintf("%10.8e", time) * " " * - " PID (serial): " * @sprintf("%10.8e s", runtime_relative * n_domains())) + " PID × #domains: " * @sprintf("%10.8e s", runtime_relative * n_domains())) end # Level information (only show for AMR) From 5533451d9822168ecb4d4523bb838dc8c10002c1 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sat, 19 Sep 2020 11:20:33 +0200 Subject: [PATCH 27/81] Fix errors from previous merge --- src/solvers/dg/2d/dg.jl | 4 ++-- src/solvers/dg/3d/dg.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index 6f8b741ba1b..5e3c2368244 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -296,7 +296,7 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v elements, n_elements, interfaces, n_interfaces, mpi_interfaces, n_mpi_interfaces, - boundaries, n_boundaries, n_boundaries_per_direction,, + boundaries, n_boundaries, n_boundaries_per_direction, mortar_type, l2mortars, n_l2mortars, ecmortars, n_ecmortars, @@ -748,7 +748,7 @@ function init_mortar_connectivity!(elements, mortars, mesh::TreeMesh2D) end -function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh{2}) +function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh2D) # "eval is evil" # This is a temporary hack until we have switched to a library based approach # with pure Julia code instead of parameter files. diff --git a/src/solvers/dg/3d/dg.jl b/src/solvers/dg/3d/dg.jl index a3e882f3646..23571536b85 100644 --- a/src/solvers/dg/3d/dg.jl +++ b/src/solvers/dg/3d/dg.jl @@ -726,7 +726,7 @@ function init_mortar_connectivity!(elements, mortars, mesh::TreeMesh3D) end -function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh{3}) +function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh3D) # "eval is evil" # This is a temporary hack until we have switched to a library based approach # with pure Julia code instead of parameter files. 
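
The parallel restart output in the patches above combines a handful of MPI building blocks: `MPI.Allreduce` and `MPI.Exscan` yield the global element count and each domain's offset, `MPI.Allgather!` distributes the per-domain element counts, and `MPI.Gatherv!` collects the variable-length element data on the root domain. The following condensed, self-contained sketch uses dummy data and mirrors the call signatures used in these patches (they may differ in newer MPI.jl releases); run it with several MPI ranks:

# Sketch of the global-numbering and gather pattern; dummy sizes/data, call signatures
# as in the patches above (assumes the MPI.jl version of that time)
using MPI

MPI.Init()
comm = MPI.COMM_WORLD
rank = MPI.Comm_rank(comm)
n_ranks = MPI.Comm_size(comm)

# Pretend each rank owns a different number of elements
n_local = rank + 2
local_data = fill(Float64(rank), n_local)

# Global element count and per-rank counts (in-place Allgather! as in the Dg2D constructor)
n_global = MPI.Allreduce(n_local, +, comm)
counts = Vector{Cint}(undef, n_ranks)
counts[rank + 1] = Cint(n_local)
MPI.Allgather!(counts, 1, comm)

# Exscan sums the contributions of all lower ranks; the result on rank 0 is undefined,
# hence the special case. Adding 1 converts the zero-based offset to a one-based global id.
offset = MPI.Exscan(n_local, +, comm)
first_global_id = rank == 0 ? 1 : offset + 1

# Gather the variable-length per-rank data into one global vector on the root rank
if rank == 0
  buffer = Vector{Float64}(undef, n_global)
  buffer[first_global_id:(first_global_id + n_local - 1)] = local_data  # root's own slice
  MPI.Gatherv!(nothing, buffer, counts, 0, comm)                        # in-place receive
  println("gathered data on root: ", buffer)
else
  MPI.Gatherv!(local_data, nothing, counts, 0, comm)
end

MPI.Finalize()

Passing `nothing` as the send buffer on the root mirrors the in-place `MPI.Gatherv!` call in `save_restart_file`; it requires that the root's own contribution already sits at its global offset in the receive buffer.
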
From da2c11c000a7a258061fb26468987ee3f18cee8a Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 21 Sep 2020 06:27:11 +0200 Subject: [PATCH 28/81] Initialize global MPI state in __init__() --- src/Trixi.jl | 20 ++++++++++++++++++++ src/parallel/parallel.jl | 17 ++++++++++++----- src/run.jl | 6 ------ 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index b1e96a9ddc0..9af5588feee 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -60,4 +60,24 @@ export flux_central, flux_lax_friedrichs, flux_hll, export examples_dir, get_examples, default_example +function __init__() + # Initialize MPI + init_mpi() + + # Initialize global MPI state + MPI_RANK[] = MPI.Comm_rank(mpi_comm()) + MPI_SIZE[] = MPI.Comm_size(mpi_comm()) + MPI_IS_PARALLEL[] = MPI_SIZE[] > 1 + MPI_IS_SERIAL[] = !MPI_IS_PARALLEL[] + MPI_IS_ROOT[] = MPI_IS_SERIAL[] || MPI_RANK[] == 0 + + # Initialize methods for dispatching on parallel execution + if MPI_IS_PARALLEL[] + eval(:(mpi_parallel() = Val{true})) + else + eval(:(mpi_parallel() = Val{false})) + end +end + + end diff --git a/src/parallel/parallel.jl b/src/parallel/parallel.jl index 57e93ab154e..bc29ff06b91 100644 --- a/src/parallel/parallel.jl +++ b/src/parallel/parallel.jl @@ -15,21 +15,28 @@ function init_mpi() end +const MPI_RANK = Ref(-1) +const MPI_SIZE = Ref(-1) +const MPI_IS_PARALLEL = Ref(false) +const MPI_IS_SERIAL = Ref(true) +const MPI_IS_ROOT = Ref(true) + + @inline mpi_comm() = MPI.COMM_WORLD @inline domain_id(comm) = MPI.Comm_rank(comm) -@inline domain_id() = MPI.Comm_rank(mpi_comm()) +@inline domain_id() = MPI_RANK[] @inline n_domains(comm) = MPI.Comm_size(comm) -@inline n_domains() = MPI.Comm_size(mpi_comm()) +@inline n_domains() = MPI_SIZE[] @inline is_parallel(comm) = n_domains(comm) > 1 -@inline is_parallel() = is_parallel(mpi_comm()) +@inline is_parallel() = MPI_IS_PARALLEL[] @inline is_serial(comm) = !is_parallel(comm) -@inline is_serial() = is_serial(mpi_comm()) +@inline is_serial() = MPI_IS_SERIAL[] @inline is_mpi_root(comm) = is_serial() || domain_id(comm) == 0 -@inline is_mpi_root() = is_mpi_root(mpi_comm()) +@inline is_mpi_root() = MPI_IS_ROOT[] @inline mpi_root() = 0 diff --git a/src/run.jl b/src/run.jl index 355e87141f2..e81fba2c58a 100644 --- a/src/run.jl +++ b/src/run.jl @@ -29,9 +29,6 @@ function run(parameters_file; verbose=false, refinement_level_increment=0, param # Reset timer reset_timer!(timer()) - # Initialize MPI - init_mpi() - # Read command line or keyword arguments and parse parameters file init_parameters(parameters_file; verbose=verbose, refinement_level_increment=refinement_level_increment, parameters...) @@ -462,9 +459,6 @@ refinement level will be increased by 1. Parameters can be overriden by specifyi additional keyword arguments, which are passed to the respective call to `run`.. """ function convtest(parameters_file, iterations; parameters...) 
- # Initialize MPI - init_mpi() - if is_mpi_root() @assert(iterations > 1, "Number of iterations must be bigger than 1 for a convergence analysis") end From 00aee1c5f442aae71de643dca64d5b2a5f04c1ea Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 21 Sep 2020 06:40:36 +0200 Subject: [PATCH 29/81] Make parse_parameters_file MPI-aware --- src/Trixi.jl | 4 ++-- src/auxiliary/auxiliary.jl | 31 +++++++++++++++---------------- src/run.jl | 2 +- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index 9af5588feee..015ff84b68f 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -73,9 +73,9 @@ function __init__() # Initialize methods for dispatching on parallel execution if MPI_IS_PARALLEL[] - eval(:(mpi_parallel() = Val{true})) + eval(:(mpi_parallel() = Val(true))) else - eval(:(mpi_parallel() = Val{false})) + eval(:(mpi_parallel() = Val(false))) end end diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index dc37b65bddf..bd324509979 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -13,24 +13,23 @@ const parameters = Dict{Symbol,Any}() # Parse parameters file into global dict -function parse_parameters_file(filename) - if is_parallel() - # If parallel, read in file on root domain and distribute to other domains - if is_mpi_root() - buffer = read(filename) - buffer_length = Int[length(buffer)] - MPI.Bcast!(buffer_length, mpi_root(), mpi_comm()) - MPI.Bcast!(buffer, mpi_root(), mpi_comm()) - else - buffer_length = Int[0] - MPI.Bcast!(buffer_length, mpi_root(), mpi_comm()) - buffer = Vector{UInt8}(undef, buffer_length[1]) - MPI.Bcast!(buffer, mpi_root(), mpi_comm()) - end - parameters[:default] = parse(String(buffer)) +function parse_parameters_file(filename, mpi_parallel::Val{false}) + parameters[:default] = parsefile(filename) + parameters[:default]["parameters_file"] = filename +end +function parse_parameters_file(filename, mpi_parallel::Val{true}) + if is_mpi_root() + buffer = read(filename) + buffer_length = Int[length(buffer)] + MPI.Bcast!(buffer_length, mpi_root(), mpi_comm()) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) else - parameters[:default] = parsefile(filename) + buffer_length = Int[0] + MPI.Bcast!(buffer_length, mpi_root(), mpi_comm()) + buffer = Vector{UInt8}(undef, buffer_length[1]) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) end + parameters[:default] = parse(String(buffer)) parameters[:default]["parameters_file"] = filename end diff --git a/src/run.jl b/src/run.jl index e81fba2c58a..3b51e19b5df 100644 --- a/src/run.jl +++ b/src/run.jl @@ -52,7 +52,7 @@ function init_parameters(parameters_file=nothing; verbose=false, refinement_leve globals[:verbose] = verbose # Parse parameters file - @timeit timer() "read parameter file" parse_parameters_file(parameters_file) + @timeit timer() "read parameter file" parse_parameters_file(parameters_file, mpi_parallel()) # Override specified parameters for (parameter, value) in parameters From affacfb8aaef9e159229cf0639797e20300493d2 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 21 Sep 2020 10:46:58 +0200 Subject: [PATCH 30/81] Use MPI.COMM_WORLD directly --- src/Trixi.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index 015ff84b68f..6ff21ad93c7 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -65,8 +65,8 @@ function __init__() init_mpi() # Initialize global MPI state - MPI_RANK[] = MPI.Comm_rank(mpi_comm()) - MPI_SIZE[] = MPI.Comm_size(mpi_comm()) + MPI_RANK[] = 
MPI.Comm_rank(MPI.COMM_WORLD) + MPI_SIZE[] = MPI.Comm_size(MPI.COMM_WORLD) MPI_IS_PARALLEL[] = MPI_SIZE[] > 1 MPI_IS_SERIAL[] = !MPI_IS_PARALLEL[] MPI_IS_ROOT[] = MPI_IS_SERIAL[] || MPI_RANK[] == 0 From 10f94a8942438b7db7c3ca495370aec097a4d68c Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Tue, 22 Sep 2020 06:23:07 +0200 Subject: [PATCH 31/81] Make save_xxx_file MPI-aware (parallel not yet fully working) --- src/io/io.jl | 106 ++++------------- src/io/parallel.jl | 241 +++++++++++++++++++++++++++++++++++++++ src/run.jl | 6 +- src/run_euler_gravity.jl | 8 +- 4 files changed, 269 insertions(+), 92 deletions(-) create mode 100644 src/io/parallel.jl diff --git a/src/io/io.jl b/src/io/io.jl index 1f4ee765407..c8989de0246 100644 --- a/src/io/io.jl +++ b/src/io/io.jl @@ -1,3 +1,4 @@ +include("parallel.jl") # Load restart file and store solution in solver function load_restart_file!(dg::AbstractDg, restart_filename) @@ -54,7 +55,8 @@ end # Save current DG solution with some context information as a HDF5 file for # restarting. -function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep) +function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, + mpi_parallel::Val{false}) # Create output directory (if it does not exist) output_directory = parameter("output_directory", "out") mkpath(output_directory) @@ -85,78 +87,20 @@ function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep) data = dg.elements.u varnames = varnames_cons(equation) - # If in parallel, only write from MPI root (poor man's version of parallel I/O) - if is_parallel() # Parallel I/O version - element_size = nnodes(dg)^ndims(dg) - counts = convert(Vector{Cint}, collect(dg.n_elements_by_domain)) * Cint(element_size) - - # Store data in buffer - if is_mpi_root() - first_buffer_index = (dg.first_element_global_id - 1) * element_size + 1 - local_data_size = element_size * dg.n_elements - last_buffer_index = first_buffer_index + local_data_size - 1 - - # Create buffer for global element data - buffer = Vector{eltype(data)}(undef, element_size * dg.n_elements_global) - - # Store each variable of the solution - for v in 1:nvariables(dg) - # Convert to 1D array and store in global buffer - if ndims(dg) == 2 - buffer[first_buffer_index:last_buffer_index] = vec(data[v, :, :, :]) - elseif ndims(dg) == 3 - buffer[first_buffer_index:last_buffer_index] = vec(data[v, :, :, :, :]) - else - error("Unsupported number of spatial dimensions: ", ndims(dg)) - end - - # Collect data on root domain - # Note: `collect(...)` is required since we store domain info in OffsetArrays - MPI.Gatherv!(nothing, buffer, counts, mpi_root(), mpi_comm()) - - # Write to file - file["variables_$v"] = buffer - - # Add variable name as attribute - var = file["variables_$v"] - attrs(var)["name"] = varnames[v] - end - else # On non-root domains - # Create buffer for local element data - buffer = Vector{eltype(data)}(undef, element_size * dg.n_elements) - - # Store each variable of the solution - for v in 1:nvariables(dg) - # Convert to 1D array and store in global buffer - if ndims(dg) == 2 - buffer[:] = vec(data[v, :, :, :]) - elseif ndims(dg) == 3 - buffer[:] = vec(data[v, :, :, :, :]) - else - error("Unsupported number of spatial dimensions: ", ndims(dg)) - end - - # Collect data on root domain - # Note: `collect(...)` is required since we store domain info in OffsetArrays - MPI.Gatherv!(buffer, nothing, counts, mpi_root(), mpi_comm()) - end - end - else # Serial I/O version - # Store each variable of 
the solution - for v in 1:nvariables(dg) - # Convert to 1D array - if ndims(dg) == 2 - file["variables_$v"] = vec(data[v, :, :, :]) - elseif ndims(dg) == 3 - file["variables_$v"] = vec(data[v, :, :, :, :]) - else - error("Unsupported number of spatial dimensions: ", ndims(dg)) - end - - # Add variable name as attribute - var = file["variables_$v"] - attrs(var)["name"] = varnames[v] + # Store each variable of the solution + for v in 1:nvariables(dg) + # Convert to 1D array + if ndims(dg) == 2 + file["variables_$v"] = vec(data[v, :, :, :]) + elseif ndims(dg) == 3 + file["variables_$v"] = vec(data[v, :, :, :, :]) + else + error("Unsupported number of spatial dimensions: ", ndims(dg)) end + + # Add variable name as attribute + var = file["variables_$v"] + attrs(var)["name"] = varnames[v] end end end @@ -164,7 +108,11 @@ end # Save current DG solution with some context information as a HDF5 file for # postprocessing. -function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, system="") +function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, mpi_parallel) + return save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, "", mpi_parallel) +end +function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, system, + mpi_parallel::Val{false}) # Create output directory (if it does not exist) output_directory = parameter("output_directory", "out") mkpath(output_directory) @@ -195,18 +143,6 @@ function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, attrs(file)["dt"] = dt attrs(file)["timestep"] = timestep - # Add coordinates as 1D arrays - if ndims(dg) == 2 - file["x"] = vec(dg.elements.node_coordinates[1, :, :, :]) - file["y"] = vec(dg.elements.node_coordinates[2, :, :, :]) - elseif ndims(dg) == 3 - file["x"] = vec(dg.elements.node_coordinates[1, :, :, :, :]) - file["y"] = vec(dg.elements.node_coordinates[2, :, :, :, :]) - file["z"] = vec(dg.elements.node_coordinates[3, :, :, :, :]) - else - error("Unsupported number of spatial dimensions: ", ndims(dg)) - end - # Convert to primitive variables if requested solution_variables = parameter("solution_variables", "primitive", valid=["conservative", "primitive"]) diff --git a/src/io/parallel.jl b/src/io/parallel.jl new file mode 100644 index 00000000000..bc85c2bdd0a --- /dev/null +++ b/src/io/parallel.jl @@ -0,0 +1,241 @@ +function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, + mpi_parallel::Val{true}) + # Create output directory (if it does not exist) + output_directory = parameter("output_directory", "out") + if is_mpi_root() + mkpath(output_directory) + end + + # Filename without extension based on current time step + filename = joinpath(output_directory, @sprintf("restart_%06d", timestep)) + + # Convert time and time step size to floats + time = convert(Float64, time) + dt = convert(Float64, dt) + + # Open file (clobber existing content) + h5open(filename * ".h5", "w") do file + equation = equations(dg) + + # Add context information as attributes + attrs(file)["ndims"] = ndims(dg) + attrs(file)["equations"] = get_name(equation) + attrs(file)["polydeg"] = polydeg(dg) + attrs(file)["n_vars"] = nvariables(dg) + attrs(file)["n_elements"] = dg.n_elements_global + attrs(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attrs(file)["time"] = time + attrs(file)["dt"] = dt + attrs(file)["timestep"] = timestep + + # Restart files always store conservative variables + data = dg.elements.u + varnames = varnames_cons(equation) + + 
# Only write from MPI root (poor man's version of parallel I/O) + element_size = nnodes(dg)^ndims(dg) + counts = convert(Vector{Cint}, collect(dg.n_elements_by_domain)) * Cint(element_size) + + # Store data in buffer + if is_mpi_root() + first_buffer_index = (dg.first_element_global_id - 1) * element_size + 1 + local_data_size = element_size * dg.n_elements + last_buffer_index = first_buffer_index + local_data_size - 1 + + # Create buffer for global element data + buffer = Vector{eltype(data)}(undef, element_size * dg.n_elements_global) + + # Store each variable of the solution + for v in 1:nvariables(dg) + # Convert to 1D array and store in global buffer + if ndims(dg) == 2 + buffer[first_buffer_index:last_buffer_index] = vec(data[v, :, :, :]) + elseif ndims(dg) == 3 + buffer[first_buffer_index:last_buffer_index] = vec(data[v, :, :, :, :]) + else + error("Unsupported number of spatial dimensions: ", ndims(dg)) + end + + # Collect data on root domain + # Note: `collect(...)` is required since we store domain info in OffsetArrays + MPI.Gatherv!(nothing, buffer, counts, mpi_root(), mpi_comm()) + + # Write to file + file["variables_$v"] = buffer + + # Add variable name as attribute + var = file["variables_$v"] + attrs(var)["name"] = varnames[v] + end + else # On non-root domains + # Create buffer for local element data + buffer = Vector{eltype(data)}(undef, element_size * dg.n_elements) + + # Store each variable of the solution + for v in 1:nvariables(dg) + # Convert to 1D array and store in global buffer + if ndims(dg) == 2 + buffer[:] = vec(data[v, :, :, :]) + elseif ndims(dg) == 3 + buffer[:] = vec(data[v, :, :, :, :]) + else + error("Unsupported number of spatial dimensions: ", ndims(dg)) + end + + # Collect data on root domain + # Note: `collect(...)` is required since we store domain info in OffsetArrays + MPI.Gatherv!(buffer, nothing, counts, mpi_root(), mpi_comm()) + end + end + end +end + + +# Save current DG solution with some context information as a HDF5 file for +# postprocessing. 
+function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, system, + mpi_parallel::Val{true}) + # Create output directory (if it does not exist) + output_directory = parameter("output_directory", "out") + if is_mpi_root() + mkpath(output_directory) + end + + # Filename without extension based on current time step + if isempty(system) + filename = joinpath(output_directory, @sprintf("solution_%06d", timestep)) + else + filename = joinpath(output_directory, @sprintf("solution_%s_%06d", system, timestep)) + end + + # Convert time and time step size to floats + time = convert(Float64, time) + dt = convert(Float64, dt) + + # Open file (clobber existing content) + h5open(filename * ".h5", "w") do file + equation = equations(dg) + + # Add context information as attributes + attrs(file)["ndims"] = ndims(dg) + attrs(file)["equations"] = get_name(equation) + attrs(file)["polydeg"] = polydeg(dg) + attrs(file)["n_vars"] = nvariables(dg) + attrs(file)["n_elements"] = dg.n_elements + attrs(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attrs(file)["time"] = time + attrs(file)["dt"] = dt + attrs(file)["timestep"] = timestep + + # Convert to primitive variables if requested + solution_variables = parameter("solution_variables", "primitive", + valid=["conservative", "primitive"]) + if solution_variables == "conservative" + data = dg.elements.u + varnames = varnames_cons(equation) + else + # Reinterpret the solution array as an array of conservative variables, + # compute the primitive variables via broadcasting, and reinterpret the + # result as a plain array of floating point numbers + data = Array(reinterpret(eltype(dg.elements.u), + cons2prim.(reinterpret(SVector{nvariables(dg),eltype(dg.elements.u)}, dg.elements.u), + Ref(equations(dg))))) + varnames = varnames_prim(equation) + end + + # Only write from MPI root (poor man's version of parallel I/O) + element_size = nnodes(dg)^ndims(dg) + counts = convert(Vector{Cint}, collect(dg.n_elements_by_domain)) * Cint(element_size) + + # Store data in buffer + if is_mpi_root() + first_buffer_index = (dg.first_element_global_id - 1) * element_size + 1 + local_data_size = element_size * dg.n_elements + last_buffer_index = first_buffer_index + local_data_size - 1 + + # Create buffer for global element data + buffer = Vector{eltype(data)}(undef, element_size * dg.n_elements_global) + + # Store each variable of the solution + for v in 1:nvariables(dg) + # Convert to 1D array + if ndims(dg) == 2 + file["variables_$v"] = vec(data[v, :, :, :]) + elseif ndims(dg) == 3 + file["variables_$v"] = vec(data[v, :, :, :, :]) + else + error("Unsupported number of spatial dimensions: ", ndims(dg)) + end + + # Add variable name as attribute + var = file["variables_$v"] + attrs(var)["name"] = varnames[v] + end + + # Store element variables + for (v, (key, element_variables)) in enumerate(dg.element_variables) + # Add to file + file["element_variables_$v"] = element_variables + + # Add variable name as attribute + var = file["element_variables_$v"] + attrs(var)["name"] = string(key) + end + else # On non-root domains + # Add coordinates as 1D arrays + if ndims(dg) == 2 + file["x"] = vec(dg.elements.node_coordinates[1, :, :, :]) + file["y"] = vec(dg.elements.node_coordinates[2, :, :, :]) + elseif ndims(dg) == 3 + file["x"] = vec(dg.elements.node_coordinates[1, :, :, :, :]) + file["y"] = vec(dg.elements.node_coordinates[2, :, :, :, :]) + file["z"] = vec(dg.elements.node_coordinates[3, :, :, :, :]) + else + error("Unsupported number of spatial 
dimensions: ", ndims(dg)) + end + + # Convert to primitive variables if requested + solution_variables = parameter("solution_variables", "primitive", + valid=["conservative", "primitive"]) + if solution_variables == "conservative" + data = dg.elements.u + varnames = varnames_cons(equation) + else + # Reinterpret the solution array as an array of conservative variables, + # compute the primitive variables via broadcasting, and reinterpret the + # result as a plain array of floating point numbers + data = Array(reinterpret(eltype(dg.elements.u), + cons2prim.(reinterpret(SVector{nvariables(dg),eltype(dg.elements.u)}, dg.elements.u), + Ref(equations(dg))))) + varnames = varnames_prim(equation) + end + + # Store each variable of the solution + for v in 1:nvariables(dg) + # Convert to 1D array + if ndims(dg) == 2 + file["variables_$v"] = vec(data[v, :, :, :]) + elseif ndims(dg) == 3 + file["variables_$v"] = vec(data[v, :, :, :, :]) + else + error("Unsupported number of spatial dimensions: ", ndims(dg)) + end + + # Add variable name as attribute + var = file["variables_$v"] + attrs(var)["name"] = varnames[v] + end + + # Store element variables + for (v, (key, element_variables)) in enumerate(dg.element_variables) + # Add to file + file["element_variables_$v"] = element_variables + + # Add variable name as attribute + var = file["element_variables_$v"] + attrs(var)["name"] = string(key) + end + end + end +end + diff --git a/src/run.jl b/src/run.jl index 3b51e19b5df..f49bd873b7c 100644 --- a/src/run.jl +++ b/src/run.jl @@ -252,7 +252,7 @@ function init_simulation() # we need to make sure, that derived quantities, such as e.g. blending # factor is already computed for the initial condition @notimeit timer() rhs!(solver, time) - save_solution_file(solver, mesh, time, 0, step) + save_solution_file(solver, mesh, time, 0, step, mpi_parallel()) end # Print initial solution analysis and initialize solution analysis @@ -397,7 +397,7 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function end # Then write solution file - save_solution_file(solver, mesh, time, dt, step) + save_solution_file(solver, mesh, time, dt, step, mpi_parallel()) end output_time += time_ns() - output_start_time end @@ -414,7 +414,7 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function end # Then write restart file - save_restart_file(solver, mesh, time, dt, step) + save_restart_file(solver, mesh, time, dt, step, mpi_parallel()) end output_time += time_ns() - output_start_time end diff --git a/src/run_euler_gravity.jl b/src/run_euler_gravity.jl index 11aae83f16c..3812da9a25c 100644 --- a/src/run_euler_gravity.jl +++ b/src/run_euler_gravity.jl @@ -176,10 +176,10 @@ function init_simulation_euler_gravity() # we need to make sure, that derived quantities, such as e.g. 
blending # factor is already computed for the initial condition @notimeit timer() rhs!(solver, time) - save_solution_file(solver, mesh, time, 0, step, "euler") + save_solution_file(solver, mesh, time, 0, step, "euler", mpi_parallel()) @notimeit timer() rhs!(solver_gravity, time) - save_solution_file(solver_gravity, mesh, time, 0, step, "gravity") + save_solution_file(solver_gravity, mesh, time, 0, step, "gravity", mpi_parallel()) end # Print initial solution analysis and initialize solution analysis if analysis_interval > 0 @@ -320,8 +320,8 @@ function run_simulation_euler_gravity(mesh, solvers, time_parameters, time_integ end # Then write solution file - save_solution_file(solver, mesh, time, dt, step, "euler") - save_solution_file(solver_gravity, mesh, time, dt, step, "gravity") + save_solution_file(solver, mesh, time, dt, step, "euler", mpi_parallel()) + save_solution_file(solver_gravity, mesh, time, dt, step, "gravity", mpi_parallel()) end output_time += time_ns() - output_start_time end From 527fe01f1aa9be2ca3706e4f09df145af219d875 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Tue, 22 Sep 2020 21:09:59 +0200 Subject: [PATCH 32/81] save_restart_file and save_solution_file work in parallel --- src/io/parallel.jl | 141 ++++++++------------------------------------- 1 file changed, 24 insertions(+), 117 deletions(-) diff --git a/src/io/parallel.jl b/src/io/parallel.jl index bc85c2bdd0a..3d17f768902 100644 --- a/src/io/parallel.jl +++ b/src/io/parallel.jl @@ -36,30 +36,13 @@ function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, element_size = nnodes(dg)^ndims(dg) counts = convert(Vector{Cint}, collect(dg.n_elements_by_domain)) * Cint(element_size) - # Store data in buffer - if is_mpi_root() - first_buffer_index = (dg.first_element_global_id - 1) * element_size + 1 - local_data_size = element_size * dg.n_elements - last_buffer_index = first_buffer_index + local_data_size - 1 - - # Create buffer for global element data - buffer = Vector{eltype(data)}(undef, element_size * dg.n_elements_global) - - # Store each variable of the solution - for v in 1:nvariables(dg) - # Convert to 1D array and store in global buffer - if ndims(dg) == 2 - buffer[first_buffer_index:last_buffer_index] = vec(data[v, :, :, :]) - elseif ndims(dg) == 3 - buffer[first_buffer_index:last_buffer_index] = vec(data[v, :, :, :, :]) - else - error("Unsupported number of spatial dimensions: ", ndims(dg)) - end - - # Collect data on root domain - # Note: `collect(...)` is required since we store domain info in OffsetArrays - MPI.Gatherv!(nothing, buffer, counts, mpi_root(), mpi_comm()) + # Store each variable of the solution + for v in 1:nvariables(dg) + # Collect data on root domain + buffer = MPI.Gatherv(vec(data[v, .., :]), counts, mpi_root(), mpi_comm()) + # Write only from root domain + if is_mpi_root() # Write to file file["variables_$v"] = buffer @@ -67,25 +50,6 @@ function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, var = file["variables_$v"] attrs(var)["name"] = varnames[v] end - else # On non-root domains - # Create buffer for local element data - buffer = Vector{eltype(data)}(undef, element_size * dg.n_elements) - - # Store each variable of the solution - for v in 1:nvariables(dg) - # Convert to 1D array and store in global buffer - if ndims(dg) == 2 - buffer[:] = vec(data[v, :, :, :]) - elseif ndims(dg) == 3 - buffer[:] = vec(data[v, :, :, :, :]) - else - error("Unsupported number of spatial dimensions: ", ndims(dg)) - end - - # Collect data on root 
domain - # Note: `collect(...)` is required since we store domain info in OffsetArrays - MPI.Gatherv!(buffer, nothing, counts, mpi_root(), mpi_comm()) - end end end end @@ -121,7 +85,7 @@ function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, attrs(file)["equations"] = get_name(equation) attrs(file)["polydeg"] = polydeg(dg) attrs(file)["n_vars"] = nvariables(dg) - attrs(file)["n_elements"] = dg.n_elements + attrs(file)["n_elements"] = dg.n_elements_global attrs(file)["mesh_file"] = splitdir(mesh.current_filename)[2] attrs(file)["time"] = time attrs(file)["dt"] = dt @@ -145,91 +109,34 @@ function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, # Only write from MPI root (poor man's version of parallel I/O) element_size = nnodes(dg)^ndims(dg) - counts = convert(Vector{Cint}, collect(dg.n_elements_by_domain)) * Cint(element_size) + counts_elements = convert(Vector{Cint}, collect(dg.n_elements_by_domain)) + counts_nodes = counts_elements * Cint(element_size) - # Store data in buffer - if is_mpi_root() - first_buffer_index = (dg.first_element_global_id - 1) * element_size + 1 - local_data_size = element_size * dg.n_elements - last_buffer_index = first_buffer_index + local_data_size - 1 + # Store each variable of the solution + for v in 1:nvariables(dg) + # Collect data on root domain + buffer = MPI.Gatherv(vec(data[v, .., :]), counts_nodes, mpi_root(), mpi_comm()) - # Create buffer for global element data - buffer = Vector{eltype(data)}(undef, element_size * dg.n_elements_global) - - # Store each variable of the solution - for v in 1:nvariables(dg) + # Write only from root domain + if is_mpi_root() # Convert to 1D array - if ndims(dg) == 2 - file["variables_$v"] = vec(data[v, :, :, :]) - elseif ndims(dg) == 3 - file["variables_$v"] = vec(data[v, :, :, :, :]) - else - error("Unsupported number of spatial dimensions: ", ndims(dg)) - end + file["variables_$v"] = buffer # Add variable name as attribute var = file["variables_$v"] attrs(var)["name"] = varnames[v] end + end - # Store element variables - for (v, (key, element_variables)) in enumerate(dg.element_variables) - # Add to file - file["element_variables_$v"] = element_variables - - # Add variable name as attribute - var = file["element_variables_$v"] - attrs(var)["name"] = string(key) - end - else # On non-root domains - # Add coordinates as 1D arrays - if ndims(dg) == 2 - file["x"] = vec(dg.elements.node_coordinates[1, :, :, :]) - file["y"] = vec(dg.elements.node_coordinates[2, :, :, :]) - elseif ndims(dg) == 3 - file["x"] = vec(dg.elements.node_coordinates[1, :, :, :, :]) - file["y"] = vec(dg.elements.node_coordinates[2, :, :, :, :]) - file["z"] = vec(dg.elements.node_coordinates[3, :, :, :, :]) - else - error("Unsupported number of spatial dimensions: ", ndims(dg)) - end - - # Convert to primitive variables if requested - solution_variables = parameter("solution_variables", "primitive", - valid=["conservative", "primitive"]) - if solution_variables == "conservative" - data = dg.elements.u - varnames = varnames_cons(equation) - else - # Reinterpret the solution array as an array of conservative variables, - # compute the primitive variables via broadcasting, and reinterpret the - # result as a plain array of floating point numbers - data = Array(reinterpret(eltype(dg.elements.u), - cons2prim.(reinterpret(SVector{nvariables(dg),eltype(dg.elements.u)}, dg.elements.u), - Ref(equations(dg))))) - varnames = varnames_prim(equation) - end - - # Store each variable of the solution - for v in 
1:nvariables(dg) - # Convert to 1D array - if ndims(dg) == 2 - file["variables_$v"] = vec(data[v, :, :, :]) - elseif ndims(dg) == 3 - file["variables_$v"] = vec(data[v, :, :, :, :]) - else - error("Unsupported number of spatial dimensions: ", ndims(dg)) - end - - # Add variable name as attribute - var = file["variables_$v"] - attrs(var)["name"] = varnames[v] - end + # Store element variables + for (v, (key, element_variables)) in enumerate(dg.element_variables) + # Collect data on root domain + buffer = MPI.Gatherv(element_variables, counts_elements, mpi_root(), mpi_comm()) - # Store element variables - for (v, (key, element_variables)) in enumerate(dg.element_variables) + # Write only from root domain + if is_mpi_root() # Add to file - file["element_variables_$v"] = element_variables + file["element_variables_$v"] = buffer # Add variable name as attribute var = file["element_variables_$v"] From 757e2a2ac9d6f56b2603c896e4022717da54f668 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Wed, 23 Sep 2020 07:42:30 +0200 Subject: [PATCH 33/81] Fix that only root actually creates output files --- src/io/parallel.jl | 247 +++++++++++++++++++++++++++------------------ 1 file changed, 146 insertions(+), 101 deletions(-) diff --git a/src/io/parallel.jl b/src/io/parallel.jl index 3d17f768902..eac09bf7520 100644 --- a/src/io/parallel.jl +++ b/src/io/parallel.jl @@ -1,56 +1,103 @@ -function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, - mpi_parallel::Val{true}) - # Create output directory (if it does not exist) - output_directory = parameter("output_directory", "out") - if is_mpi_root() - mkpath(output_directory) - end - - # Filename without extension based on current time step - filename = joinpath(output_directory, @sprintf("restart_%06d", timestep)) - # Convert time and time step size to floats - time = convert(Float64, time) - dt = convert(Float64, dt) +# Load restart file and store solution in solver +function load_restart_file!(dg::AbstractDg, restart_filename, mpi_parallel::Val{true}) + # Create variables to be returned later + time = NaN + step = -1 - # Open file (clobber existing content) - h5open(filename * ".h5", "w") do file + # Open file + h5open(restart_filename, "r") do file equation = equations(dg) - # Add context information as attributes - attrs(file)["ndims"] = ndims(dg) - attrs(file)["equations"] = get_name(equation) - attrs(file)["polydeg"] = polydeg(dg) - attrs(file)["n_vars"] = nvariables(dg) - attrs(file)["n_elements"] = dg.n_elements_global - attrs(file)["mesh_file"] = splitdir(mesh.current_filename)[2] - attrs(file)["time"] = time - attrs(file)["dt"] = dt - attrs(file)["timestep"] = timestep - - # Restart files always store conservative variables - data = dg.elements.u - varnames = varnames_cons(equation) + # Read attributes to perform some sanity checks + if read(attrs(file)["ndims"]) != ndims(dg) + error("restart mismatch: ndims in solver differs from value in restart file") + end + if read(attrs(file)["equations"]) != get_name(equation) + error("restart mismatch: equations in solver differs from value in restart file") + end + if read(attrs(file)["polydeg"]) != polydeg(dg) + error("restart mismatch: polynomial degree in solver differs from value in restart file") + end + if read(attrs(file)["n_elements"]) != dg.n_elements_global + error("restart mismatch: polynomial degree in solver differs from value in restart file") + end - # Only write from MPI root (poor man's version of parallel I/O) - element_size = nnodes(dg)^ndims(dg) - counts 
= convert(Vector{Cint}, collect(dg.n_elements_by_domain)) * Cint(element_size) + # Read time and time step + time = read(attrs(file)["time"]) + step = read(attrs(file)["timestep"]) - # Store each variable of the solution + # Read data + varnames = varnames_cons(equation) for v in 1:nvariables(dg) - # Collect data on root domain - buffer = MPI.Gatherv(vec(data[v, .., :]), counts, mpi_root(), mpi_comm()) + # Check if variable name matches + var = file["variables_$v"] + if (name = read(attrs(var)["name"])) != varnames[v] + error("mismatch: variables_$v should be '$(varnames[v])', but found '$name'") + end + + # Read variable + println("Reading variables_$v ($name)...") + dg.elements.u[v, .., :] = read(file["variables_$v"]) + end + end + + return time, step +end - # Write only from root domain - if is_mpi_root() +function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, + mpi_parallel::Val{true}) + # Calculate node counts by domain + element_size = nnodes(dg)^ndims(dg) + node_counts = convert(Vector{Cint}, collect(dg.n_elements_by_domain)) * Cint(element_size) + + # Restart files always store conservative variables + data = dg.elements.u + varnames = varnames_cons(equations(dg)) + + # Only write from MPI root (poor man's version of parallel I/O) + if is_mpi_root() + # Create output directory (if it does not exist) + output_directory = parameter("output_directory", "out") + if is_mpi_root() + mkpath(output_directory) + end + + # Filename without extension based on current time step + filename = joinpath(output_directory, @sprintf("restart_%06d", timestep)) + + # Convert time and time step size to floats + time = convert(Float64, time) + dt = convert(Float64, dt) + + # Open file (clobber existing content) + h5open(filename * ".h5", "w") do file + # Add context information as attributes + attrs(file)["ndims"] = ndims(dg) + attrs(file)["equations"] = get_name(equations(dg)) + attrs(file)["polydeg"] = polydeg(dg) + attrs(file)["n_vars"] = nvariables(dg) + attrs(file)["n_elements"] = dg.n_elements_global + attrs(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attrs(file)["time"] = time + attrs(file)["dt"] = dt + attrs(file)["timestep"] = timestep + + # Store each variable of the solution + for v in 1:nvariables(dg) # Write to file - file["variables_$v"] = buffer + file["variables_$v"] = MPI.Gatherv(vec(data[v, .., :]), node_counts, mpi_root(), mpi_comm()) # Add variable name as attribute var = file["variables_$v"] attrs(var)["name"] = varnames[v] end end + else # non-root ranks only send data + # Send nodal data to root + for v in 1:nvariables(dg) + MPI.Gatherv(vec(data[v, .., :]), node_counts, mpi_root(), mpi_comm()) + end end end @@ -59,90 +106,88 @@ end # postprocessing. 
function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, system, mpi_parallel::Val{true}) - # Create output directory (if it does not exist) - output_directory = parameter("output_directory", "out") - if is_mpi_root() - mkpath(output_directory) - end - # Filename without extension based on current time step - if isempty(system) - filename = joinpath(output_directory, @sprintf("solution_%06d", timestep)) + # Calculate element and node counts by domain + element_size = nnodes(dg)^ndims(dg) + element_counts = convert(Vector{Cint}, collect(dg.n_elements_by_domain)) + node_counts = element_counts * Cint(element_size) + + # Convert to primitive variables if requested + solution_variables = parameter("solution_variables", "primitive", + valid=["conservative", "primitive"]) + if solution_variables == "conservative" + data = dg.elements.u + varnames = varnames_cons(equations(dg)) else - filename = joinpath(output_directory, @sprintf("solution_%s_%06d", system, timestep)) + # Reinterpret the solution array as an array of conservative variables, + # compute the primitive variables via broadcasting, and reinterpret the + # result as a plain array of floating point numbers + data = Array(reinterpret(eltype(dg.elements.u), + cons2prim.(reinterpret(SVector{nvariables(dg),eltype(dg.elements.u)}, dg.elements.u), + Ref(equations(dg))))) + varnames = varnames_prim(equations(dg)) end - # Convert time and time step size to floats - time = convert(Float64, time) - dt = convert(Float64, dt) - - # Open file (clobber existing content) - h5open(filename * ".h5", "w") do file - equation = equations(dg) + # Only write from MPI root (poor man's version of parallel I/O) + if is_mpi_root() + # Create output directory (if it does not exist) + output_directory = parameter("output_directory", "out") + mkpath(output_directory) - # Add context information as attributes - attrs(file)["ndims"] = ndims(dg) - attrs(file)["equations"] = get_name(equation) - attrs(file)["polydeg"] = polydeg(dg) - attrs(file)["n_vars"] = nvariables(dg) - attrs(file)["n_elements"] = dg.n_elements_global - attrs(file)["mesh_file"] = splitdir(mesh.current_filename)[2] - attrs(file)["time"] = time - attrs(file)["dt"] = dt - attrs(file)["timestep"] = timestep - - # Convert to primitive variables if requested - solution_variables = parameter("solution_variables", "primitive", - valid=["conservative", "primitive"]) - if solution_variables == "conservative" - data = dg.elements.u - varnames = varnames_cons(equation) + # Filename without extension based on current time step + if isempty(system) + filename = joinpath(output_directory, @sprintf("solution_%06d", timestep)) else - # Reinterpret the solution array as an array of conservative variables, - # compute the primitive variables via broadcasting, and reinterpret the - # result as a plain array of floating point numbers - data = Array(reinterpret(eltype(dg.elements.u), - cons2prim.(reinterpret(SVector{nvariables(dg),eltype(dg.elements.u)}, dg.elements.u), - Ref(equations(dg))))) - varnames = varnames_prim(equation) + filename = joinpath(output_directory, @sprintf("solution_%s_%06d", system, timestep)) end - # Only write from MPI root (poor man's version of parallel I/O) - element_size = nnodes(dg)^ndims(dg) - counts_elements = convert(Vector{Cint}, collect(dg.n_elements_by_domain)) - counts_nodes = counts_elements * Cint(element_size) - - # Store each variable of the solution - for v in 1:nvariables(dg) - # Collect data on root domain - buffer = MPI.Gatherv(vec(data[v, .., :]), 
counts_nodes, mpi_root(), mpi_comm()) - - # Write only from root domain - if is_mpi_root() - # Convert to 1D array - file["variables_$v"] = buffer + # Convert time and time step size to floats + time = convert(Float64, time) + dt = convert(Float64, dt) + + # Open file (clobber existing content) + h5open(filename * ".h5", "w") do file + # Add context information as attributes + attrs(file)["ndims"] = ndims(dg) + attrs(file)["equations"] = get_name(equations(dg)) + attrs(file)["polydeg"] = polydeg(dg) + attrs(file)["n_vars"] = nvariables(dg) + attrs(file)["n_elements"] = dg.n_elements_global + attrs(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attrs(file)["time"] = time + attrs(file)["dt"] = dt + attrs(file)["timestep"] = timestep + + # Store each variable of the solution + for v in 1:nvariables(dg) + # Write to file + file["variables_$v"] = MPI.Gatherv(vec(data[v, .., :]), node_counts, mpi_root(), mpi_comm()) # Add variable name as attribute var = file["variables_$v"] attrs(var)["name"] = varnames[v] end - end - - # Store element variables - for (v, (key, element_variables)) in enumerate(dg.element_variables) - # Collect data on root domain - buffer = MPI.Gatherv(element_variables, counts_elements, mpi_root(), mpi_comm()) - # Write only from root domain - if is_mpi_root() + # Store element variables + for (v, (key, element_variables)) in enumerate(dg.element_variables) # Add to file - file["element_variables_$v"] = buffer + file["element_variables_$v"] = MPI.Gatherv(element_variables, element_counts, mpi_root(), mpi_comm()) # Add variable name as attribute var = file["element_variables_$v"] attrs(var)["name"] = string(key) end end + else # non-root ranks only send data + # Send nodal data to root + for v in 1:nvariables(dg) + MPI.Gatherv(vec(data[v, .., :]), node_counts, mpi_root(), mpi_comm()) + end + + # Send element data to root + for (v, (key, element_variables)) in enumerate(dg.element_variables) + MPI.Gatherv(element_variables, element_counts, mpi_root(), mpi_comm()) + end end end From 626353843d70f0ce9f7b5305190172dd4b359e38 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Thu, 24 Sep 2020 15:51:53 +0200 Subject: [PATCH 34/81] Simplify MPI code --- src/auxiliary/auxiliary.jl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index bd324509979..98ff9335ff3 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -20,13 +20,11 @@ end function parse_parameters_file(filename, mpi_parallel::Val{true}) if is_mpi_root() buffer = read(filename) - buffer_length = Int[length(buffer)] - MPI.Bcast!(buffer_length, mpi_root(), mpi_comm()) + MPI.Bcast!(Ref(length(buffer)), mpi_root(), mpi_comm()) MPI.Bcast!(buffer, mpi_root(), mpi_comm()) else - buffer_length = Int[0] - MPI.Bcast!(buffer_length, mpi_root(), mpi_comm()) - buffer = Vector{UInt8}(undef, buffer_length[1]) + count = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm()) + buffer = Vector{UInt8}(undef, count[]) MPI.Bcast!(buffer, mpi_root(), mpi_comm()) end parameters[:default] = parse(String(buffer)) From 9f56ff67243ba1124eb45d30a76d536e31a077f6 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 25 Sep 2020 09:26:27 +0200 Subject: [PATCH 35/81] Parallel restarting works --- src/auxiliary/auxiliary.jl | 1 + src/io/io.jl | 34 ++++++----- src/io/parallel.jl | 113 +++++++++++++++++++++++++++---------- src/mesh/mesh.jl | 16 +++--- src/mesh/parallel.jl | 77 +++++++++++++++++++++++++ src/run.jl | 9 ++- 
src/run_euler_gravity.jl | 8 +-- 7 files changed, 195 insertions(+), 63 deletions(-) create mode 100644 src/mesh/parallel.jl diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index 98ff9335ff3..6d3172670e7 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -13,6 +13,7 @@ const parameters = Dict{Symbol,Any}() # Parse parameters file into global dict +parse_parameters_file(filename) = parse_parameters_file(filename, mpi_parallel()) function parse_parameters_file(filename, mpi_parallel::Val{false}) parameters[:default] = parsefile(filename) parameters[:default]["parameters_file"] = filename diff --git a/src/io/io.jl b/src/io/io.jl index a3ccbca500c..41337e393d8 100644 --- a/src/io/io.jl +++ b/src/io/io.jl @@ -1,20 +1,19 @@ include("parallel.jl") # Load restart file and store solution in solver -function load_restart_file!(dg::AbstractDg, restart_filename) +load_restart_file!(dg, restart_filename) = load_restart_file!(dg, restart_filename, mpi_parallel()) +function load_restart_file!(dg::AbstractDg, restart_filename, mpi_parallel::Val{false}) # Create variables to be returned later time = NaN step = -1 # Open file h5open(restart_filename, "r") do file - equation = equations(dg) - # Read attributes to perform some sanity checks if read(attrs(file)["ndims"]) != ndims(dg) error("restart mismatch: ndims in solver differs from value in restart file") end - if read(attrs(file)["equations"]) != get_name(equation) + if read(attrs(file)["equations"]) != get_name(equations(dg)) error("restart mismatch: equations in solver differs from value in restart file") end if read(attrs(file)["polydeg"]) != polydeg(dg) @@ -29,7 +28,7 @@ function load_restart_file!(dg::AbstractDg, restart_filename) step = read(attrs(file)["timestep"]) # Read data - varnames = varnames_cons(equation) + varnames = varnames_cons(equations(dg)) for v in 1:nvariables(dg) # Check if variable name matches var = file["variables_$v"] @@ -38,7 +37,6 @@ function load_restart_file!(dg::AbstractDg, restart_filename) end # Read variable - println("Reading variables_$v ($name)...") dg.elements.u[v, .., :] = read(file["variables_$v"]) end end @@ -49,6 +47,8 @@ end # Save current DG solution with some context information as a HDF5 file for # restarting. +save_restart_file(dg, mesh, time, dt, timestep) = save_restart_file(dg, mesh, time, dt, timestep, + mpi_parallel()) function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, mpi_parallel::Val{false}) # Create output directory (if it does not exist) @@ -64,11 +64,9 @@ function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, # Open file (clobber existing content) h5open(filename * ".h5", "w") do file - equation = equations(dg) - # Add context information as attributes attrs(file)["ndims"] = ndims(dg) - attrs(file)["equations"] = get_name(equation) + attrs(file)["equations"] = get_name(equations(dg)) attrs(file)["polydeg"] = polydeg(dg) attrs(file)["n_vars"] = nvariables(dg) attrs(file)["n_elements"] = dg.n_elements_global @@ -79,7 +77,7 @@ function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, # Restart files always store conservative variables data = dg.elements.u - varnames = varnames_cons(equation) + varnames = varnames_cons(equations(dg)) # Store each variable of the solution for v in 1:nvariables(dg) @@ -96,8 +94,9 @@ end # Save current DG solution with some context information as a HDF5 file for # postprocessing. 
-function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, mpi_parallel) - return save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, "", mpi_parallel) +function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, system="") + return save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, system, + mpi_parallel()) end function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, system, mpi_parallel::Val{false}) @@ -118,11 +117,9 @@ function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, # Open file (clobber existing content) h5open(filename * ".h5", "w") do file - equation = equations(dg) - # Add context information as attributes attrs(file)["ndims"] = ndims(dg) - attrs(file)["equations"] = get_name(equation) + attrs(file)["equations"] = get_name(equations(dg)) attrs(file)["polydeg"] = polydeg(dg) attrs(file)["n_vars"] = nvariables(dg) attrs(file)["n_elements"] = dg.n_elements @@ -136,7 +133,7 @@ function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, valid=["conservative", "primitive"]) if solution_variables == "conservative" data = dg.elements.u - varnames = varnames_cons(equation) + varnames = varnames_cons(equations(dg)) else # Reinterpret the solution array as an array of conservative variables, # compute the primitive variables via broadcasting, and reinterpret the @@ -144,7 +141,7 @@ function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, data = Array(reinterpret(eltype(dg.elements.u), cons2prim.(reinterpret(SVector{nvariables(dg),eltype(dg.elements.u)}, dg.elements.u), Ref(equations(dg))))) - varnames = varnames_prim(equation) + varnames = varnames_prim(equations(dg)) end # Store each variable of the solution @@ -171,7 +168,8 @@ end # Save current mesh with some context information as an HDF5 file. 
-function save_mesh_file(mesh::TreeMesh, timestep=-1) +save_mesh_file(mesh, mpi_parallel) = save_mesh_file(mesh, -1, mpi_parallel) +function save_mesh_file(mesh::TreeMesh, timestep, mpi_parallel::Val{false}) # Create output directory (if it does not exist) output_directory = parameter("output_directory", "out") mkpath(output_directory) diff --git a/src/io/parallel.jl b/src/io/parallel.jl index eac09bf7520..ee21e662c64 100644 --- a/src/io/parallel.jl +++ b/src/io/parallel.jl @@ -5,40 +5,52 @@ function load_restart_file!(dg::AbstractDg, restart_filename, mpi_parallel::Val{ time = NaN step = -1 - # Open file - h5open(restart_filename, "r") do file - equation = equations(dg) + # Calculate node counts by domain + element_size = nnodes(dg)^ndims(dg) + node_counts = convert(Vector{Cint}, collect(dg.n_elements_by_domain)) * Cint(element_size) - # Read attributes to perform some sanity checks - if read(attrs(file)["ndims"]) != ndims(dg) - error("restart mismatch: ndims in solver differs from value in restart file") - end - if read(attrs(file)["equations"]) != get_name(equation) - error("restart mismatch: equations in solver differs from value in restart file") - end - if read(attrs(file)["polydeg"]) != polydeg(dg) - error("restart mismatch: polynomial degree in solver differs from value in restart file") - end - if read(attrs(file)["n_elements"]) != dg.n_elements_global - error("restart mismatch: polynomial degree in solver differs from value in restart file") - end + if is_mpi_root() + # Open file + h5open(restart_filename, "r") do file + # Read attributes to perform some sanity checks + if read(attrs(file)["ndims"]) != ndims(dg) + error("restart mismatch: ndims in solver differs from value in restart file") + end + if read(attrs(file)["equations"]) != get_name(equations(dg)) + error("restart mismatch: equations in solver differs from value in restart file") + end + if read(attrs(file)["polydeg"]) != polydeg(dg) + error("restart mismatch: polynomial degree in solver differs from value in restart file") + end + if read(attrs(file)["n_elements"]) != dg.n_elements_global + error("restart mismatch: polynomial degree in solver differs from value in restart file") + end - # Read time and time step - time = read(attrs(file)["time"]) - step = read(attrs(file)["timestep"]) + # Read time and time step + time = read(attrs(file)["time"]) + step = read(attrs(file)["timestep"]) + MPI.Bcast!(Ref(time), mpi_root(), mpi_comm()) + MPI.Bcast!(Ref(step), mpi_root(), mpi_comm()) - # Read data - varnames = varnames_cons(equation) - for v in 1:nvariables(dg) - # Check if variable name matches - var = file["variables_$v"] - if (name = read(attrs(var)["name"])) != varnames[v] - error("mismatch: variables_$v should be '$(varnames[v])', but found '$name'") - end + # Read data + varnames = varnames_cons(equations(dg)) + for v in 1:nvariables(dg) + # Check if variable name matches + var = file["variables_$v"] + if (name = read(attrs(var)["name"])) != varnames[v] + error("mismatch: variables_$v should be '$(varnames[v])', but found '$name'") + end + # Read variable + dg.elements.u[v, .., :] = MPI.Scatterv(read(file["variables_$v"]), node_counts, mpi_root(), mpi_comm()) + end + end + else # on non-root ranks, receive data from root + time = MPI.Bcast!(Ref(time), mpi_root(), mpi_comm())[] + step = MPI.Bcast!(Ref(step), mpi_root(), mpi_comm())[] + for v in 1:nvariables(dg) # Read variable - println("Reading variables_$v ($name)...") - dg.elements.u[v, .., :] = read(file["variables_$v"]) + dg.elements.u[v, .., :] = 
MPI.Scatterv(eltype(dg.elements.u)[], node_counts, mpi_root(), mpi_comm()) end end @@ -191,3 +203,46 @@ function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, end end + +# Save current mesh with some context information as an HDF5 file. +function save_mesh_file(mesh::TreeMesh, timestep, mpi_parallel::Val{true}) + # Since the mesh is replicated on all domains, only save from root domain + if !is_mpi_root() + return + end + + # Create output directory (if it does not exist) + output_directory = parameter("output_directory", "out") + mkpath(output_directory) + + # Determine file name based on existence of meaningful time step + if timestep >= 0 + filename = joinpath(output_directory, @sprintf("mesh_%06d", timestep)) + else + filename = joinpath(output_directory, "mesh") + end + + # Create output directory (if it does not exist) + # Open file (clobber existing content) + h5open(filename * ".h5", "w") do file + # Add context information as attributes + n_cells = length(mesh.tree) + attrs(file)["ndims"] = ndims(mesh) + attrs(file)["n_cells"] = n_cells + attrs(file)["n_leaf_cells"] = count_leaf_cells(mesh.tree) + attrs(file)["minimum_level"] = minimum_level(mesh.tree) + attrs(file)["maximum_level"] = maximum_level(mesh.tree) + attrs(file)["center_level_0"] = mesh.tree.center_level_0 + attrs(file)["length_level_0"] = mesh.tree.length_level_0 + attrs(file)["periodicity"] = collect(mesh.tree.periodicity) + + # Add tree data + file["parent_ids"] = @view mesh.tree.parent_ids[1:n_cells] + file["child_ids"] = @view mesh.tree.child_ids[:, 1:n_cells] + file["neighbor_ids"] = @view mesh.tree.neighbor_ids[:, 1:n_cells] + file["levels"] = @view mesh.tree.levels[1:n_cells] + file["coordinates"] = @view mesh.tree.coordinates[:, 1:n_cells] + end + + return filename * ".h5" +end diff --git a/src/mesh/mesh.jl b/src/mesh/mesh.jl index b0175400052..0ed13afaf18 100644 --- a/src/mesh/mesh.jl +++ b/src/mesh/mesh.jl @@ -3,6 +3,7 @@ abstract type AbstractTree{NDIMS} <: AbstractContainer end include("tree.jl") include("parallel_tree.jl") +include("parallel.jl") # Composite type to hold the actual tree in addition to other mesh-related data # that is not strictly part of the tree. 
@@ -89,11 +90,6 @@ function generate_mesh() refine!(mesh.tree) end - # Partition mesh - if is_parallel() - partition!(mesh) - end - # Apply refinement patches @timeit timer() "refinement patches" for patch in parameter("refinement_patches", []) is_parallel() && error("non-uniform meshes not supported in parallel") @@ -114,12 +110,18 @@ function generate_mesh() end end + # Partition mesh + if is_parallel() + partition!(mesh) + end + return mesh end # Load existing mesh from file -function load_mesh(restart_filename) +load_mesh(restart_filename) = load_mesh(restart_filename, mpi_parallel()) +function load_mesh(restart_filename, mpi_parallel::Val{false}) # Get number of spatial dimensions ndims_ = parameter("ndims") @@ -127,7 +129,7 @@ function load_mesh(restart_filename) n_cells_max = parameter("n_cells_max") # Create mesh - @timeit timer() "creation" mesh = TreeMesh(Val{ndims_}(), n_cells_max) + @timeit timer() "creation" mesh = TreeMesh(Tree{ndims_}, n_cells_max) # Determine mesh filename filename = get_restart_mesh_filename(restart_filename) diff --git a/src/mesh/parallel.jl b/src/mesh/parallel.jl new file mode 100644 index 00000000000..f651cccf6cc --- /dev/null +++ b/src/mesh/parallel.jl @@ -0,0 +1,77 @@ +function load_mesh(restart_filename, mpi_parallel::Val{true}) + # Get number of spatial dimensions + ndims_ = parameter("ndims") + + # Get maximum number of cells that should be supported + n_cells_max = parameter("n_cells_max") + + # Create mesh + @timeit timer() "creation" mesh = TreeMesh(ParallelTree{ndims_}, n_cells_max) + + # Determine mesh filename + if is_mpi_root() + filename = get_restart_mesh_filename(restart_filename) + buffer = Vector{UInt8}(filename) + MPI.Bcast!(Ref(length(buffer)), mpi_root(), mpi_comm()) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) + else # non-root ranks + count = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm()) + buffer = Vector{UInt8}(undef, count[]) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) + filename = String(buffer) + end + mesh.current_filename = filename + mesh.unsaved_changes = false + + # Read mesh file + if is_mpi_root() + h5open(filename, "r") do file + # Set domain information + mesh.tree.center_level_0 = read(attrs(file)["center_level_0"]) + mesh.tree.length_level_0 = read(attrs(file)["length_level_0"]) + mesh.tree.periodicity = Tuple(read(attrs(file)["periodicity"])) + MPI.Bcast!(collect(mesh.tree.center_level_0), mpi_root(), mpi_comm()) + MPI.Bcast!(collect(mesh.tree.length_level_0), mpi_root(), mpi_comm()) + MPI.Bcast!(collect(mesh.tree.periodicity), mpi_root(), mpi_comm()) + + # Set length + n_cells = read(attrs(file)["n_cells"]) + MPI.Bcast!(Ref(n_cells), mpi_root(), mpi_comm()) + resize!(mesh.tree, n_cells) + + # Read in data + mesh.tree.parent_ids[1:n_cells] = read(file["parent_ids"]) + mesh.tree.child_ids[:, 1:n_cells] = read(file["child_ids"]) + mesh.tree.neighbor_ids[:, 1:n_cells] = read(file["neighbor_ids"]) + mesh.tree.levels[1:n_cells] = read(file["levels"]) + mesh.tree.coordinates[:, 1:n_cells] = read(file["coordinates"]) + @views MPI.Bcast!(mesh.tree.parent_ids[1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.child_ids[:, 1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.neighbor_ids[:, 1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.levels[1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.coordinates[:, 1:n_cells], mpi_root(), mpi_comm()) + end + else # non-root domains + # Set domain information + mesh.tree.center_level_0 = 
MPI.Bcast!(collect(mesh.tree.center_level_0), mpi_root(), mpi_comm()) + mesh.tree.length_level_0 = MPI.Bcast!(collect(mesh.tree.length_level_0), mpi_root(), mpi_comm())[1] + mesh.tree.periodicity = Tuple(MPI.Bcast!(collect(mesh.tree.periodicity), mpi_root(), mpi_comm())) + + # Set length + n_cells = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] + resize!(mesh.tree, n_cells) + + # Read in data + @views MPI.Bcast!(mesh.tree.parent_ids[1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.child_ids[:, 1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.neighbor_ids[:, 1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.levels[1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.coordinates[:, 1:n_cells], mpi_root(), mpi_comm()) + end + + # Partition mesh + partition!(mesh) + + return mesh +end + diff --git a/src/run.jl b/src/run.jl index f8825aafb72..d72e159fc6a 100644 --- a/src/run.jl +++ b/src/run.jl @@ -52,7 +52,7 @@ function init_parameters(parameters_file=nothing; verbose=false, refinement_leve globals[:verbose] = verbose # Parse parameters file - @timeit timer() "read parameter file" parse_parameters_file(parameters_file, mpi_parallel()) + @timeit timer() "read parameter file" parse_parameters_file(parameters_file) # Override specified parameters for (parameter, value) in parameters @@ -85,7 +85,6 @@ function init_simulation() # Initialize mesh if restart - is_parallel() && error("restarting not yet implemented in parallel") # TODO parallel is_mpi_root() && print("Loading mesh... ") @timeit timer() "mesh loading" mesh = load_mesh(restart_filename) is_parallel() && MPI.Barrier(mpi_comm()) @@ -252,7 +251,7 @@ function init_simulation() # we need to make sure, that derived quantities, such as e.g. blending # factor is already computed for the initial condition @notimeit timer() rhs!(solver, time) - save_solution_file(solver, mesh, time, 0, step, mpi_parallel()) + save_solution_file(solver, mesh, time, 0, step) end # Print initial solution analysis and initialize solution analysis @@ -391,7 +390,7 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function end # Then write solution file - save_solution_file(solver, mesh, time, dt, step, mpi_parallel()) + save_solution_file(solver, mesh, time, dt, step) end output_time += time_ns() - output_start_time end @@ -408,7 +407,7 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function end # Then write restart file - save_restart_file(solver, mesh, time, dt, step, mpi_parallel()) + save_restart_file(solver, mesh, time, dt, step) end output_time += time_ns() - output_start_time end diff --git a/src/run_euler_gravity.jl b/src/run_euler_gravity.jl index 3812da9a25c..11aae83f16c 100644 --- a/src/run_euler_gravity.jl +++ b/src/run_euler_gravity.jl @@ -176,10 +176,10 @@ function init_simulation_euler_gravity() # we need to make sure, that derived quantities, such as e.g. 
blending # factor is already computed for the initial condition @notimeit timer() rhs!(solver, time) - save_solution_file(solver, mesh, time, 0, step, "euler", mpi_parallel()) + save_solution_file(solver, mesh, time, 0, step, "euler") @notimeit timer() rhs!(solver_gravity, time) - save_solution_file(solver_gravity, mesh, time, 0, step, "gravity", mpi_parallel()) + save_solution_file(solver_gravity, mesh, time, 0, step, "gravity") end # Print initial solution analysis and initialize solution analysis if analysis_interval > 0 @@ -320,8 +320,8 @@ function run_simulation_euler_gravity(mesh, solvers, time_parameters, time_integ end # Then write solution file - save_solution_file(solver, mesh, time, dt, step, "euler", mpi_parallel()) - save_solution_file(solver_gravity, mesh, time, dt, step, "gravity", mpi_parallel()) + save_solution_file(solver, mesh, time, dt, step, "euler") + save_solution_file(solver_gravity, mesh, time, dt, step, "gravity") end output_time += time_ns() - output_start_time end From d8e44f305a06edc499fd6293dd2163557a2603bb Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 25 Sep 2020 23:43:28 +0200 Subject: [PATCH 36/81] added parallel versions of analyze_solution, calc_error_norms, and integrate --- src/parallel/parallel.jl | 3 + src/solvers/dg/2d/dg.jl | 138 ++++++-------- src/solvers/dg/2d/parallel.jl | 346 ++++++++++++++++++++++++++++++++++ 3 files changed, 411 insertions(+), 76 deletions(-) diff --git a/src/parallel/parallel.jl b/src/parallel/parallel.jl index bc29ff06b91..9f0d2501e39 100644 --- a/src/parallel/parallel.jl +++ b/src/parallel/parallel.jl @@ -40,3 +40,6 @@ const MPI_IS_ROOT = Ref(true) @inline is_mpi_root() = MPI_IS_ROOT[] @inline mpi_root() = 0 + +@inline mpi_println(args...) = is_mpi_root() && println(args...) +@inline mpi_print(args...) = is_mpi_root() && print(args...) 
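The patches above and below all rely on the same mechanism: a query such as mpi_parallel() or uses_mpi(dg) returns Val(true) or Val(false), and the serial and MPI variants of a routine are then selected by multiple dispatch on that value, with root-only helpers such as mpi_println keeping screen output on a single rank. The following stand-alone sketch illustrates that pattern under simplified assumptions; the helper definitions and the compute_l2_error example are illustrative stand-ins that mirror the names used in the diffs but are not code from these patches, and the reduction to the root rank plays the role of the MPI.Reduce!/MPI.Gatherv calls shown above.

# Minimal sketch of the Val-based serial/parallel dispatch pattern (illustration only;
# simplified stand-ins for the helpers introduced in src/parallel/parallel.jl).
using MPI

MPI.Init()

mpi_comm() = MPI.COMM_WORLD
mpi_root() = 0
is_mpi_root() = MPI.Comm_rank(mpi_comm()) == mpi_root()
mpi_parallel() = Val(MPI.Comm_size(mpi_comm()) > 1)

# Root-only printing, analogous to mpi_println above
mpi_println(args...) = is_mpi_root() && println(args...)

# Entry point forwards to the method matching the current setup
compute_l2_error(local_sq_error, total_volume) =
    compute_l2_error(local_sq_error, total_volume, mpi_parallel())

# Serial variant: nothing to communicate
compute_l2_error(local_sq_error, total_volume, ::Val{false}) =
    sqrt(local_sq_error / total_volume)

# Parallel variant: sum the local contributions on the root rank, then normalize
# (stands in for the MPI.Reduce!/MPI.Gatherv calls used for error norms and I/O)
function compute_l2_error(local_sq_error, total_volume, ::Val{true})
  global_sq_error = MPI.Reduce(local_sq_error, +, mpi_root(), mpi_comm())
  return is_mpi_root() ? sqrt(global_sq_error / total_volume) : nothing
end

mpi_println("L2 error: ", compute_l2_error(0.25, 4.0))

Run serially, this prints the result directly; under mpirun with more than one rank, mpi_parallel() returns Val(true) and only the root rank prints the reduced value, which is how the parallel analyze_solution and save_*_file methods in this series keep file and screen output confined to the root domain.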
diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index 5e3c2368244..77ffaea5ac4 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -807,7 +807,9 @@ dsdu_ut = integrate(dg, dg.elements.u, dg.elements.u_t) do i, j, element_id, dg, end ``` """ -function integrate(func, dg::Dg2D, args...; normalize=true) +integrate(func, dg::Dg2D, args...; normalize=true) = integrate(func, dg, uses_mpi(dg), args...; + normalize=normalize) +function integrate(func, dg::Dg2D, uses_mpi::Val{false}, args...; normalize=true) # Initialize integral with zeros of the right shape integral = zero(func(1, 1, 1, dg, args...)) @@ -848,18 +850,21 @@ Calculate the integral over all conservative variables: state_integrals = integrate(dg.elements.u, dg) ``` """ -function integrate(func, u, dg::Dg2D; normalize=true) +integrate(func, u, dg::Dg2D; normalize=true) = integrate(func, u, dg, uses_mpi(dg); + normalize=normalize) +function integrate(func, u, dg::Dg2D, uses_mpi::Val{false}; normalize=true) func_wrapped = function(i, j, element_id, dg, u) u_local = get_node_vars(u, dg, i, j, element_id) return func(u_local) end - return integrate(func_wrapped, dg, u; normalize=normalize) + return integrate(func_wrapped, dg, Val(false), u; normalize=normalize) end integrate(u, dg::Dg2D; normalize=true) = integrate(identity, u, dg; normalize=normalize) # Calculate L2/Linf error norms based on "exact solution" -function calc_error_norms(func, dg::Dg2D, t) +calc_error_norms(func, dg::Dg2D, t) = calc_error_norms(func, dg, t, uses_mpi(dg)) +function calc_error_norms(func, dg::Dg2D, t, uses_mpi::Val{false}) # Gather necessary information equation = equations(dg) n_nodes_analysis = size(dg.analysis_vandermonde, 1) @@ -896,14 +901,6 @@ function calc_error_norms(func, dg::Dg2D, t) end # For L2 error, divide by total volume - if is_parallel() - global_l2_error = Vector(l2_error) - global_linf_error = Vector(linf_error) - MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm()) - MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm()) - l2_error = convert(typeof(l2_error), global_l2_error) - linf_error = convert(typeof(linf_error), global_linf_error) - end l2_error = @. sqrt(l2_error / dg.analysis_total_volume) return l2_error, linf_error @@ -911,12 +908,13 @@ end # Integrate ∂S/∂u ⋅ ∂u/∂t over the entire domain -function calc_entropy_timederivative(dg::Dg2D, t) +calc_entropy_timederivative(dg::Dg2D, t) = calc_entropy_timederivative(dg, t, uses_mpi(dg)) +function calc_entropy_timederivative(dg::Dg2D, t, uses_mpi) # Compute ut = rhs(u) with current solution u @notimeit timer() rhs!(dg, t) # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ - dsdu_ut = integrate(dg, dg.elements.u, dg.elements.u_t) do i, j, element_id, dg, u, u_t + dsdu_ut = integrate(dg, uses_mpi, dg.elements.u, dg.elements.u_t) do i, j, element_id, dg, u, u_t u_node = get_node_vars(u, dg, i, j, element_id) u_t_node = get_node_vars(u_t, dg, i, j, element_id) dot(cons2entropy(u_node, equations(dg)), u_t_node) @@ -929,7 +927,8 @@ end # Calculate L2/Linf norms of a solenoidal condition ∇ ⋅ B = 0 # OBS! This works only when the problem setup is designed such that ∂B₁/∂x + ∂B₂/∂y = 0. 
Cannot # compute the full 3D divergence from the given data -function calc_mhd_solenoid_condition(dg::Dg2D, t::Float64) +calc_mhd_solenoid_condition(dg::Dg2D, t) = calc_mhd_solenoid_condition(dg, t, mpi_parallel()) +function calc_mhd_solenoid_condition(dg::Dg2D, t, mpi_parallel::Val{false}) @assert equations(dg) isa IdealGlmMhdEquations2D "Only relevant for MHD" # Local copy of standard derivative matrix @@ -973,29 +972,30 @@ performance index is specified in `runtime_relative`. **Note:** Keep order of analysis quantities in sync with [`save_analysis_header`](@ref) when adding or changing quantities. """ -function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step::Integer, - runtime_absolute::Real, runtime_relative::Real; solver_gravity=nothing) +function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, + runtime_absolute, runtime_relative; solver_gravity=nothing) + analyze_solution(dg, mesh, time, dt, step, runtime_absolute, runtime_relative, uses_mpi(dg), + solver_gravity=solver_gravity) +end +function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_absolute, + runtime_relative, uses_mpi::Val{false}; solver_gravity=nothing) equation = equations(dg) # General information - if is_mpi_root() - println() - println("-"^80) - println(" Simulation running '", get_name(equation), "' with POLYDEG = ", polydeg(dg)) - println("-"^80) - println(" #timesteps: " * @sprintf("% 14d", step) * - " " * - " run time: " * @sprintf("%10.8e s", runtime_absolute)) - println(" dt: " * @sprintf("%10.8e", dt) * - " " * - " PID : " * @sprintf("%10.8e s", runtime_relative)) - println(" sim. time: " * @sprintf("%10.8e", time) * - " " * - " PID × #domains: " * @sprintf("%10.8e s", runtime_relative * n_domains())) - end + println() + println("-"^80) + println(" Simulation running '", get_name(equation), "' with POLYDEG = ", polydeg(dg)) + println("-"^80) + println(" #timesteps: " * @sprintf("% 14d", step) * + " " * + " run time: " * @sprintf("%10.8e s", runtime_absolute)) + println(" dt: " * @sprintf("%10.8e", dt) * + " " * + " Time/DOF/step: " * @sprintf("%10.8e s", runtime_relative)) + println(" sim. time: " * @sprintf("%10.8e", time)) # Level information (only show for AMR) - if parameter("amr_interval", 0)::Int > 0 && is_mpi_root() + if parameter("amr_interval", 0)::Int > 0 levels = Vector{Int}(undef, dg.n_elements) for element_id in 1:dg.n_elements levels[element_id] = mesh.tree.levels[dg.elements.cell_ids[element_id]] @@ -1009,7 +1009,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step:: end println(" └── level $min_level: " * @sprintf("% 14d", count(x->x==min_level, levels))) end - is_mpi_root() && println() + println() # Open file for appending and store time step and time information if dg.save_analysis @@ -1021,40 +1021,36 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step:: # Calculate and print derived quantities (error norms, entropy etc.) 
# Variable names required for L2 error, Linf error, and conservation error - if is_mpi_root() - if any(q in dg.analysis_quantities for q in - (:l2_error, :linf_error, :conservation_error, :residual)) - print(" Variable: ") - for v in 1:nvariables(equation) - @printf(" %-14s", varnames_cons(equation)[v]) - end - println() + if any(q in dg.analysis_quantities for q in + (:l2_error, :linf_error, :conservation_error, :residual)) + print(" Variable: ") + for v in 1:nvariables(equation) + @printf(" %-14s", varnames_cons(equation)[v]) end + println() end # Calculate L2/Linf errors, which are also returned by analyze_solution l2_error, linf_error = calc_error_norms(dg, time) - if is_mpi_root() - # L2 error - if :l2_error in dg.analysis_quantities - print(" L2 error: ") - for v in 1:nvariables(equation) - @printf(" % 10.8e", l2_error[v]) - dg.save_analysis && @printf(f, " % 10.8e", l2_error[v]) - end - println() + # L2 error + if :l2_error in dg.analysis_quantities + print(" L2 error: ") + for v in 1:nvariables(equation) + @printf(" % 10.8e", l2_error[v]) + dg.save_analysis && @printf(f, " % 10.8e", l2_error[v]) end + println() + end - # Linf error - if :linf_error in dg.analysis_quantities - print(" Linf error: ") - for v in 1:nvariables(equation) - @printf(" % 10.8e", linf_error[v]) - dg.save_analysis && @printf(f, " % 10.8e", linf_error[v]) - end - println() + # Linf error + if :linf_error in dg.analysis_quantities + print(" Linf error: ") + for v in 1:nvariables(equation) + @printf(" % 10.8e", linf_error[v]) + dg.save_analysis && @printf(f, " % 10.8e", linf_error[v]) end + println() end # Conservation errror @@ -1123,16 +1119,10 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step:: # Entropy time derivative if :dsdu_ut in dg.analysis_quantities dsdu_ut = calc_entropy_timederivative(dg, time) - if is_parallel() - dsdu_ut_buffer = [dsdu_ut] - MPI.Reduce!(dsdu_ut_buffer, +, mpi_root(), mpi_comm()) - end - if is_mpi_root() - print(" ∑∂S/∂U ⋅ Uₜ: ") - @printf(" % 10.8e", dsdu_ut) - dg.save_analysis && @printf(f, " % 10.8e", dsdu_ut) - println() - end + print(" ∑∂S/∂U ⋅ Uₜ: ") + @printf(" % 10.8e", dsdu_ut) + dg.save_analysis && @printf(f, " % 10.8e", dsdu_ut) + println() end # Entropy @@ -1244,10 +1234,8 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time::Real, dt::Real, step:: println() end - if is_mpi_root() - println("-"^80) - println() - end + println("-"^80) + println() # Add line break and close analysis file if it was opened if dg.save_analysis @@ -1357,10 +1345,8 @@ function set_initial_conditions!(dg::Dg2D, time) end -@inline rhs!(dg::Dg2D, t_stage) = rhs!(dg, t_stage, uses_mpi(dg)) - - # Calculate time derivative +@inline rhs!(dg::Dg2D, t_stage) = rhs!(dg, t_stage, uses_mpi(dg)) function rhs!(dg::Dg2D, t_stage, uses_mpi::Val{false}) # Reset u_t @timeit timer() "reset ∂u/∂t" dg.elements.u_t .= 0 diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl index 513ab323a2e..c72461f26ce 100644 --- a/src/solvers/dg/2d/parallel.jl +++ b/src/solvers/dg/2d/parallel.jl @@ -360,3 +360,349 @@ end function finish_mpi_send!(dg::Dg2D) MPI.Waitall!(dg.mpi_send_requests) end + + +function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_absolute, + runtime_relative, uses_mpi::Val{true}; solver_gravity=nothing) + equation = equations(dg) + + # General information + mpi_println() + mpi_println("-"^80) + mpi_println(" Simulation running '", get_name(equation), "' with POLYDEG = ", polydeg(dg)) + mpi_println("-"^80) + mpi_println(" 
#timesteps: " * @sprintf("% 14d", step) * + " " * + " run time: " * @sprintf("%10.8e s", runtime_absolute)) + mpi_println(" dt: " * @sprintf("%10.8e", dt) * + " " * + " PID: " * @sprintf("%10.8e s", runtime_relative)) + mpi_println(" sim. time: " * @sprintf("%10.8e", time) * + " " * + " PID × #domains: " * @sprintf("%10.8e s", runtime_relative * n_domains())) + + # Level information (only show for AMR) + if parameter("amr_interval", 0)::Int > 0 && is_mpi_root() + levels = Vector{Int}(undef, dg.n_elements) + for element_id in 1:dg.n_elements + levels[element_id] = mesh.tree.levels[dg.elements.cell_ids[element_id]] + end + min_level = minimum(levels) + max_level = maximum(levels) + + mpi_println(" #elements: " * @sprintf("% 14d", dg.n_elements)) + for level = max_level:-1:min_level+1 + mpi_println(" ├── level $level: " * @sprintf("% 14d", count(x->x==level, levels))) + end + mpi_println(" └── level $min_level: " * @sprintf("% 14d", count(x->x==min_level, levels))) + end + mpi_println() + + # Open file for appending and store time step and time information + if dg.save_analysis && is_mpi_root() + f = open(dg.analysis_filename, "a") + @printf(f, "% 9d", step) + @printf(f, " %10.8e", time) + @printf(f, " %10.8e", dt) + end + + # Calculate and print derived quantities (error norms, entropy etc.) + # Variable names required for L2 error, Linf error, and conservation error + if is_mpi_root() + if any(q in dg.analysis_quantities for q in + (:l2_error, :linf_error, :conservation_error, :residual)) + print(" Variable: ") + for v in 1:nvariables(equation) + @printf(" %-14s", varnames_cons(equation)[v]) + end + println() + end + end + + # Calculate L2/Linf errors, which are also returned by analyze_solution + l2_error, linf_error = calc_error_norms(dg, time) + + if is_mpi_root() + # L2 error + if :l2_error in dg.analysis_quantities + print(" L2 error: ") + for v in 1:nvariables(equation) + @printf(" % 10.8e", l2_error[v]) + dg.save_analysis && @printf(f, " % 10.8e", l2_error[v]) + end + println() + end + + # Linf error + if :linf_error in dg.analysis_quantities + print(" Linf error: ") + for v in 1:nvariables(equation) + @printf(" % 10.8e", linf_error[v]) + dg.save_analysis && @printf(f, " % 10.8e", linf_error[v]) + end + println() + end + end + + # Conservation errror + if :conservation_error in dg.analysis_quantities + # Calculate state integrals + state_integrals = integrate(dg.elements.u, dg) + + # Store initial state integrals at first invocation + if isempty(dg.initial_state_integrals) + dg.initial_state_integrals = zeros(nvariables(equation)) + dg.initial_state_integrals .= state_integrals + end + + if is_mpi_root() + print(" |∑U - ∑U₀|: ") + for v in 1:nvariables(equation) + err = abs(state_integrals[v] - dg.initial_state_integrals[v]) + @printf(" % 10.8e", err) + dg.save_analysis && @printf(f, " % 10.8e", err) + end + println() + end + end + + # Residual (defined here as the vector maximum of the absolute values of the time derivatives) + if :residual in dg.analysis_quantities + mpi_print(" max(|Uₜ|): ") + for v in 1:nvariables(equation) + # Calculate maximum absolute value of Uₜ + res = maximum(abs, view(dg.elements.u_t, v, :, :, :)) + res = MPI.Reduce!(Ref(res), max, mpi_root(), mpi_comm())[] + is_mpi_root() && @printf(" % 10.8e", res) + is_mpi_root() && dg.save_analysis && @printf(f, " % 10.8e", res) + end + mpi_println() + end + + # L2/L∞ errors of the primitive variables + if :l2_error_primitive in dg.analysis_quantities || :linf_error_primitive in dg.analysis_quantities + l2_error_prim, 
linf_error_prim = calc_error_norms(cons2prim, dg, time) + + if is_mpi_root() + print(" Variable: ") + for v in 1:nvariables(equation) + @printf(" %-14s", varnames_prim(equation)[v]) + end + println() + + # L2 error + if :l2_error_primitive in dg.analysis_quantities + print(" L2 error prim.: ") + for v in 1:nvariables(equation) + @printf("%10.8e ", l2_error_prim[v]) + dg.save_analysis && @printf(f, " % 10.8e", l2_error_prim[v]) + end + println() + end + + # L∞ error + if :linf_error_primitive in dg.analysis_quantities + print(" Linf error pri.:") + for v in 1:nvariables(equation) + @printf("%10.8e ", linf_error_prim[v]) + dg.save_analysis && @printf(f, " % 10.8e", linf_error_prim[v]) + end + println() + end + end + end + + # Entropy time derivative + if :dsdu_ut in dg.analysis_quantities + dsdu_ut = calc_entropy_timederivative(dg, time) + if is_mpi_root() + print(" ∑∂S/∂U ⋅ Uₜ: ") + @printf(" % 10.8e", dsdu_ut) + dg.save_analysis && @printf(f, " % 10.8e", dsdu_ut) + println() + end + end + + # Entropy + if :entropy in dg.analysis_quantities + s = integrate(dg, dg.elements.u) do i, j, element_id, dg, u + cons = get_node_vars(u, dg, i, j, element_id) + return entropy(cons, equations(dg)) + end + if is_mpi_root() + print(" ∑S: ") + @printf(" % 10.8e", s) + dg.save_analysis && @printf(f, " % 10.8e", s) + println() + end + end + + # Total energy + if :energy_total in dg.analysis_quantities + e_total = integrate(dg, dg.elements.u) do i, j, element_id, dg, u + cons = get_node_vars(u, dg, i, j, element_id) + return energy_total(cons, equations(dg)) + end + if is_mpi_root() + print(" ∑e_total: ") + @printf(" % 10.8e", e_total) + dg.save_analysis && @printf(f, " % 10.8e", e_total) + println() + end + end + + # Kinetic energy + if :energy_kinetic in dg.analysis_quantities + e_kinetic = integrate(dg, dg.elements.u) do i, j, element_id, dg, u + cons = get_node_vars(u, dg, i, j, element_id) + return energy_kinetic(cons, equations(dg)) + end + if is_mpi_root() + print(" ∑e_kinetic: ") + @printf(" % 10.8e", e_kinetic) + dg.save_analysis && @printf(f, " % 10.8e", e_kinetic) + println() + end + end + + # Internal energy + if :energy_internal in dg.analysis_quantities + e_internal = integrate(dg, dg.elements.u) do i, j, element_id, dg, u + cons = get_node_vars(u, dg, i, j, element_id) + return energy_internal(cons, equations(dg)) + end + if is_mpi_root() + print(" ∑e_internal: ") + @printf(" % 10.8e", e_internal) + dg.save_analysis && @printf(f, " % 10.8e", e_internal) + println() + end + end + + # Magnetic energy + if :energy_magnetic in dg.analysis_quantities + e_magnetic = integrate(dg, dg.elements.u) do i, j, element_id, dg, u + cons = get_node_vars(u, dg, i, j, element_id) + return energy_magnetic(cons, equations(dg)) + end + if is_mpi_root() + print(" ∑e_magnetic: ") + @printf(" % 10.8e", e_magnetic) + dg.save_analysis && @printf(f, " % 10.8e", e_magnetic) + println() + end + end + + # Potential energy + if :energy_potential in dg.analysis_quantities + # FIXME: This should be implemented properly for multiple coupled solvers + @assert !isnothing(solver_gravity) "Only works if gravity solver is supplied" + @assert dg.initial_conditions == initial_conditions_jeans_instability "Only works with Jeans instability setup" + + e_potential = integrate(dg, dg.elements.u, solver_gravity.elements.u) do i, j, element_id, dg, u_euler, u_gravity + cons_euler = get_node_vars(u_euler, dg, i, j, element_id) + cons_gravity = get_node_vars(u_gravity, solver_gravity, i, j, element_id) + # OBS! 
subtraction is specific to Jeans instability test where rho_0 = 1.5e7 + return (cons_euler[1] - 1.5e7) * cons_gravity[1] + end + if is_mpi_root() + print(" ∑e_pot: ") + @printf(" % 10.8e", e_potential) + dg.save_analysis && @printf(f, " % 10.8e", e_potential) + println() + end + end + + # Solenoidal condition ∇ ⋅ B = 0 + if :l2_divb in dg.analysis_quantities || :linf_divb in dg.analysis_quantities + l2_divb, linf_divb = calc_mhd_solenoid_condition(dg, time) + end + if is_mpi_root() + # L2 norm of ∇ ⋅ B + if :l2_divb in dg.analysis_quantities + print(" L2 ∇ ⋅B: ") + @printf(" % 10.8e", l2_divb) + dg.save_analysis && @printf(f, " % 10.8e", l2_divb) + println() + end + # Linf norm of ∇ ⋅ B + if :linf_divb in dg.analysis_quantities + print(" Linf ∇ ⋅B: ") + @printf(" % 10.8e", linf_divb) + dg.save_analysis && @printf(f, " % 10.8e", linf_divb) + println() + end + end + + # Cross helicity + if :cross_helicity in dg.analysis_quantities + h_c = integrate(dg, dg.elements.u) do i, j, element_id, dg, u + cons = get_node_vars(u, dg, i, j, element_id) + return cross_helicity(cons, equations(dg)) + end + if is_mpi_root() + print(" ∑H_c: ") + @printf(" % 10.8e", h_c) + dg.save_analysis && @printf(f, " % 10.8e", h_c) + println() + end + end + + if is_mpi_root() + println("-"^80) + println() + + # Add line break and close analysis file if it was opened + if dg.save_analysis + println(f) + close(f) + end + end + + # Return errors for EOC analysis + return l2_error, linf_error +end + + +# OBS! Global results are only calculated on root domain +function calc_error_norms(func, dg::Dg2D, t, uses_mpi::Val{true}) + l2_error, linf_error = calc_error_norms(func, dg, t, Val(false)) + + # Since the local L2 norm is already normalized and square-rooted, we need to undo this first + global_l2_error = Vector(l2_error.^2 .* dg.analysis_total_volume) + global_linf_error = Vector(linf_error) + MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm()) + MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm()) + l2_error = convert(typeof(l2_error), global_l2_error) + linf_error = convert(typeof(linf_error), global_linf_error) + + l2_error = @. sqrt(l2_error / dg.analysis_total_volume) + + return l2_error, linf_error +end + + +function calc_mhd_solenoid_condition(dg::Dg2D, t, mpi_parallel::Val{true}) + l2_divb, linf_divb = calc_mhd_solenoid_condition(func, dg, t, Val(false)) + + # Since the local L2 norm is already normalized and square-rooted, we need to undo this first + global_l2_divb = Vector(l2_divb.^2 .* dg.analysis_total_volume) + global_linf_divb = Vector(linf_divb) + MPI.Reduce!(global_l2_divb, +, mpi_root(), mpi_comm()) + MPI.Reduce!(global_linf_divb, max, mpi_root(), mpi_comm()) + l2_divb = convert(typeof(l2_divb), global_l2_divb) + linf_divb = convert(typeof(linf_divb), global_linf_divb) + + l2_divb = @. sqrt(l2_divb / dg.analysis_total_volume) + + return l2_divb, linf_divb +end + + +# OBS! Global results are only calculated on root domain +function integrate(func, dg::Dg2D, uses_mpi::Val{true}, args...; normalize=true) + integral = integrate(func, dg, Val(false), args...; normalize=normalize) + integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm()) + + return is_mpi_root() ? 
integral[] : integral +end From ac3c87b78d0b9afa95f9487584d0dd17e7aa2f5d Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sat, 26 Sep 2020 00:02:54 +0200 Subject: [PATCH 37/81] Rename 'domain'/'domain_id'/'n_domains' -> 'rank'/'mpi_rank'/'n_mpi_ranks' --- src/io/parallel.jl | 14 ++++----- src/mesh/mesh.jl | 59 +++++++++++++++++------------------ src/mesh/parallel.jl | 2 +- src/mesh/parallel_tree.jl | 30 +++++++++--------- src/parallel/parallel.jl | 12 +++---- src/run.jl | 4 +-- src/solvers/dg/2d/dg.jl | 28 ++++++++--------- src/solvers/dg/2d/parallel.jl | 43 +++++++++++++------------ 8 files changed, 95 insertions(+), 97 deletions(-) diff --git a/src/io/parallel.jl b/src/io/parallel.jl index ee21e662c64..441c6994ea6 100644 --- a/src/io/parallel.jl +++ b/src/io/parallel.jl @@ -5,9 +5,9 @@ function load_restart_file!(dg::AbstractDg, restart_filename, mpi_parallel::Val{ time = NaN step = -1 - # Calculate node counts by domain + # Calculate node counts by MPI rank element_size = nnodes(dg)^ndims(dg) - node_counts = convert(Vector{Cint}, collect(dg.n_elements_by_domain)) * Cint(element_size) + node_counts = convert(Vector{Cint}, collect(dg.n_elements_by_rank)) * Cint(element_size) if is_mpi_root() # Open file @@ -59,9 +59,9 @@ end function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, mpi_parallel::Val{true}) - # Calculate node counts by domain + # Calculate node counts by MPI rank element_size = nnodes(dg)^ndims(dg) - node_counts = convert(Vector{Cint}, collect(dg.n_elements_by_domain)) * Cint(element_size) + node_counts = convert(Vector{Cint}, collect(dg.n_elements_by_rank)) * Cint(element_size) # Restart files always store conservative variables data = dg.elements.u @@ -119,9 +119,9 @@ end function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, system, mpi_parallel::Val{true}) - # Calculate element and node counts by domain + # Calculate element and node counts by MPI rank element_size = nnodes(dg)^ndims(dg) - element_counts = convert(Vector{Cint}, collect(dg.n_elements_by_domain)) + element_counts = convert(Vector{Cint}, collect(dg.n_elements_by_rank)) node_counts = element_counts * Cint(element_size) # Convert to primitive variables if requested @@ -206,7 +206,7 @@ end # Save current mesh with some context information as an HDF5 file. 
function save_mesh_file(mesh::TreeMesh, timestep, mpi_parallel::Val{true}) - # Since the mesh is replicated on all domains, only save from root domain + # Since the mesh is replicated on all ranks, only save from MPI root if !is_mpi_root() return end diff --git a/src/mesh/mesh.jl b/src/mesh/mesh.jl index 0ed13afaf18..bb56f4f0528 100644 --- a/src/mesh/mesh.jl +++ b/src/mesh/mesh.jl @@ -11,8 +11,8 @@ mutable struct TreeMesh{TreeType<:AbstractTree{NDIMS} where NDIMS} tree::TreeType current_filename::String unsaved_changes::Bool - first_cell_by_domain::OffsetVector{Int, Vector{Int}} - n_cells_by_domain::OffsetVector{Int, Vector{Int}} + first_cell_by_rank::OffsetVector{Int, Vector{Int}} + n_cells_by_rank::OffsetVector{Int, Vector{Int}} function TreeMesh{TreeType}(n_cells_max::Integer) where TreeType # Create mesh @@ -20,8 +20,8 @@ mutable struct TreeMesh{TreeType<:AbstractTree{NDIMS} where NDIMS} m.tree = TreeType(n_cells_max) m.current_filename = "" m.unsaved_changes = false - m.first_cell_by_domain = OffsetVector(Int[], 0) - m.n_cells_by_domain = OffsetVector(Int[], 0) + m.first_cell_by_rank = OffsetVector(Int[], 0) + m.n_cells_by_rank = OffsetVector(Int[], 0) return m end @@ -33,8 +33,8 @@ mutable struct TreeMesh{TreeType<:AbstractTree{NDIMS} where NDIMS} m.tree = TreeType(n_cells_max, domain_center, domain_length, periodicity) m.current_filename = "" m.unsaved_changes = false - m.first_cell_by_domain = OffsetVector(Int[], 0) - m.n_cells_by_domain = OffsetVector(Int[], 0) + m.first_cell_by_rank = OffsetVector(Int[], 0) + m.n_cells_by_rank = OffsetVector(Int[], 0) return m end @@ -176,38 +176,37 @@ end # Partition mesh using a static domain decomposition algorithm based on leaf cell count alone -# Return first cell id for each domain function partition!(mesh) - # Determine number of leaf cells per domain + # Determine number of leaf cells per rank leaves = leaf_cells(mesh.tree) - @assert length(leaves) > n_domains() - n_leaves_per_domain = OffsetArray(fill(div(length(leaves), n_domains()), n_domains()), - 0:(n_domains() - 1)) - for d in 0:(rem(length(leaves), n_domains()) - 1) - n_leaves_per_domain[d] += 1 + @assert length(leaves) > n_mpi_ranks() + n_leaves_per_rank = OffsetArray(fill(div(length(leaves), n_mpi_ranks()), n_mpi_ranks()), + 0:(n_mpi_ranks() - 1)) + for d in 0:(rem(length(leaves), n_mpi_ranks()) - 1) + n_leaves_per_rank[d] += 1 end - @assert sum(n_leaves_per_domain) == length(leaves) + @assert sum(n_leaves_per_rank) == length(leaves) - # Assign domain ids to all cells such that all ancestors of each cell - if not yet assigned to a - # domain - belong to the same domain - mesh.first_cell_by_domain = similar(n_leaves_per_domain) - mesh.n_cells_by_domain = similar(n_leaves_per_domain) + # Assign MPI ranks to all cells such that all ancestors of each cell - if not yet assigned to a + # rank - belong to the same rank + mesh.first_cell_by_rank = similar(n_leaves_per_rank) + mesh.n_cells_by_rank = similar(n_leaves_per_rank) leaf_count = 0 - last_id = leaves[n_leaves_per_domain[0]] - mesh.first_cell_by_domain[0] = 1 - mesh.n_cells_by_domain[0] = last_id - mesh.tree.domain_ids[1:last_id] .= 0 - for d in 1:(length(n_leaves_per_domain)-1) - leaf_count += n_leaves_per_domain[d-1] - last_id = leaves[leaf_count + n_leaves_per_domain[d]] - mesh.first_cell_by_domain[d] = mesh.first_cell_by_domain[d-1] + mesh.n_cells_by_domain[d-1] - mesh.n_cells_by_domain[d] = last_id - mesh.first_cell_by_domain[d] + 1 - mesh.tree.domain_ids[mesh.first_cell_by_domain[d]:last_id] .= d + last_id = 
leaves[n_leaves_per_rank[0]] + mesh.first_cell_by_rank[0] = 1 + mesh.n_cells_by_rank[0] = last_id + mesh.tree.mpi_ranks[1:last_id] .= 0 + for d in 1:(length(n_leaves_per_rank)-1) + leaf_count += n_leaves_per_rank[d-1] + last_id = leaves[leaf_count + n_leaves_per_rank[d]] + mesh.first_cell_by_rank[d] = mesh.first_cell_by_rank[d-1] + mesh.n_cells_by_rank[d-1] + mesh.n_cells_by_rank[d] = last_id - mesh.first_cell_by_rank[d] + 1 + mesh.tree.mpi_ranks[mesh.first_cell_by_rank[d]:last_id] .= d end - @assert all(x->x >= 0, mesh.tree.domain_ids[1:length(mesh.tree)]) - @assert sum(mesh.n_cells_by_domain) == length(mesh.tree) + @assert all(x->x >= 0, mesh.tree.mpi_ranks[1:length(mesh.tree)]) + @assert sum(mesh.n_cells_by_rank) == length(mesh.tree) return nothing end diff --git a/src/mesh/parallel.jl b/src/mesh/parallel.jl index f651cccf6cc..1069928fb78 100644 --- a/src/mesh/parallel.jl +++ b/src/mesh/parallel.jl @@ -51,7 +51,7 @@ function load_mesh(restart_filename, mpi_parallel::Val{true}) @views MPI.Bcast!(mesh.tree.levels[1:n_cells], mpi_root(), mpi_comm()) @views MPI.Bcast!(mesh.tree.coordinates[:, 1:n_cells], mpi_root(), mpi_comm()) end - else # non-root domains + else # non-root ranks # Set domain information mesh.tree.center_level_0 = MPI.Bcast!(collect(mesh.tree.center_level_0), mpi_root(), mpi_comm()) mesh.tree.length_level_0 = MPI.Bcast!(collect(mesh.tree.length_level_0), mpi_root(), mpi_comm())[1] diff --git a/src/mesh/parallel_tree.jl b/src/mesh/parallel_tree.jl index 978da841bc9..cac57149178 100644 --- a/src/mesh/parallel_tree.jl +++ b/src/mesh/parallel_tree.jl @@ -26,7 +26,7 @@ mutable struct ParallelTree{NDIMS} <: AbstractTree{NDIMS} levels::Vector{Int} coordinates::Matrix{Float64} original_cell_ids::Vector{Int} - domain_ids::Vector{Int} + mpi_ranks::Vector{Int} capacity::Int length::Int @@ -51,7 +51,7 @@ mutable struct ParallelTree{NDIMS} <: AbstractTree{NDIMS} t.levels = fill(typemin(Int), capacity + 1) t.coordinates = fill(NaN, NDIMS, capacity + 1) t.original_cell_ids = fill(typemin(Int), capacity + 1) - t.domain_ids = fill(typemin(Int), capacity + 1) + t.mpi_ranks = fill(typemin(Int), capacity + 1) t.capacity = capacity t.length = 0 @@ -99,7 +99,7 @@ function init!(t::ParallelTree, center::AbstractArray{Float64}, length::Real, pe t.levels[1] = 0 t.coordinates[:, 1] .= t.center_level_0 t.original_cell_ids[1] = 0 - t.domain_ids[1] = typemin(Int) + t.mpi_ranks[1] = typemin(Int) # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor if all(periodicity) @@ -137,7 +137,7 @@ function Base.show(io::IO, t::ParallelTree{NDIMS}) where NDIMS println(io, "t.levels[1:l] = $(t.levels[1:l])") println(io, "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))") println(io, "t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])") - println(io, "t.domain_ids[1:l] = $(t.domain_ids[1:l])") + println(io, "t.mpi_ranks[1:l] = $(t.mpi_ranks[1:l])") println(io, "t.capacity = $(t.capacity)") println(io, "t.length = $(t.length)") println(io, "t.dummy = $(t.dummy)") @@ -170,8 +170,8 @@ has_child(t::ParallelTree, cell_id::Int, child::Int) = t.child_ids[child, cell_i # Check if cell has a neighbor at the same refinement level in the given direction has_neighbor(t::ParallelTree, cell_id::Int, direction::Int) = t.neighbor_ids[direction, cell_id] > 0 -# Check if cell is own cell, i.e., belongs to this MPI domain -is_own_cell(t::ParallelTree, cell_id) = t.domain_ids[cell_id] == domain_id() +# Check if cell is own cell, i.e., belongs to this MPI rank 
+is_own_cell(t::ParallelTree, cell_id) = t.mpi_ranks[cell_id] == mpi_rank() # Check if cell has a coarse neighbor, i.e., with one refinement level lower function has_coarse_neighbor(t::ParallelTree, cell_id::Int, direction::Int) @@ -285,13 +285,13 @@ end leaf_cells(t::ParallelTree) = filter_leaf_cells((cell_id)->true, t) -# Return an array with the ids of all leaf cells for a given domain -leaf_cells_by_domain(t::ParallelTree, domain_id) = filter_leaf_cells(t) do cell_id - t.domain_ids[cell_id] == domain_id - end +# Return an array with the ids of all leaf cells for a given rank +leaf_cells_by_rank(t::ParallelTree, rank) = filter_leaf_cells(t) do cell_id + t.mpi_ranks[cell_id] == rank + end # Return an array with the ids of all local leaf cells -local_leaf_cells(t::ParallelTree) = leaf_cells_by_domain(t, domain_id()) +local_leaf_cells(t::ParallelTree) = leaf_cells_by_rank(t, mpi_rank()) # Count the number of leaf cells. @@ -443,7 +443,7 @@ function refine_unbalanced!(t::ParallelTree, cell_ids) t.coordinates[:, child_id] .= child_coordinates( t, t.coordinates[:, cell_id], length_at_cell(t, cell_id), child) t.original_cell_ids[child_id] = 0 - t.domain_ids[child_id] = t.domain_ids[cell_id] + t.mpi_ranks[child_id] = t.mpi_ranks[cell_id] # For determining neighbors, use neighbor connections of parent cell for direction in 1:n_directions(t) @@ -689,7 +689,7 @@ function invalidate!(t::ParallelTree, first::Int, last::Int) t.levels[first:last] .= typemin(Int) t.coordinates[:, first:last] .= NaN t.original_cell_ids[first:last] .= typemin(Int) - t.domain_ids[first:last] .= typemin(Int) + t.mpi_ranks[first:last] .= typemin(Int) return nothing end @@ -815,7 +815,7 @@ function raw_copy!(target::ParallelTree, source::ParallelTree, first::Int, last: copy_data!(target.levels, source.levels, first, last, destination) copy_data!(target.coordinates, source.coordinates, first, last, destination, ndims(target)) copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, destination) - copy_data!(target.domain_ids, source.domain_ids, first, last, destination) + copy_data!(target.mpi_ranks, source.mpi_ranks, first, last, destination) end @@ -827,7 +827,7 @@ function reset_data_structures!(t::ParallelTree{NDIMS}) where NDIMS t.levels = Vector{Int}(undef, t.capacity + 1) t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) - t.domain_ids = Vector{Int}(undef, t.capacity + 1) + t.mpi_ranks = Vector{Int}(undef, t.capacity + 1) invalidate!(t, 1, capacity(t) + 1) end diff --git a/src/parallel/parallel.jl b/src/parallel/parallel.jl index 9f0d2501e39..c5c47023fdc 100644 --- a/src/parallel/parallel.jl +++ b/src/parallel/parallel.jl @@ -24,19 +24,19 @@ const MPI_IS_ROOT = Ref(true) @inline mpi_comm() = MPI.COMM_WORLD -@inline domain_id(comm) = MPI.Comm_rank(comm) -@inline domain_id() = MPI_RANK[] +@inline mpi_rank(comm) = MPI.Comm_rank(comm) +@inline mpi_rank() = MPI_RANK[] -@inline n_domains(comm) = MPI.Comm_size(comm) -@inline n_domains() = MPI_SIZE[] +@inline n_mpi_ranks(comm) = MPI.Comm_size(comm) +@inline n_mpi_ranks() = MPI_SIZE[] -@inline is_parallel(comm) = n_domains(comm) > 1 +@inline is_parallel(comm) = n_mpi_ranks(comm) > 1 @inline is_parallel() = MPI_IS_PARALLEL[] @inline is_serial(comm) = !is_parallel(comm) @inline is_serial() = MPI_IS_SERIAL[] -@inline is_mpi_root(comm) = is_serial() || domain_id(comm) == 0 +@inline is_mpi_root(comm) = is_serial() || mpi_rank(comm) == 0 @inline is_mpi_root() = MPI_IS_ROOT[] @inline mpi_root() = 0 
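The renamed helpers above (`mpi_rank()`, `n_mpi_ranks()`, `is_mpi_root()`, `mpi_root()`, `mpi_comm()`) back the serial/parallel dispatch pattern used throughout this patch series: an entry point forwards to a method specialized on the `Val(true)`/`Val(false)` value returned by `mpi_parallel()` (or `uses_mpi(dg)`), and global results are reduced onto the MPI root. The following is a minimal sketch of that pattern, assuming the helpers from src/parallel/parallel.jl are loaded; `local_result` and `global_result` are illustrative names only, not functions from these patches.

using MPI

# Stand-in for any quantity computed locally on each rank (illustrative only).
local_result(dg) = 1.0

# Entry point: select the serial or MPI code path by dispatch instead of runtime branching.
global_result(dg) = global_result(dg, mpi_parallel())

# Serial path: nothing to communicate.
global_result(dg, ::Val{false}) = local_result(dg)

# Parallel path: reduce the per-rank contributions onto the MPI root.
# As in `integrate(..., uses_mpi::Val{true}, ...)` above, only the root unwraps the reduced value.
function global_result(dg, ::Val{true})
  res = MPI.Reduce!(Ref(local_result(dg)), +, mpi_root(), mpi_comm())
  return is_mpi_root() ? res[] : local_result(dg)
end
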
diff --git a/src/run.jl b/src/run.jl index d72e159fc6a..8464511142a 100644 --- a/src/run.jl +++ b/src/run.jl @@ -207,8 +207,8 @@ function init_simulation() | time integration: $(get_name(time_integration_function)) | restart interval: $restart_interval | solution interval: $solution_interval - | #MPI domains: $(n_domains()) - | #threads/domain: $(Threads.nthreads()) + | #MPI ranks: $(n_mpi_ranks()) + | #threads/rank: $(Threads.nthreads()) | | Solver (local) | | solver: $solver_name diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index 77ffaea5ac4..0baef452f28 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -70,13 +70,13 @@ mutable struct Dg2D{Eqn<:AbstractEquation, MeshType, NVARS, POLYDEG, amr_alpha_min::Float64 amr_alpha_smooth::Bool - mpi_neighbor_domain_ids::Vector{Int} + mpi_neighbor_ranks::Vector{Int} mpi_neighbor_interfaces::Vector{Vector{Int}} mpi_send_buffers::Vector{Vector{Float64}} mpi_recv_buffers::Vector{Vector{Float64}} mpi_send_requests::Vector{MPI.Request} mpi_recv_requests::Vector{MPI.Request} - n_elements_by_domain::OffsetArray{Int, 1, Array{Int, 1}} + n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} n_elements_global::Int first_element_global_id::Int @@ -216,7 +216,7 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v # Set up MPI neighbor connectivity and communication data structures if is_parallel() - (mpi_neighbor_domain_ids, + (mpi_neighbor_ranks, mpi_neighbor_interfaces) = init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh) (mpi_send_buffers, mpi_recv_buffers, @@ -225,12 +225,12 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v Val(NDIMS), Val(NVARS), Val(POLYDEG)) # Determine local and total number of elements - n_elements_by_domain = Vector{Int}(undef, n_domains()) - n_elements_by_domain[domain_id() + 1] = n_elements - MPI.Allgather!(n_elements_by_domain, 1, mpi_comm()) - n_elements_by_domain = OffsetArray(n_elements_by_domain, 0:(n_domains() - 1)) + n_elements_by_rank = Vector{Int}(undef, n_mpi_ranks()) + n_elements_by_rank[mpi_rank() + 1] = n_elements + MPI.Allgather!(n_elements_by_rank, 1, mpi_comm()) + n_elements_by_rank = OffsetArray(n_elements_by_rank, 0:(n_mpi_ranks() - 1)) n_elements_global = MPI.Allreduce(n_elements, +, mpi_comm()) - @assert n_elements_global == sum(n_elements_by_domain) "error in total number of elements" + @assert n_elements_global == sum(n_elements_by_rank) "error in total number of elements" # Determine the global element id of the first element first_element_global_id = MPI.Exscan(n_elements, +, mpi_comm()) @@ -242,13 +242,13 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v first_element_global_id += 1 end else - mpi_neighbor_domain_ids = Int[] + mpi_neighbor_ranks = Int[] mpi_neighbor_interfaces = Vector{Int}[] mpi_send_buffers = Vector{Float64}[] mpi_recv_buffers = Vector{Float64}[] mpi_send_requests = MPI.Request[] mpi_recv_requests = MPI.Request[] - n_elements_by_domain = OffsetArray([n_elements], 0:0) + n_elements_by_rank = OffsetArray([n_elements], 0:0) n_elements_global = n_elements first_element_global_id = 1 end @@ -313,9 +313,9 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v analysis_quantities, save_analysis, analysis_filename, shock_indicator_variable, shock_alpha_max, shock_alpha_min, shock_alpha_smooth, amr_indicator, amr_alpha_max, amr_alpha_min, amr_alpha_smooth, - mpi_neighbor_domain_ids, mpi_neighbor_interfaces, + 
mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests, - n_elements_by_domain, n_elements_global, first_element_global_id, + n_elements_by_rank, n_elements_global, first_element_global_id, element_variables, cache, thread_cache, initial_state_integrals) @@ -377,7 +377,7 @@ function count_required_interfaces(mesh::TreeMesh2D, cell_ids) continue end - # Skip if neighbor is on different domain -> create MPI interface instead + # Skip if neighbor is on different rank -> create MPI interface instead if is_parallel() && !is_own_cell(mesh.tree, neighbor_cell_id) continue end @@ -583,7 +583,7 @@ function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh2D) continue end - # Skip if neighbor is on different domain -> create MPI interface instead + # Skip if neighbor is on different rank -> create MPI interface instead if is_parallel() && !is_own_cell(mesh.tree, neighbor_cell_id) continue end diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl index c72461f26ce..b76ddc1a191 100644 --- a/src/solvers/dg/2d/parallel.jl +++ b/src/solvers/dg/2d/parallel.jl @@ -71,7 +71,7 @@ function count_required_mpi_interfaces(mesh::TreeMesh2D, cell_ids) continue end - # Skip if neighbor is on this domain -> create regular interface instead + # Skip if neighbor is on this rank -> create regular interface instead if is_parallel() && is_own_cell(mesh.tree, neighbor_cell_id) continue end @@ -98,7 +98,7 @@ end function start_mpi_receive!(dg::Dg2D) - for (index, d) in enumerate(dg.mpi_neighbor_domain_ids) + for (index, d) in enumerate(dg.mpi_neighbor_ranks) dg.mpi_recv_requests[index] = MPI.Irecv!(dg.mpi_recv_buffers[index], d, d, mpi_comm()) end end @@ -127,7 +127,7 @@ function init_mpi_interface_connectivity!(elements, mpi_interfaces, mesh::TreeMe continue end - # Skip if neighbor is on this domain -> create regular interface instead + # Skip if neighbor is on this MPI rank -> create regular interface instead if is_parallel() && is_own_cell(mesh.tree, neighbor_cell_id) continue end @@ -156,16 +156,15 @@ function init_mpi_interface_connectivity!(elements, mpi_interfaces, mesh::TreeMe end -# Initialize connectivity between MPI neighbor domains +# Initialize connectivity between MPI neighbor ranks function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh::TreeMesh2D) tree = mesh.tree - # Determine neighbor domains and sides for MPI interfaces - neighbor_domain_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) + # Determine neighbor ranks and sides for MPI interfaces + neighbor_ranks = fill(-1, nmpiinterfaces(mpi_interfaces)) # The global interface id is the smaller of the (globally unique) neighbor cell ids, multiplied by # number of directions (2 * ndims) plus direction minus one global_interface_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) - my_domain_id = domain_id() for interface_id in 1:nmpiinterfaces(mpi_interfaces) orientation = mpi_interfaces.orientations[interface_id] remote_side = mpi_interfaces.remote_sides[interface_id] @@ -186,7 +185,7 @@ function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh::TreeMesh local_element_id = mpi_interfaces.local_element_ids[interface_id] local_cell_id = elements.cell_ids[local_element_id] remote_cell_id = tree.neighbor_ids[direction, local_cell_id] - neighbor_domain_ids[interface_id] = tree.domain_ids[remote_cell_id] + neighbor_ranks[interface_id] = tree.mpi_ranks[remote_cell_id] if local_cell_id < remote_cell_id global_interface_ids[interface_id] = 2 * 
ndims(tree) * local_cell_id + direction - 1 else @@ -195,24 +194,24 @@ function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh::TreeMesh end end - # Get sorted, unique neighbor domain ids - mpi_neighbor_domain_ids = unique(sort(neighbor_domain_ids)) + # Get sorted, unique neighbor ranks + mpi_neighbor_ranks = unique(sort(neighbor_ranks)) # Sort interfaces by global interface id p = sortperm(global_interface_ids) - neighbor_domain_ids .= neighbor_domain_ids[p] + neighbor_ranks .= neighbor_ranks[p] interface_ids = collect(1:nmpiinterfaces(mpi_interfaces))[p] - # For each neighbor domain id, init connectivity data structures - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_domain_ids)) - for (index, d) in enumerate(mpi_neighbor_domain_ids) - mpi_neighbor_interfaces[index] = interface_ids[findall(x->(x == d), neighbor_domain_ids)] + # For each neighbor rank, init connectivity data structures + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) + for (index, d) in enumerate(mpi_neighbor_ranks) + mpi_neighbor_interfaces[index] = interface_ids[findall(x->(x == d), neighbor_ranks)] end # Sanity check that we counted all interfaces exactly once @assert sum(length(v) for v in mpi_neighbor_interfaces) == nmpiinterfaces(mpi_interfaces) - return mpi_neighbor_domain_ids, mpi_neighbor_interfaces + return mpi_neighbor_ranks, mpi_neighbor_interfaces end @@ -267,7 +266,7 @@ end function start_mpi_send!(dg::Dg2D) data_size = nvariables(dg) * nnodes(dg)^(ndims(dg) - 1) - for d in 1:length(dg.mpi_neighbor_domain_ids) + for d in 1:length(dg.mpi_neighbor_ranks) send_buffer = dg.mpi_send_buffers[d] for (index, s) in enumerate(dg.mpi_neighbor_interfaces[d]) @@ -283,8 +282,8 @@ function start_mpi_send!(dg::Dg2D) end # Start sending - for (index, d) in enumerate(dg.mpi_neighbor_domain_ids) - dg.mpi_send_requests[index] = MPI.Isend(dg.mpi_send_buffers[index], d, domain_id(), mpi_comm()) + for (index, d) in enumerate(dg.mpi_neighbor_ranks) + dg.mpi_send_requests[index] = MPI.Isend(dg.mpi_send_buffers[index], d, mpi_rank(), mpi_comm()) end end @@ -379,7 +378,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso " PID: " * @sprintf("%10.8e s", runtime_relative)) mpi_println(" sim. time: " * @sprintf("%10.8e", time) * " " * - " PID × #domains: " * @sprintf("%10.8e s", runtime_relative * n_domains())) + " PID × #ranks: " * @sprintf("%10.8e s", runtime_relative * n_mpi_ranks())) # Level information (only show for AMR) if parameter("amr_interval", 0)::Int > 0 && is_mpi_root() @@ -664,7 +663,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso end -# OBS! Global results are only calculated on root domain +# OBS! Global results are only calculated on MPI root function calc_error_norms(func, dg::Dg2D, t, uses_mpi::Val{true}) l2_error, linf_error = calc_error_norms(func, dg, t, Val(false)) @@ -699,7 +698,7 @@ function calc_mhd_solenoid_condition(dg::Dg2D, t, mpi_parallel::Val{true}) end -# OBS! Global results are only calculated on root domain +# OBS! 
Global results are only calculated on MPI root function integrate(func, dg::Dg2D, uses_mpi::Val{true}, args...; normalize=true) integral = integrate(func, dg, Val(false), args...; normalize=normalize) integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm()) From 6444e9fa40e9d4b3a42b93b3896a05520605d032 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sat, 26 Sep 2020 06:03:59 +0200 Subject: [PATCH 38/81] Collect all MPI initialization in `init_mpi()` --- src/Trixi.jl | 15 --------------- src/parallel/parallel.jl | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index 18a9372e472..8a618f3e5d0 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -62,22 +62,7 @@ export examples_dir, get_examples, default_example function __init__() - # Initialize MPI init_mpi() - - # Initialize global MPI state - MPI_RANK[] = MPI.Comm_rank(MPI.COMM_WORLD) - MPI_SIZE[] = MPI.Comm_size(MPI.COMM_WORLD) - MPI_IS_PARALLEL[] = MPI_SIZE[] > 1 - MPI_IS_SERIAL[] = !MPI_IS_PARALLEL[] - MPI_IS_ROOT[] = MPI_IS_SERIAL[] || MPI_RANK[] == 0 - - # Initialize methods for dispatching on parallel execution - if MPI_IS_PARALLEL[] - eval(:(mpi_parallel() = Val(true))) - else - eval(:(mpi_parallel() = Val(false))) - end end diff --git a/src/parallel/parallel.jl b/src/parallel/parallel.jl index c5c47023fdc..738b045f237 100644 --- a/src/parallel/parallel.jl +++ b/src/parallel/parallel.jl @@ -5,16 +5,37 @@ Initialize MPI by calling `MPI.Initialized()`. The function will check if MPI is and if yes, do nothing, thus it is safe to call it multiple times. """ function init_mpi() + if MPI_INITIALIZED[] + return nothing + end + if !MPI.Initialized() # MPI.THREAD_FUNNELED: Only main thread makes MPI calls provided = MPI.Init_thread(MPI.THREAD_FUNNELED) @assert provided >= MPI.THREAD_FUNNELED "MPI library with insufficient threading support" end + # Initialize global MPI state + MPI_RANK[] = MPI.Comm_rank(MPI.COMM_WORLD) + MPI_SIZE[] = MPI.Comm_size(MPI.COMM_WORLD) + MPI_IS_PARALLEL[] = MPI_SIZE[] > 1 + MPI_IS_SERIAL[] = !MPI_IS_PARALLEL[] + MPI_IS_ROOT[] = MPI_IS_SERIAL[] || MPI_RANK[] == 0 + + # Initialize methods for dispatching on parallel execution + if MPI_IS_PARALLEL[] + eval(:(mpi_parallel() = Val(true))) + else + eval(:(mpi_parallel() = Val(false))) + end + + MPI_INITIALIZED[] = true + return nothing end +const MPI_INITIALIZED = Ref(false) const MPI_RANK = Ref(-1) const MPI_SIZE = Ref(-1) const MPI_IS_PARALLEL = Ref(false) From d79cadfd035380205fe5a88ed1baa26f58629e21 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sat, 26 Sep 2020 06:26:38 +0200 Subject: [PATCH 39/81] Fix several parallel I/O issues --- src/auxiliary/auxiliary.jl | 2 +- src/io/io.jl | 2 +- src/io/parallel.jl | 12 +++++------ src/run.jl | 44 ++++++++++++++++++-------------------- 4 files changed, 29 insertions(+), 31 deletions(-) diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index 6d3172670e7..14d10bb0234 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -132,7 +132,7 @@ function print_startup_message() ██║ ██║ ██║██║██╔╝ ██╗██║ ╚═╝ ╚═╝ ╚═╝╚═╝╚═╝ ╚═╝╚═╝ """ - println(s) + mpi_println(s) end diff --git a/src/io/io.jl b/src/io/io.jl index 41337e393d8..7572b79958e 100644 --- a/src/io/io.jl +++ b/src/io/io.jl @@ -168,7 +168,7 @@ end # Save current mesh with some context information as an HDF5 file. 
-save_mesh_file(mesh, mpi_parallel) = save_mesh_file(mesh, -1, mpi_parallel) +save_mesh_file(mesh, timestep=-1) = save_mesh_file(mesh, timestep, mpi_parallel()) function save_mesh_file(mesh::TreeMesh, timestep, mpi_parallel::Val{false}) # Create output directory (if it does not exist) output_directory = parameter("output_directory", "out") diff --git a/src/io/parallel.jl b/src/io/parallel.jl index 441c6994ea6..c55f934e677 100644 --- a/src/io/parallel.jl +++ b/src/io/parallel.jl @@ -206,14 +206,9 @@ end # Save current mesh with some context information as an HDF5 file. function save_mesh_file(mesh::TreeMesh, timestep, mpi_parallel::Val{true}) - # Since the mesh is replicated on all ranks, only save from MPI root - if !is_mpi_root() - return - end - # Create output directory (if it does not exist) output_directory = parameter("output_directory", "out") - mkpath(output_directory) + is_mpi_root() && mkpath(output_directory) # Determine file name based on existence of meaningful time step if timestep >= 0 @@ -222,6 +217,11 @@ function save_mesh_file(mesh::TreeMesh, timestep, mpi_parallel::Val{true}) filename = joinpath(output_directory, "mesh") end + # Since the mesh is replicated on all ranks, only save from MPI root + if !is_mpi_root() + return filename * ".h5" + end + # Create output directory (if it does not exist) # Open file (clobber existing content) h5open(filename * ".h5", "w") do file diff --git a/src/run.jl b/src/run.jl index 8464511142a..358e762e2d5 100644 --- a/src/run.jl +++ b/src/run.jl @@ -70,9 +70,7 @@ end function init_simulation() # Print starup message - if is_mpi_root() - print_startup_message() - end + print_startup_message() # Get number of dimensions ndims_ = parameter("ndims")::Int @@ -85,32 +83,32 @@ function init_simulation() # Initialize mesh if restart - is_mpi_root() && print("Loading mesh... ") + mpi_print("Loading mesh... ") @timeit timer() "mesh loading" mesh = load_mesh(restart_filename) is_parallel() && MPI.Barrier(mpi_comm()) - is_mpi_root() && println("done") + mpi_println("done") else - is_mpi_root() && print("Creating mesh... ") + mpi_print("Creating mesh... ") @timeit timer() "mesh creation" mesh = generate_mesh() mesh.current_filename = save_mesh_file(mesh) mesh.unsaved_changes = false is_parallel() && MPI.Barrier(mpi_comm()) - is_mpi_root() && println("done") + mpi_println("done") end # Initialize system of equations - is_mpi_root() && print("Initializing system of equations... ") + mpi_print("Initializing system of equations... ") equations_name = parameter("equations") equations = make_equations(equations_name, ndims_) is_parallel() && MPI.Barrier(mpi_comm()) - is_mpi_root() && println("done") + mpi_println("done") # Initialize solver - is_mpi_root() && print("Initializing solver... ") + mpi_print("Initializing solver... 
") solver_name = parameter("solver", valid=["dg"]) solver = make_solver(solver_name, equations, mesh) is_parallel() && MPI.Barrier(mpi_comm()) - is_mpi_root() && println("done") + mpi_println("done") # Sanity checks # If DG volume integral type is weak form, volume flux type must be flux_central, @@ -128,18 +126,18 @@ function init_simulation() adapt_initial_conditions = parameter("adapt_initial_conditions", true) adapt_initial_conditions_only_refine = parameter("adapt_initial_conditions_only_refine", true) if restart - is_mpi_root() && print("Loading restart file...") + mpi_print("Loading restart file...") time, step = load_restart_file!(solver, restart_filename) is_parallel() && MPI.Barrier(mpi_comm()) - is_mpi_root() && println("done") + mpi_println("done") else - is_mpi_root() && print("Applying initial conditions... ") + mpi_print("Applying initial conditions... ") t_start = parameter("t_start") time = t_start step = 0 set_initial_conditions!(solver, time) is_parallel() && MPI.Barrier(mpi_comm()) - is_mpi_root() && println("done") + mpi_println("done") # If AMR is enabled, adapt mesh and re-apply ICs if amr_interval > 0 && adapt_initial_conditions @@ -233,8 +231,8 @@ function init_simulation() | | minimum dx: $min_dx | | maximum dx: $max_dx """ - is_mpi_root() && println() - is_mpi_root() && println(s) + mpi_println() + mpi_println(s) # Set up main loop save_final_solution = parameter("save_final_solution", true) @@ -360,11 +358,11 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function analysis_start_time = time_ns() output_time = 0.0 n_analysis_timesteps = 0 - if finalstep && is_mpi_root() - println("-"^80) - println("Trixi simulation run finished. Final time: $time Time steps: $step") - println("-"^80) - println() + if finalstep + mpi_println("-"^80) + mpi_println("Trixi simulation run finished. Final time: $time Time steps: $step") + mpi_println("-"^80) + mpi_println() end elseif alive_interval > 0 && step % alive_interval == 0 && is_mpi_root() runtime_absolute = (time_ns() - loop_start_time) / 10^9 @@ -464,7 +462,7 @@ function convtest(parameters_file, iterations; parameters...) # Run trixi and extract errors for i = 1:iterations - is_mpi_root() && println(string("Running convtest iteration ", i, "/", iterations)) + mpi_println(string("Running convtest iteration ", i, "/", iterations)) l2_error, linf_error, variablenames = run(parameters_file; refinement_level_increment = i - 1, parameters...) From 5d3dbce979135b94c50bab1d16ccf423a368252f Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sat, 26 Sep 2020 06:30:44 +0200 Subject: [PATCH 40/81] Move partition! 
to parallel.jl --- src/mesh/mesh.jl | 37 ------------------------------------- src/mesh/parallel.jl | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/mesh/mesh.jl b/src/mesh/mesh.jl index bb56f4f0528..e6a83e24a2e 100644 --- a/src/mesh/mesh.jl +++ b/src/mesh/mesh.jl @@ -173,40 +173,3 @@ function get_restart_mesh_filename(restart_filename) # Construct and return filename return joinpath(dirname, mesh_file) end - - -# Partition mesh using a static domain decomposition algorithm based on leaf cell count alone -function partition!(mesh) - # Determine number of leaf cells per rank - leaves = leaf_cells(mesh.tree) - @assert length(leaves) > n_mpi_ranks() - n_leaves_per_rank = OffsetArray(fill(div(length(leaves), n_mpi_ranks()), n_mpi_ranks()), - 0:(n_mpi_ranks() - 1)) - for d in 0:(rem(length(leaves), n_mpi_ranks()) - 1) - n_leaves_per_rank[d] += 1 - end - @assert sum(n_leaves_per_rank) == length(leaves) - - # Assign MPI ranks to all cells such that all ancestors of each cell - if not yet assigned to a - # rank - belong to the same rank - mesh.first_cell_by_rank = similar(n_leaves_per_rank) - mesh.n_cells_by_rank = similar(n_leaves_per_rank) - - leaf_count = 0 - last_id = leaves[n_leaves_per_rank[0]] - mesh.first_cell_by_rank[0] = 1 - mesh.n_cells_by_rank[0] = last_id - mesh.tree.mpi_ranks[1:last_id] .= 0 - for d in 1:(length(n_leaves_per_rank)-1) - leaf_count += n_leaves_per_rank[d-1] - last_id = leaves[leaf_count + n_leaves_per_rank[d]] - mesh.first_cell_by_rank[d] = mesh.first_cell_by_rank[d-1] + mesh.n_cells_by_rank[d-1] - mesh.n_cells_by_rank[d] = last_id - mesh.first_cell_by_rank[d] + 1 - mesh.tree.mpi_ranks[mesh.first_cell_by_rank[d]:last_id] .= d - end - - @assert all(x->x >= 0, mesh.tree.mpi_ranks[1:length(mesh.tree)]) - @assert sum(mesh.n_cells_by_rank) == length(mesh.tree) - - return nothing -end diff --git a/src/mesh/parallel.jl b/src/mesh/parallel.jl index 1069928fb78..14671ee8f7a 100644 --- a/src/mesh/parallel.jl +++ b/src/mesh/parallel.jl @@ -1,3 +1,40 @@ +# Partition mesh using a static domain decomposition algorithm based on leaf cell count alone +function partition!(mesh) + # Determine number of leaf cells per rank + leaves = leaf_cells(mesh.tree) + @assert length(leaves) > n_mpi_ranks() + n_leaves_per_rank = OffsetArray(fill(div(length(leaves), n_mpi_ranks()), n_mpi_ranks()), + 0:(n_mpi_ranks() - 1)) + for d in 0:(rem(length(leaves), n_mpi_ranks()) - 1) + n_leaves_per_rank[d] += 1 + end + @assert sum(n_leaves_per_rank) == length(leaves) + + # Assign MPI ranks to all cells such that all ancestors of each cell - if not yet assigned to a + # rank - belong to the same rank + mesh.first_cell_by_rank = similar(n_leaves_per_rank) + mesh.n_cells_by_rank = similar(n_leaves_per_rank) + + leaf_count = 0 + last_id = leaves[n_leaves_per_rank[0]] + mesh.first_cell_by_rank[0] = 1 + mesh.n_cells_by_rank[0] = last_id + mesh.tree.mpi_ranks[1:last_id] .= 0 + for d in 1:(length(n_leaves_per_rank)-1) + leaf_count += n_leaves_per_rank[d-1] + last_id = leaves[leaf_count + n_leaves_per_rank[d]] + mesh.first_cell_by_rank[d] = mesh.first_cell_by_rank[d-1] + mesh.n_cells_by_rank[d-1] + mesh.n_cells_by_rank[d] = last_id - mesh.first_cell_by_rank[d] + 1 + mesh.tree.mpi_ranks[mesh.first_cell_by_rank[d]:last_id] .= d + end + + @assert all(x->x >= 0, mesh.tree.mpi_ranks[1:length(mesh.tree)]) + @assert sum(mesh.n_cells_by_rank) == length(mesh.tree) + + return nothing +end + + function load_mesh(restart_filename, mpi_parallel::Val{true}) # Get number 
of spatial dimensions ndims_ = parameter("ndims") From f06da84daac3ce855dd9725bebd374c0b7c9fa71 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sat, 26 Sep 2020 06:40:39 +0200 Subject: [PATCH 41/81] Fix several MPI calls --- src/run.jl | 11 ++++------- src/solvers/dg/2d/parallel.jl | 6 +++--- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/run.jl b/src/run.jl index 358e762e2d5..454286f1fd3 100644 --- a/src/run.jl +++ b/src/run.jl @@ -320,9 +320,7 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function resid = maximum(abs, view(solver.elements.u_t, 1, .., :)) if is_parallel() - resid_buffer = [resid] - MPI.Allreduce!(resid_buffer, max, mpi_comm()) - resid = resid_buffer[1] + resid = MPI.Allreduce!(Ref(resid), max, mpi_comm())[] end if resid <= solver.equations.resid_tol @@ -340,11 +338,10 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function if analysis_interval > 0 && (step % analysis_interval == 0 || finalstep) # Calculate absolute and relative runtime if is_parallel() - total_dofs = ndofs(solver) + total_dofs = MPI.Reduce!(Ref(ndofs(solver)), +, mpi_root(), mpi_comm()) + total_dofs = is_mpi_root() ? total_dofs[] : -1 else - dofs_buffer = [ndofs(solver)] - MPI.Reduce!(dofs_buffer, +, mpi_root(), mpi_comm()) - total_dofs = dofs_buffer[1] + total_dofs = ndofs(solver) end runtime_absolute = (time_ns() - loop_start_time) / 10^9 runtime_relative = ((time_ns() - analysis_start_time - output_time) / 10^9 / diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl index b76ddc1a191..3a279a4be18 100644 --- a/src/solvers/dg/2d/parallel.jl +++ b/src/solvers/dg/2d/parallel.jl @@ -471,9 +471,9 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso for v in 1:nvariables(equation) # Calculate maximum absolute value of Uₜ res = maximum(abs, view(dg.elements.u_t, v, :, :, :)) - res = MPI.Reduce!(Ref(res), max, mpi_root(), mpi_comm())[] - is_mpi_root() && @printf(" % 10.8e", res) - is_mpi_root() && dg.save_analysis && @printf(f, " % 10.8e", res) + res = MPI.Reduce!(Ref(res), max, mpi_root(), mpi_comm()) + is_mpi_root() && @printf(" % 10.8e", res[]) + is_mpi_root() && dg.save_analysis && @printf(f, " % 10.8e", res[]) end mpi_println() end From d43f3d4b3dd3d0a785aedf4c1ff70d266dd5fa41 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sat, 26 Sep 2020 06:43:44 +0200 Subject: [PATCH 42/81] Parallel output fix --- src/run.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/run.jl b/src/run.jl index 454286f1fd3..c463135b120 100644 --- a/src/run.jl +++ b/src/run.jl @@ -324,12 +324,12 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function end if resid <= solver.equations.resid_tol - println() - println("-"^80) - println(" Steady state tolerance of ", solver.equations.resid_tol, - " reached at time ", time) - println("-"^80) - println() + mpi_println() + mpi_println("-"^80) + mpi_println(" Steady state tolerance of ", solver.equations.resid_tol, + " reached at time ", time) + mpi_println("-"^80) + mpi_println() finalstep = true end end From db34d04e29d62625ff82d2f5c7eeea07d261e9b9 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sat, 26 Sep 2020 11:22:43 +0200 Subject: [PATCH 43/81] If MPI is already initialized, query for sufficient threading support --- src/parallel/parallel.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/parallel/parallel.jl 
b/src/parallel/parallel.jl index 738b045f237..188b700698e 100644 --- a/src/parallel/parallel.jl +++ b/src/parallel/parallel.jl @@ -9,7 +9,9 @@ function init_mpi() return nothing end - if !MPI.Initialized() + if MPI.Initialized() + @assert MPI.Query_thread() >= MPI.THREAD_FUNNELED "MPI already initialized with insufficient threading support" + else # MPI.THREAD_FUNNELED: Only main thread makes MPI calls provided = MPI.Init_thread(MPI.THREAD_FUNNELED) @assert provided >= MPI.THREAD_FUNNELED "MPI library with insufficient threading support" From 2ecc439d9c5a9278c39bbf93dbf735ec915f6e36 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sat, 26 Sep 2020 18:32:24 +0200 Subject: [PATCH 44/81] Split calc_dt in serial and parallel version --- src/solvers/dg/2d/dg.jl | 9 ++------- src/solvers/dg/2d/parallel.jl | 9 +++++++++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index 0baef452f28..601120ecd28 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -2379,7 +2379,8 @@ end # Calculate stable time step size -function calc_dt(dg::Dg2D, cfl) +@inline calc_dt(dg, cfl) = calc_dt(dg, cfl, uses_mpi(dg)) +function calc_dt(dg::Dg2D, cfl, uses_mpi::Val{false}) min_dt = Inf for element_id in 1:dg.n_elements dt = calc_max_dt(dg.elements.u, element_id, @@ -2387,12 +2388,6 @@ function calc_dt(dg::Dg2D, cfl) min_dt = min(min_dt, dt) end - if is_parallel() - min_dt_buffer = [min_dt] - MPI.Allreduce!(min_dt_buffer, min, mpi_comm()) - min_dt = min_dt_buffer[1] - end - return min_dt end diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl index 3a279a4be18..7ca3a1080ce 100644 --- a/src/solvers/dg/2d/parallel.jl +++ b/src/solvers/dg/2d/parallel.jl @@ -705,3 +705,12 @@ function integrate(func, dg::Dg2D, uses_mpi::Val{true}, args...; normalize=true) return is_mpi_root() ? 
integral[] : integral end + + +# Calculate stable time step size +function calc_dt(dg::Dg2D, cfl, uses_mpi::Val{true}) + min_dt = calc_dt(dg, cfl, Val(false)) + min_dt = MPI.Allreduce!(Ref(min_dt), min, mpi_comm())[] + + return min_dt +end From bcc48019c639bfb17c693a3f80fc1c8df1f16d14 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sun, 27 Sep 2020 07:36:22 +0200 Subject: [PATCH 45/81] Fix AMR & allow shock capturing without smoothing in parallel --- src/solvers/dg/2d/amr.jl | 8 ++-- src/solvers/dg/2d/dg.jl | 96 ++++++++++++++++++++++------------------ src/solvers/dg/3d/amr.jl | 8 ++-- 3 files changed, 62 insertions(+), 50 deletions(-) diff --git a/src/solvers/dg/2d/amr.jl b/src/solvers/dg/2d/amr.jl index 52770ebde23..e0ddd09382b 100644 --- a/src/solvers/dg/2d/amr.jl +++ b/src/solvers/dg/2d/amr.jl @@ -1,8 +1,8 @@ # This file contains functions that are related to the AMR capabilities of the DG solver # Refine elements in the DG solver based on a list of cell_ids that should be refined -function refine!(dg::Dg2D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, - cells_to_refine::AbstractArray{Int}) where {Eqn, NVARS, POLYDEG} +function refine!(dg::Dg2D{Eqn, MeshType, NVARS, POLYDEG}, mesh::TreeMesh, + cells_to_refine::AbstractArray{Int}) where {Eqn, MeshType, NVARS, POLYDEG} # Return early if there is nothing to do if isempty(cells_to_refine) return @@ -124,8 +124,8 @@ end # Coarsen elements in the DG solver based on a list of cell_ids that should be removed -function coarsen!(dg::Dg2D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, - child_cells_to_coarsen::AbstractArray{Int}) where {Eqn, NVARS, POLYDEG} +function coarsen!(dg::Dg2D{Eqn, MeshType, NVARS, POLYDEG}, mesh::TreeMesh, + child_cells_to_coarsen::AbstractArray{Int}) where {Eqn, MeshType, NVARS, POLYDEG} # Return early if there is nothing to do if isempty(child_cells_to_coarsen) return diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index 601120ecd28..ee3a5dac5fe 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -2395,6 +2395,12 @@ end function calc_blending_factors!(alpha, alpha_pre_smooth, u, alpha_max, alpha_min, do_smoothing, indicator_variable, thread_cache, dg::Dg2D) + calc_blending_factors!(alpha, alpha_pre_smooth, u, alpha_max, alpha_min, do_smoothing, + indicator_variable, thread_cache, dg, uses_mpi(dg)) +end +function calc_blending_factors!(alpha, alpha_pre_smooth, u, + alpha_max, alpha_min, do_smoothing, + indicator_variable, thread_cache, dg::Dg2D, uses_mpi::Val{false}) # temporary buffers @unpack indicator_threaded, modal_threaded, modal_tmp1_threaded = thread_cache # magic parameters @@ -2447,48 +2453,54 @@ function calc_blending_factors!(alpha, alpha_pre_smooth, u, end if (do_smoothing) - # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha - # Copy alpha values such that smoothing is indpedenent of the element access order - alpha_pre_smooth .= alpha - - # Loop over interfaces - for interface_id in 1:dg.n_interfaces - # Get neighboring element ids - left = dg.interfaces.neighbor_ids[1, interface_id] - right = dg.interfaces.neighbor_ids[2, interface_id] - - # Apply smoothing - alpha[left] = max(alpha_pre_smooth[left], 0.5 * alpha_pre_smooth[right], alpha[left]) - alpha[right] = max(alpha_pre_smooth[right], 0.5 * alpha_pre_smooth[left], alpha[right]) - end - - # Loop over L2 mortars - for l2mortar_id in 1:dg.n_l2mortars - # Get neighboring element ids - lower = dg.l2mortars.neighbor_ids[1, l2mortar_id] - upper = dg.l2mortars.neighbor_ids[2, 
l2mortar_id] - large = dg.l2mortars.neighbor_ids[3, l2mortar_id] - - # Apply smoothing - alpha[lower] = max(alpha_pre_smooth[lower], 0.5 * alpha_pre_smooth[large], alpha[lower]) - alpha[upper] = max(alpha_pre_smooth[upper], 0.5 * alpha_pre_smooth[large], alpha[upper]) - alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[lower], alpha[large]) - alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[upper], alpha[large]) - end - - # Loop over EC mortars - for ecmortar_id in 1:dg.n_ecmortars - # Get neighboring element ids - lower = dg.ecmortars.neighbor_ids[1, ecmortar_id] - upper = dg.ecmortars.neighbor_ids[2, ecmortar_id] - large = dg.ecmortars.neighbor_ids[3, ecmortar_id] - - # Apply smoothing - alpha[lower] = max(alpha_pre_smooth[lower], 0.5 * alpha_pre_smooth[large], alpha[lower]) - alpha[upper] = max(alpha_pre_smooth[upper], 0.5 * alpha_pre_smooth[large], alpha[upper]) - alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[lower], alpha[large]) - alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[upper], alpha[large]) - end + smooth_alpha!(alpha, alpha_pre_smooth, dg, uses_mpi) + end +end + + +smooth_alpha!(alpha, alpha_pre_smooth, dg::Dg2D) = smooth_alpha!(alpha, alpha_pre_smooth, dg, uses_mpi(dg)) +function smooth_alpha!(alpha, alpha_pre_smooth, dg::Dg2D, uses_mpi::Val{false}) + # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha + # Copy alpha values such that smoothing is indpedenent of the element access order + alpha_pre_smooth .= alpha + + # Loop over interfaces + for interface_id in 1:dg.n_interfaces + # Get neighboring element ids + left = dg.interfaces.neighbor_ids[1, interface_id] + right = dg.interfaces.neighbor_ids[2, interface_id] + + # Apply smoothing + alpha[left] = max(alpha_pre_smooth[left], 0.5 * alpha_pre_smooth[right], alpha[left]) + alpha[right] = max(alpha_pre_smooth[right], 0.5 * alpha_pre_smooth[left], alpha[right]) + end + + # Loop over L2 mortars + for l2mortar_id in 1:dg.n_l2mortars + # Get neighboring element ids + lower = dg.l2mortars.neighbor_ids[1, l2mortar_id] + upper = dg.l2mortars.neighbor_ids[2, l2mortar_id] + large = dg.l2mortars.neighbor_ids[3, l2mortar_id] + + # Apply smoothing + alpha[lower] = max(alpha_pre_smooth[lower], 0.5 * alpha_pre_smooth[large], alpha[lower]) + alpha[upper] = max(alpha_pre_smooth[upper], 0.5 * alpha_pre_smooth[large], alpha[upper]) + alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[lower], alpha[large]) + alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[upper], alpha[large]) + end + + # Loop over EC mortars + for ecmortar_id in 1:dg.n_ecmortars + # Get neighboring element ids + lower = dg.ecmortars.neighbor_ids[1, ecmortar_id] + upper = dg.ecmortars.neighbor_ids[2, ecmortar_id] + large = dg.ecmortars.neighbor_ids[3, ecmortar_id] + + # Apply smoothing + alpha[lower] = max(alpha_pre_smooth[lower], 0.5 * alpha_pre_smooth[large], alpha[lower]) + alpha[upper] = max(alpha_pre_smooth[upper], 0.5 * alpha_pre_smooth[large], alpha[upper]) + alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[lower], alpha[large]) + alpha[large] = max(alpha_pre_smooth[large], 0.5 * alpha_pre_smooth[upper], alpha[large]) end end diff --git a/src/solvers/dg/3d/amr.jl b/src/solvers/dg/3d/amr.jl index b05ccee0703..87cbf9e2bbe 100644 --- a/src/solvers/dg/3d/amr.jl +++ b/src/solvers/dg/3d/amr.jl @@ -1,8 +1,8 @@ # This file contains functions that are related to the AMR capabilities of the DG solver # Refine 
elements in the DG solver based on a list of cell_ids that should be refined -function refine!(dg::Dg3D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, - cells_to_refine::AbstractArray{Int}) where {Eqn, NVARS, POLYDEG} +function refine!(dg::Dg3D{Eqn, MeshType, NVARS, POLYDEG}, mesh::TreeMesh, + cells_to_refine::AbstractArray{Int}) where {Eqn, MeshType, NVARS, POLYDEG} # Return early if there is nothing to do if isempty(cells_to_refine) return @@ -131,8 +131,8 @@ end # Coarsen elements in the DG solver based on a list of cell_ids that should be removed -function coarsen!(dg::Dg3D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, - child_cells_to_coarsen::AbstractArray{Int}) where {Eqn, NVARS, POLYDEG} +function coarsen!(dg::Dg3D{Eqn, MeshType, NVARS, POLYDEG}, mesh::TreeMesh, + child_cells_to_coarsen::AbstractArray{Int}) where {Eqn, MeshType, NVARS, POLYDEG} # Return early if there is nothing to do if isempty(child_cells_to_coarsen) return From 3ce35b70f8bc562189677a0d542277dae4ac3a70 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sun, 27 Sep 2020 08:11:01 +0200 Subject: [PATCH 46/81] Hopefully fix 3D simulation --- src/solvers/dg/3d/amr.jl | 2 ++ src/solvers/dg/3d/dg.jl | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/src/solvers/dg/3d/amr.jl b/src/solvers/dg/3d/amr.jl index 87cbf9e2bbe..f3f0fe6dee0 100644 --- a/src/solvers/dg/3d/amr.jl +++ b/src/solvers/dg/3d/amr.jl @@ -65,6 +65,7 @@ function refine!(dg::Dg3D{Eqn, MeshType, NVARS, POLYDEG}, mesh::TreeMesh, # Update DG instance with new data dg.elements = elements dg.n_elements = n_elements + dg.n_elements_global = n_elements dg.interfaces = interfaces dg.n_interfaces = n_interfaces dg.boundaries = boundaries @@ -207,6 +208,7 @@ function coarsen!(dg::Dg3D{Eqn, MeshType, NVARS, POLYDEG}, mesh::TreeMesh, # Update DG instance with new data dg.elements = elements dg.n_elements = n_elements + dg.n_elements_global = n_elements dg.interfaces = interfaces dg.n_interfaces = n_interfaces dg.boundaries = boundaries diff --git a/src/solvers/dg/3d/dg.jl b/src/solvers/dg/3d/dg.jl index 23571536b85..45acf099630 100644 --- a/src/solvers/dg/3d/dg.jl +++ b/src/solvers/dg/3d/dg.jl @@ -66,6 +66,8 @@ mutable struct Dg3D{Eqn<:AbstractEquation, MeshType, NVARS, POLYDEG, positivity_preserving_limiter_apply::Bool positivity_preserving_limiter_threshold::Float64 + n_elements_global::Int + element_variables::Dict{Symbol, Union{Vector{Float64}, Vector{Int}}} cache::Dict{Symbol, Any} thread_cache::Any # to make fully-typed output more readable @@ -168,6 +170,9 @@ function Dg3D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v amr_indicator = Symbol(parameter("amr_indicator", "n/a", valid=["n/a", "gauss", "blob", "density_pulse", "sedov_self_gravity"])) + # Set global number of elements + n_elements_global = n_elements + # Initialize storage for element variables element_variables = Dict{Symbol, Union{Vector{Float64}, Vector{Int}}}() # maximum and minimum alpha for shock capturing @@ -246,6 +251,7 @@ function Dg3D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v shock_indicator_variable, shock_alpha_max, shock_alpha_min, shock_alpha_smooth, amr_indicator, amr_alpha_max, amr_alpha_min, amr_alpha_smooth, positivity_preserving_limiter_apply, positivity_preserving_limiter_threshold, + n_elements_global, element_variables, cache, thread_cache, initial_state_integrals) From 8b1084c39125928ec32e0f8975a6a12404da06d9 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sun, 27 Sep 2020 22:47:03 +0200 Subject: [PATCH 47/81] Add 
first documentation on how to run Trixi in parallel --- docs/make.jl | 1 + docs/src/parallelization.md | 99 +++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 docs/src/parallelization.md diff --git a/docs/make.jl b/docs/make.jl index 7e60a9fba79..c0dc5d00fef 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -40,6 +40,7 @@ makedocs( "Home" => "index.md", "Development" => "development.md", "Visualization" => "visualization.md", + "Parallelization" => "parallelization.md", "Style guide" => "styleguide.md", "GitHub & Git" => "github-git.md", "Reference" => [ diff --git a/docs/src/parallelization.md b/docs/src/parallelization.md new file mode 100644 index 00000000000..64843146b8b --- /dev/null +++ b/docs/src/parallelization.md @@ -0,0 +1,99 @@ +# Parallelization + +## Shared-memory parallelization with threads +Many compute-intensive loops in Trixi.jl are parallelized using the +[multi-threading](https://docs.julialang.org/en/v1/manual/multi-threading/) +support provided by Julia. You can recognize those loops by the +`Threads.@threads` macro prefixed to them, e.g., +```julia +Threads.@threads for element_id in 1:dg.n_elements + ... +end +``` +This will statically assign an equal iteration count to each available thread. + +To use multi-threading, you need to tell Julia at startup how many threads you +want to use by either setting the environment variable `JULIA_NUM_THREADS` or by +providing the `-t/--threads` command line argument. For example, to start Julia +with four threads, start Julia with +```bash +julia -t 4 +``` +If both the environment variable and the command line argument are specified at +the same time, the latter takes precedence. + + +## Distributed computing with MPI +In addition to the shared memory parallelization with multi-threading, Trixi.jl +supports distributed parallelism via +[MPI.jl](https://github.com/JuliaParallel/MPI.jl), which leverages the Message +Passing Interface (MPI). MPI.jl comes with its own MPI library binaries such +that there is no need to install MPI yourself. However, it is also possible to +instead use an existing MPI installation, which is recommended if you are +running MPI programs on a cluster or supercomputer +([see the MPI.jl docs](https://juliaparallel.github.io/MPI.jl/stable/configuration/) +to find out how to select the employed MPI library). + +To start Trixi in parallel with MPI, there are three options: + +1. **Run from the REPL with `mpiexec()`:** You can start a parallel execution directly from the + REPL by executing + ```julia + julia> using MPI + + julia> mpiexec() do cmd + run(`$cmd -n 3 $(Base.julia_cmd()) --project=. -e 'using Trixi; Trixi.run("examples/2d/parameters.toml")'`) + end + ``` + The parameter `-n 3` specifies that Trixi should run with three processes (or + *ranks* in MPI parlance) and should be adapted to your available + computing resources and problem size. The `$(Base.julia_cmd())` argument + ensures that Julia is executed in parallel with the same optimization level + etc. as you used for the REPL; if this is unnecessary or undesired, you can + also just use `julia`. Further, if you are not running Trixi from a local + clone but have installed it as a package, you need to omit the `--project=.`. +2. **Run from the command line with `mpiexecjl`:** Alternatively, you can + use the `mpiexecjl` script provided by MPI.jl, which allows you to start + Trixi in parallel directly from the command line. 
As a preparation, you need to
+   install the script *once* by running
+   ```julia
+   julia> using MPI
+
+   julia> MPI.install_mpiexecjl(destdir="/somewhere/in/your/PATH")
+   ```
+   Then, to run Trixi in parallel, execute the following command from your
+   command line:
+   ```bash
+   mpiexecjl -n 3 julia --project=. -e 'using Trixi; Trixi.run("examples/2d/parameters.toml")'
+   ```
+3. **Run interactively with `tmpi` (Linux/MacOS only):** If you are on a
+   Linux/macOS system, you have a third option which lets you run Julia in
+   parallel interactively from the REPL. This comes in handy especially during
+   development, as, in contrast to the first two options, it allows you to reuse
+   the compilation cache and thus facilitates much faster startup times after
+   the first execution. It requires [tmux](https://github.com/tmux/tmux) and the
+   [OpenMPI](https://www.open-mpi.org) library to be installed beforehand, both
+   of which are usually available through a package manager. Once you have
+   installed both tools, you need to configure MPI.jl to use the OpenMPI library
+   on your system, which is explained
+   [here](https://juliaparallel.github.io/MPI.jl/stable/configuration/#Using-a-system-provided-MPI).
+   Then, you can download and install the
+   [tmpi](https://github.com/Azrael3000/tmpi)
+   script by executing
+   ```bash
+   curl https://raw.githubusercontent.com/Azrael3000/tmpi/master/tmpi -o /somewhere/in/your/PATH/tmpi
+   ```
+   Finally, you can start and control multiple Julia REPLs simultaneously by
+   running
+   ```bash
+   tmpi 3 julia --project=.
+   ```
+   This will start Julia inside `tmux` three times and multiplex all commands
+   you enter in one REPL to all other REPLs (try it yourself to see what this
+   means). If you have no prior experience with `tmux`, handling the REPL this
+   way may feel slightly odd at first. However, there is a lot of
+   documentation for `tmux`
+   [available](https://github.com/tmux/tmux/wiki/Getting-Started) and once you
+   get the hang of it, developing Trixi in parallel becomes much smoother this
+   way.
+

From 40874dd746600bcf5bf2b1b5caffb0cfb2b5b68a Mon Sep 17 00:00:00 2001
From: Michael Schlottke-Lakemper
Date: Mon, 28 Sep 2020 06:21:59 +0200
Subject: [PATCH 48/81] using MPI -> import MPI

---
 src/Trixi.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Trixi.jl b/src/Trixi.jl
index 8a618f3e5d0..9310902a011 100644
--- a/src/Trixi.jl
+++ b/src/Trixi.jl
@@ -22,7 +22,7 @@ using Random: seed!
 
 using EllipsisNotation
 using HDF5: h5open, attrs
-using MPI # We use all symbols, but for now we always prefix with `MPI.`, e.g., `MPI.Init()`
+import MPI
 using OffsetArrays: OffsetArray, OffsetVector
 using StaticArrays: @MVector, @SVector, MVector, MMatrix, MArray, SVector, SMatrix, SArray
 using TimerOutputs: @notimeit, @timeit, TimerOutput, print_timer, reset_timer!

From ccc7056800e99fbbffd3b08847bed0909b325815 Mon Sep 17 00:00:00 2001
From: Michael Schlottke-Lakemper
Date: Mon, 28 Sep 2020 06:24:20 +0200
Subject: [PATCH 49/81] Simplify type hierarchy

---
 src/mesh/mesh.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesh/mesh.jl b/src/mesh/mesh.jl
index e6a83e24a2e..40258f6f92a 100644
--- a/src/mesh/mesh.jl
+++ b/src/mesh/mesh.jl
@@ -7,7 +7,7 @@ include("parallel.jl")
 
 # Composite type to hold the actual tree in addition to other mesh-related data
 # that is not strictly part of the tree.
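The `Simplify type hierarchy` commit works because `AbstractTree{NDIMS} where NDIMS` is simply the `UnionAll` type `AbstractTree`, so the shorter bound `TreeType<:AbstractTree` in the hunk below is an equivalent constraint; the dimension is still carried by the concrete tree type and can be recovered via `ndims`. A minimal sketch of this idea, using hypothetical `DemoTree`/`DemoMesh` types rather than Trixi's actual structs:

```julia
# Editorial sketch (toy types, not Trixi code): the dimension parameter lives on the
# tree type, so the mesh type does not need to restate it in its own type bound.
abstract type AbstractDemoTree{NDIMS} end

struct DemoTree{NDIMS} <: AbstractDemoTree{NDIMS} end

Base.ndims(::AbstractDemoTree{NDIMS}) where NDIMS = NDIMS

# `TreeType <: (AbstractDemoTree{NDIMS} where NDIMS)` is the same constraint as
# `TreeType <: AbstractDemoTree`, since the former UnionAll type *is* the latter.
struct DemoMesh{TreeType<:AbstractDemoTree}
    tree::TreeType
end

Base.ndims(mesh::DemoMesh) = ndims(mesh.tree)

ndims(DemoMesh(DemoTree{2}()))  # returns 2
```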
-mutable struct TreeMesh{TreeType<:AbstractTree{NDIMS} where NDIMS} +mutable struct TreeMesh{TreeType<:AbstractTree} tree::TreeType current_filename::String unsaved_changes::Bool From f69d3f875a089dc5d2d8610ce9c30dc5db637238 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 28 Sep 2020 06:29:21 +0200 Subject: [PATCH 50/81] Reduce code duplication in `generate_mesh` Co-authored-by: Hendrik Ranocha --- src/mesh/mesh.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesh/mesh.jl b/src/mesh/mesh.jl index 40258f6f92a..961620fd814 100644 --- a/src/mesh/mesh.jl +++ b/src/mesh/mesh.jl @@ -77,12 +77,12 @@ function generate_mesh() # Create mesh if is_parallel() - @timeit timer() "creation" mesh = TreeMesh(ParallelTree{ndims_}, n_cells_max, - domain_center, domain_length, periodicity) + tree_type = ParallelTree{ndims_} else - @timeit timer() "creation" mesh = TreeMesh(Tree{ndims_}, n_cells_max, domain_center, - domain_length, periodicity) + tree_type = Tree{ndims_} end + @timeit timer() "creation" mesh = TreeMesh(tree_type, n_cells_max, domain_center, + domain_length, periodicity) # Create initial refinement initial_refinement_level = parameter("initial_refinement_level") From cc29ac46682f57187e20078768bf6ea49de47739 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 28 Sep 2020 08:38:54 +0200 Subject: [PATCH 51/81] Rename `Tree` -> `SerialTree` and move all generic tree functions to `AbstractTree` --- src/mesh/{tree.jl => abstract_tree.jl} | 339 +++------------ src/mesh/mesh.jl | 10 +- src/mesh/parallel_tree.jl | 549 +------------------------ src/mesh/serial_tree.jl | 265 ++++++++++++ src/solvers/dg/dg.jl | 2 +- 5 files changed, 325 insertions(+), 840 deletions(-) rename src/mesh/{tree.jl => abstract_tree.jl} (59%) create mode 100644 src/mesh/serial_tree.jl diff --git a/src/mesh/tree.jl b/src/mesh/abstract_tree.jl similarity index 59% rename from src/mesh/tree.jl rename to src/mesh/abstract_tree.jl index a9462e8df79..acb3516ca41 100644 --- a/src/mesh/tree.jl +++ b/src/mesh/abstract_tree.jl @@ -1,201 +1,62 @@ - -# Composite type that represents a NDIMS-dimensional tree. -# -# Implements everything required for AbstractContainer. -# -# Note: The way the data structures are set up and the way most algorithms -# work, it is *always* assumed that -# a) we have a balanced tree (= at most one level difference between -# neighboring cells, or 2:1 rule) -# b) we may not have all children (= some children may not exist) -# c) the tree is stored depth-first -# -# However, the way the refinement/coarsening algorithms are currently -# implemented, we only have fully refined cells. That is, a cell either has 2^NDIMS children or -# no children at all (= leaf cell). This restriction is also assumed at -# multiple positions in the refinement/coarsening algorithms. -# -# An exception to the 2:1 rule exists for the low-level `refine_unbalanced!` -# function, which is required for implementing level-wise refinement in a sane -# way. Also, depth-first ordering *might* not by guaranteed during -# refinement/coarsening operations. 
-mutable struct Tree{NDIMS} <: AbstractTree{NDIMS} - parent_ids::Vector{Int} - child_ids::Matrix{Int} - neighbor_ids::Matrix{Int} - levels::Vector{Int} - coordinates::Matrix{Float64} - original_cell_ids::Vector{Int} - - capacity::Int - length::Int - dummy::Int - - center_level_0::SVector{NDIMS, Float64} - length_level_0::Float64 - periodicity::NTuple{NDIMS, Bool} - - function Tree{NDIMS}(capacity::Integer) where NDIMS - # Verify that NDIMS is an integer - @assert NDIMS isa Integer - - # Create instance - t = new() - - # Initialize fields with defaults - # Note: length as capacity + 1 is to use `capacity + 1` as temporary storage for swap operations - t.parent_ids = fill(typemin(Int), capacity + 1) - t.child_ids = fill(typemin(Int), 2^NDIMS, capacity + 1) - t.neighbor_ids = fill(typemin(Int), 2*NDIMS, capacity + 1) - t.levels = fill(typemin(Int), capacity + 1) - t.coordinates = fill(NaN, NDIMS, capacity + 1) - t.original_cell_ids = fill(typemin(Int), capacity + 1) - - t.capacity = capacity - t.length = 0 - t.dummy = capacity + 1 - - t.center_level_0 = @SVector fill(NaN, NDIMS) - t.length_level_0 = NaN - - return t - end -end - - -# Constructor for passing the dimension as an argument -Tree(::Val{NDIMS}, args...) where NDIMS = Tree{NDIMS}(args...) - -# Create and initialize tree -function Tree{NDIMS}(capacity::Int, center::AbstractArray{Float64}, - length::Real, periodicity=true) where NDIMS - # Create instance - t = Tree{NDIMS}(capacity) - - # Initialize root cell - init!(t, center, length, periodicity) - - return t -end - -# Constructor accepting a single number as center (as opposed to an array) for 1D -Tree{1}(cap::Int, center::Real, len::Real, periodicity=true) = Tree{1}(cap, [convert(Float64, center)], len, periodicity) - - -# Clear tree with deleting data structures, store center and length, and create root cell -function init!(t::Tree, center::AbstractArray{Float64}, length::Real, periodicity=true) - clear!(t) - - # Set domain information - t.center_level_0 = center - t.length_level_0 = length - - # Create root cell - t.length += 1 - t.parent_ids[1] = 0 - t.child_ids[:, 1] .= 0 - t.levels[1] = 0 - t.coordinates[:, 1] .= t.center_level_0 - t.original_cell_ids[1] = 0 - - # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor - if all(periodicity) - # Also catches case where periodicity = true - t.neighbor_ids[:, 1] .= 1 - t.periodicity = ntuple(x->true, ndims(t)) - elseif !any(periodicity) - # Also catches case where periodicity = false - t.neighbor_ids[:, 1] .= 0 - t.periodicity = ntuple(x->false, ndims(t)) - else - # Default case if periodicity is an iterable - for dimension in 1:ndims(t) - if periodicity[dimension] - t.neighbor_ids[2 * dimension - 1, 1] = 1 - t.neighbor_ids[2 * dimension - 0, 1] = 1 - else - t.neighbor_ids[2 * dimension - 1, 1] = 0 - t.neighbor_ids[2 * dimension - 0, 1] = 0 - end - end - - t.periodicity = Tuple(periodicity) - end -end - - -# Convenience output for debugging -function Base.show(io::IO, t::Tree{NDIMS}) where NDIMS - l = t.length - println(io, '*'^20) - println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])") - println(io, "transpose(t.child_ids[:, 1:l]) = $(transpose(t.child_ids[:, 1:l]))") - println(io, "transpose(t.neighbor_ids[:, 1:l]) = $(transpose(t.neighbor_ids[:, 1:l]))") - println(io, "t.levels[1:l] = $(t.levels[1:l])") - println(io, "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))") - println(io, "t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])") - println(io, "t.capacity = 
$(t.capacity)") - println(io, "t.length = $(t.length)") - println(io, "t.dummy = $(t.dummy)") - println(io, "t.center_level_0 = $(t.center_level_0)") - println(io, "t.length_level_0 = $(t.length_level_0)") - println(io, '*'^20) -end +abstract type AbstractTree{NDIMS} <: AbstractContainer end # Type traits to obtain dimension -@inline Base.ndims(t::Type{Tree{NDIMS}}) where NDIMS = NDIMS -@inline Base.ndims(t::Tree) = ndims(typeof(t)) +@inline Base.ndims(::Type{AbstractTree{NDIMS}}) where NDIMS = NDIMS +@inline Base.ndims(t::AbstractTree{NDIMS}) where NDIMS = NDIMS # Auxiliary methods to allow semantic queries on the tree # Check whether cell has parent cell -has_parent(t::Tree, cell_id::Int) = t.parent_ids[cell_id] > 0 +has_parent(t::AbstractTree, cell_id::Int) = t.parent_ids[cell_id] > 0 # Count number of children for a given cell -n_children(t::Tree, cell_id::Int) = count(x -> (x > 0), @view t.child_ids[:, cell_id]) +n_children(t::AbstractTree, cell_id::Int) = count(x -> (x > 0), @view t.child_ids[:, cell_id]) # Check whether cell has any child cell -has_children(t::Tree, cell_id::Int) = n_children(t, cell_id) > 0 +has_children(t::AbstractTree, cell_id::Int) = n_children(t, cell_id) > 0 # Check whether cell is leaf cell -is_leaf(t::Tree, cell_id::Int) = !has_children(t, cell_id) +is_leaf(t::AbstractTree, cell_id::Int) = !has_children(t, cell_id) # Check whether cell has specific child cell -has_child(t::Tree, cell_id::Int, child::Int) = t.child_ids[child, cell_id] > 0 +has_child(t::AbstractTree, cell_id::Int, child::Int) = t.child_ids[child, cell_id] > 0 # Check if cell has a neighbor at the same refinement level in the given direction -has_neighbor(t::Tree, cell_id::Int, direction::Int) = t.neighbor_ids[direction, cell_id] > 0 +has_neighbor(t::AbstractTree, cell_id::Int, direction::Int) = t.neighbor_ids[direction, cell_id] > 0 # Check if cell has a coarse neighbor, i.e., with one refinement level lower -function has_coarse_neighbor(t::Tree, cell_id::Int, direction::Int) +function has_coarse_neighbor(t::AbstractTree, cell_id::Int, direction::Int) return has_parent(t, cell_id) && has_neighbor(t, t.parent_ids[cell_id], direction) end # Check if cell has any neighbor (same-level or lower-level) -function has_any_neighbor(t::Tree, cell_id::Int, direction::Int) +function has_any_neighbor(t::AbstractTree, cell_id::Int, direction::Int) return has_neighbor(t, cell_id, direction) || has_coarse_neighbor(t, cell_id, direction) end +# Check if cell is own cell, i.e., belongs to this MPI rank +is_own_cell(t::AbstractTree, cell_id) = true + # Return cell length for a given level -length_at_level(t::Tree, level::Int) = t.length_level_0 / 2^level +length_at_level(t::AbstractTree, level::Int) = t.length_level_0 / 2^level # Return cell length for a given cell -length_at_cell(t::Tree, cell_id::Int) = length_at_level(t, t.levels[cell_id]) +length_at_cell(t::AbstractTree, cell_id::Int) = length_at_level(t, t.levels[cell_id]) # Return minimum level of any leaf cell -minimum_level(t::Tree) = minimum(t.levels[leaf_cells(t)]) +minimum_level(t::AbstractTree) = minimum(t.levels[leaf_cells(t)]) # Return maximum level of any leaf cell -maximum_level(t::Tree) = maximum(t.levels[leaf_cells(t)]) +maximum_level(t::AbstractTree) = maximum(t.levels[leaf_cells(t)]) # Check if tree is periodic -isperiodic(t::Tree) = all(t.periodicity) -isperiodic(t::Tree, dimension) = t.periodicity[dimension] +isperiodic(t::AbstractTree) = all(t.periodicity) +isperiodic(t::AbstractTree, dimension) = t.periodicity[dimension] # Auxiliary 
methods for often-required calculations # Number of potential child cells -n_children_per_cell(::Tree{NDIMS}) where NDIMS = 2^NDIMS +n_children_per_cell(::AbstractTree{NDIMS}) where NDIMS = 2^NDIMS n_children_per_cell(dims::Integer) = 2^dims # Number of directions @@ -207,7 +68,7 @@ n_children_per_cell(dims::Integer) = 2^dims # 4 -> +y # 5 -> -z # 6 -> +z -n_directions(::Tree{NDIMS}) where NDIMS = 2 * NDIMS +n_directions(::AbstractTree{NDIMS}) where NDIMS = 2 * NDIMS # For a given direction, return its opposite direction # @@ -260,7 +121,7 @@ end # # The function `f` is passed the cell id of each leaf cell # as an argument. -function filter_leaf_cells(f, t::Tree) +function filter_leaf_cells(f, t::AbstractTree) filtered = Vector{Int}(undef, length(t)) count = 0 for cell_id in 1:length(t) @@ -275,21 +136,29 @@ end # Return an array with the ids of all leaf cells -leaf_cells(t::Tree) = filter_leaf_cells((cell_id)->true, t) +leaf_cells(t::AbstractTree) = filter_leaf_cells((cell_id)->true, t) + + +# Return an array with the ids of all leaf cells for a given rank +leaf_cells_by_rank(t::AbstractTree, rank) = leaf_cells(t) + + +# Return an array with the ids of all local leaf cells +local_leaf_cells(t::AbstractTree) = leaf_cells_by_rank(t, mpi_rank()) # Count the number of leaf cells. -count_leaf_cells(t::Tree) = length(leaf_cells(t)) +count_leaf_cells(t::AbstractTree) = length(leaf_cells(t)) # Store cell id in each cell to use for post-AMR analysis -function reset_original_cell_ids!(t::Tree) +function reset_original_cell_ids!(t::AbstractTree) t.original_cell_ids[1:length(t)] .= 1:length(t) end # Refine entire tree by one level -refine!(t::Tree) = refine!(t, leaf_cells(t)) +refine!(t::AbstractTree) = refine!(t, leaf_cells(t)) # Refine given cells and rebalance tree. @@ -298,7 +167,7 @@ refine!(t::Tree) = refine!(t, leaf_cells(t)) # otherwise the 2:1 rule would be violated, which can cause more # refinements. # Note 2: Rebalancing currently only considers *Cartesian* neighbors, not diagonal neighbors! -function refine!(t::Tree, cell_ids) +function refine!(t::AbstractTree, cell_ids) # Reset original cell ids such that each cell knows its current id reset_original_cell_ids!(t) @@ -328,7 +197,7 @@ end # Refine all leaf cells with coordinates in a given rectangular box -function refine_box!(t::Tree{NDIMS}, coordinates_min::AbstractArray{Float64}, +function refine_box!(t::AbstractTree{NDIMS}, coordinates_min::AbstractArray{Float64}, coordinates_max::AbstractArray{Float64}) where NDIMS for dim in 1:NDIMS @assert coordinates_min[dim] < coordinates_max[dim] "Minimum coordinates are not minimum." @@ -345,7 +214,7 @@ function refine_box!(t::Tree{NDIMS}, coordinates_min::AbstractArray{Float64}, end # Convenience method for 1D -function refine_box!(t::Tree{1}, coordinates_min::Real, coordinates_max::Real) +function refine_box!(t::AbstractTree{1}, coordinates_min::Real, coordinates_max::Real) return refine_box!(t, [convert(Float64, coordinates_min)], [convert(Float64, coordinates_max)]) end @@ -356,7 +225,7 @@ end # Note 2: The current algorithm assumes that a previous refinement step has # created level differences of at most 2. That is, before the previous # refinement step, the tree was balanced. -function rebalance!(t::Tree, refined_cell_ids) +function rebalance!(t::AbstractTree, refined_cell_ids) # Create buffer for newly refined cells to_refine = zeros(Int, n_directions(t) * length(refined_cell_ids)) count = 0 @@ -396,83 +265,14 @@ end # Refine given cells without rebalancing tree. 
# # Note: After a call to this method the tree may be unbalanced! -function refine_unbalanced!(t::Tree, cell_ids) - # Store actual ids refined cells (shifted due to previous insertions) - refined = zeros(Int, length(cell_ids)) - - # Loop over all cells that are to be refined - for (count, original_cell_id) in enumerate(sort(unique(cell_ids))) - # Determine actual cell id, taking into account previously inserted cells - n_children = n_children_per_cell(t) - cell_id = original_cell_id + (count - 1) * n_children - refined[count] = cell_id - - @assert !has_children(t, cell_id) "Non-leaf cell $cell_id cannot be refined" - - # Insert new cells directly behind parent (depth-first) - insert!(t, cell_id + 1, n_children) - - # Flip sign of refined cell such that we can easily find it later - t.original_cell_ids[cell_id] = -t.original_cell_ids[cell_id] - - # Initialize child cells - for child in 1:n_children - # Set child information based on parent - child_id = cell_id + child - t.parent_ids[child_id] = cell_id - t.child_ids[child, cell_id] = child_id - t.neighbor_ids[:, child_id] .= 0 - t.child_ids[:, child_id] .= 0 - t.levels[child_id] = t.levels[cell_id] + 1 - t.coordinates[:, child_id] .= child_coordinates( - t, t.coordinates[:, cell_id], length_at_cell(t, cell_id), child) - t.original_cell_ids[child_id] = 0 - - # For determining neighbors, use neighbor connections of parent cell - for direction in 1:n_directions(t) - # If neighbor is a sibling, establish one-sided connectivity - # Note: two-sided is not necessary, as each sibling will do this - if has_sibling(child, direction) - adjacent = adjacent_child(child, direction) - neighbor_id = cell_id + adjacent - - t.neighbor_ids[direction, child_id] = neighbor_id - continue - end - - # Skip if original cell does have no neighbor in direction - if !has_neighbor(t, cell_id, direction) - continue - end - - # Otherwise, check if neighbor has children - if not, skip again - neighbor_id = t.neighbor_ids[direction, cell_id] - if !has_children(t, neighbor_id) - continue - end - - # Check if neighbor has corresponding child and if yes, establish connectivity - adjacent = adjacent_child(child, direction) - if has_child(t, neighbor_id, adjacent) - neighbor_child_id = t.child_ids[adjacent, neighbor_id] - opposite = opposite_direction(direction) - - t.neighbor_ids[direction, child_id] = neighbor_child_id - t.neighbor_ids[opposite, neighbor_child_id] = child_id - end - end - end - end - - return refined -end +function refine_unbalanced!(t::AbstractTree, cell_ids) end # Wrap single-cell refinements such that `sort(...)` does not complain -refine_unbalanced!(t::Tree, cell_id::Int) = refine_unbalanced!(t, [cell_id]) +refine_unbalanced!(t::AbstractTree, cell_id::Int) = refine_unbalanced!(t, [cell_id]) # Coarsen entire tree by one level -function coarsen!(t::Tree) +function coarsen!(t::AbstractTree) # Special case: if there is only one cell (root), there is nothing to do if length(t) == 1 return Int[] @@ -491,7 +291,7 @@ end # was already refined. Since it is generally not desired that cells are # coarsened without specifically asking for it, these cells will then *not* be # coarsened. 
-function coarsen!(t::Tree, cell_ids::AbstractArray{Int}) +function coarsen!(t::AbstractTree, cell_ids::AbstractArray{Int}) # Return early if array is empty if length(cell_ids) == 0 return Int[] @@ -608,11 +408,11 @@ function coarsen!(t::Tree, cell_ids::AbstractArray{Int}) end # Wrap single-cell coarsening such that `sort(...)` does not complain -coarsen!(t::Tree, cell_id::Int) = coarsen!(t::Tree, [cell_id]) +coarsen!(t::AbstractTree, cell_id::Int) = coarsen!(t::AbstractTree, [cell_id]) # Coarsen all viable parent cells with coordinates in a given rectangular box -function coarsen_box!(t::Tree{NDIMS}, coordinates_min::AbstractArray{Float64}, +function coarsen_box!(t::AbstractTree{NDIMS}, coordinates_min::AbstractArray{Float64}, coordinates_max::AbstractArray{Float64}) where NDIMS for dim in 1:NDIMS @assert coordinates_min[dim] < coordinates_max[dim] "Minimum coordinates are not minimum." @@ -638,13 +438,13 @@ function coarsen_box!(t::Tree{NDIMS}, coordinates_min::AbstractArray{Float64}, end # Convenience method for 1D -function coarsen_box!(t::Tree{1}, coordinates_min::Real, coordinates_max::Real) +function coarsen_box!(t::AbstractTree{1}, coordinates_min::Real, coordinates_max::Real) return coarsen_box!(t, [convert(Float64, coordinates_min)], [convert(Float64, coordinates_max)]) end # Return coordinates of a child cell based on its relative position to the parent. -function child_coordinates(::Tree{NDIMS}, parent_coordinates, parent_length::Number, child::Int) where NDIMS +function child_coordinates(::AbstractTree{NDIMS}, parent_coordinates, parent_length::Number, child::Int) where NDIMS # Calculate length of child cells and set up data structure child_length = parent_length / 2 coordinates = MVector{NDIMS, Float64}(undef) @@ -661,26 +461,13 @@ end # Reset range of cells to values that are prone to cause errors as soon as they are used. # # Rationale: If an invalid cell is accidentally used, we want to know it as soon as possible. -function invalidate!(t::Tree, first::Int, last::Int) - @assert first > 0 - @assert last <= t.capacity + 1 - - # Integer values are set to smallest negative value, floating point values to NaN - t.parent_ids[first:last] .= typemin(Int) - t.child_ids[:, first:last] .= typemin(Int) - t.neighbor_ids[:, first:last] .= typemin(Int) - t.levels[first:last] .= typemin(Int) - t.coordinates[:, first:last] .= NaN - t.original_cell_ids[first:last] .= typemin(Int) - - return nothing -end -invalidate!(t::Tree, id::Int) = invalidate!(t, id, id) -invalidate!(t::Tree) = invalidate!(t, 1, length(t)) +function invalidate!(t::AbstractTree, first::Int, last::Int) end +invalidate!(t::AbstractTree, id::Int) = invalidate!(t, id, id) +invalidate!(t::AbstractTree) = invalidate!(t, 1, length(t)) # Delete connectivity with parents/children/neighbors before cells are erased -function delete_connectivity!(t::Tree, first::Int, last::Int) +function delete_connectivity!(t::AbstractTree, first::Int, last::Int) @assert first > 0 @assert first <= last @assert last <= t.capacity + 1 @@ -716,7 +503,7 @@ end # Move connectivity with parents/children/neighbors after cells have been moved -function move_connectivity!(t::Tree, first::Int, last::Int, destination::Int) +function move_connectivity!(t::AbstractTree, first::Int, last::Int, destination::Int) @assert first > 0 @assert first <= last @assert last <= t.capacity + 1 @@ -788,26 +575,8 @@ end # Raw copy operation for ranges of cells. 
# # This method is used by the higher-level copy operations for AbstractContainer -function raw_copy!(target::Tree, source::Tree, first::Int, last::Int, destination::Int) - copy_data!(target.parent_ids, source.parent_ids, first, last, destination) - copy_data!(target.child_ids, source.child_ids, first, last, destination, - n_children_per_cell(target)) - copy_data!(target.neighbor_ids, source.neighbor_ids, first, last, - destination, n_directions(target)) - copy_data!(target.levels, source.levels, first, last, destination) - copy_data!(target.coordinates, source.coordinates, first, last, destination, ndims(target)) - copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, destination) -end +function raw_copy!(target::AbstractTree, source::AbstractTree, first::Int, last::Int, destination::Int) end # Reset data structures by recreating all internal storage containers and invalidating all elements -function reset_data_structures!(t::Tree{NDIMS}) where NDIMS - t.parent_ids = Vector{Int}(undef, t.capacity + 1) - t.child_ids = Matrix{Int}(undef, 2^NDIMS, t.capacity + 1) - t.neighbor_ids = Matrix{Int}(undef, 2*NDIMS, t.capacity + 1) - t.levels = Vector{Int}(undef, t.capacity + 1) - t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) - t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) - - invalidate!(t, 1, capacity(t) + 1) -end +function reset_data_structures!(t::AbstractTree{NDIMS}) where NDIMS end diff --git a/src/mesh/mesh.jl b/src/mesh/mesh.jl index 961620fd814..6694bf343f8 100644 --- a/src/mesh/mesh.jl +++ b/src/mesh/mesh.jl @@ -1,7 +1,5 @@ -abstract type AbstractTree{NDIMS} <: AbstractContainer end -@inline Base.ndims(::AbstractTree{NDIMS}) where NDIMS = NDIMS - -include("tree.jl") +include("abstract_tree.jl") +include("serial_tree.jl") include("parallel_tree.jl") include("parallel.jl") @@ -79,7 +77,7 @@ function generate_mesh() if is_parallel() tree_type = ParallelTree{ndims_} else - tree_type = Tree{ndims_} + tree_type = SerialTree{ndims_} end @timeit timer() "creation" mesh = TreeMesh(tree_type, n_cells_max, domain_center, domain_length, periodicity) @@ -129,7 +127,7 @@ function load_mesh(restart_filename, mpi_parallel::Val{false}) n_cells_max = parameter("n_cells_max") # Create mesh - @timeit timer() "creation" mesh = TreeMesh(Tree{ndims_}, n_cells_max) + @timeit timer() "creation" mesh = TreeMesh(SerialTree{ndims_}, n_cells_max) # Determine mesh filename filename = get_restart_mesh_filename(restart_filename) diff --git a/src/mesh/parallel_tree.jl b/src/mesh/parallel_tree.jl index cac57149178..62e46c5f550 100644 --- a/src/mesh/parallel_tree.jl +++ b/src/mesh/parallel_tree.jl @@ -1,5 +1,5 @@ -# Composite type that represents a NDIMS-dimensional tree. +# Composite type that represents a NDIMS-dimensional tree (parallel version). # # Implements everything required for AbstractContainer. 
# @@ -146,144 +146,10 @@ function Base.show(io::IO, t::ParallelTree{NDIMS}) where NDIMS println(io, '*'^20) end -# Type traits to obtain dimension -@inline Base.ndims(t::Type{ParallelTree{NDIMS}}) where NDIMS = NDIMS -@inline Base.ndims(t::ParallelTree) = ndims(typeof(t)) - - -# Auxiliary methods to allow semantic queries on the tree -# Check whether cell has parent cell -has_parent(t::ParallelTree, cell_id::Int) = t.parent_ids[cell_id] > 0 - -# Count number of children for a given cell -n_children(t::ParallelTree, cell_id::Int) = count(x -> (x > 0), @view t.child_ids[:, cell_id]) - -# Check whether cell has any child cell -has_children(t::ParallelTree, cell_id::Int) = n_children(t, cell_id) > 0 - -# Check whether cell is leaf cell -is_leaf(t::ParallelTree, cell_id::Int) = !has_children(t, cell_id) - -# Check whether cell has specific child cell -has_child(t::ParallelTree, cell_id::Int, child::Int) = t.child_ids[child, cell_id] > 0 - -# Check if cell has a neighbor at the same refinement level in the given direction -has_neighbor(t::ParallelTree, cell_id::Int, direction::Int) = t.neighbor_ids[direction, cell_id] > 0 # Check if cell is own cell, i.e., belongs to this MPI rank is_own_cell(t::ParallelTree, cell_id) = t.mpi_ranks[cell_id] == mpi_rank() -# Check if cell has a coarse neighbor, i.e., with one refinement level lower -function has_coarse_neighbor(t::ParallelTree, cell_id::Int, direction::Int) - return has_parent(t, cell_id) && has_neighbor(t, t.parent_ids[cell_id], direction) -end - -# Check if cell has any neighbor (same-level or lower-level) -function has_any_neighbor(t::ParallelTree, cell_id::Int, direction::Int) - return has_neighbor(t, cell_id, direction) || has_coarse_neighbor(t, cell_id, direction) -end - -# Return cell length for a given level -length_at_level(t::ParallelTree, level::Int) = t.length_level_0 / 2^level - -# Return cell length for a given cell -length_at_cell(t::ParallelTree, cell_id::Int) = length_at_level(t, t.levels[cell_id]) - -# Return minimum level of any leaf cell -minimum_level(t::ParallelTree) = minimum(t.levels[leaf_cells(t)]) - -# Return maximum level of any leaf cell -maximum_level(t::ParallelTree) = maximum(t.levels[leaf_cells(t)]) - -# Check if tree is periodic -isperiodic(t::ParallelTree) = all(t.periodicity) -isperiodic(t::ParallelTree, dimension) = t.periodicity[dimension] - - -# Auxiliary methods for often-required calculations -# Number of potential child cells -n_children_per_cell(::ParallelTree{NDIMS}) where NDIMS = 2^NDIMS -# n_children_per_cell(dims::Integer) = 2^dims - -# Number of directions -# -# Directions are indicated by numbers from 1 to 2*ndims: -# 1 -> -x -# 2 -> +x -# 3 -> -y -# 4 -> +y -# 5 -> -z -# 6 -> +z -n_directions(::ParallelTree{NDIMS}) where NDIMS = 2 * NDIMS - -# For a given direction, return its opposite direction -# -# dir -> opp -# 1 -> 2 -# 2 -> 1 -# 3 -> 4 -# 4 -> 3 -# 5 -> 6 -# 6 -> 5 -# opposite_direction(direction::Int) = direction + 1 - 2 * ((direction + 1) % 2) - -# For a given child position (from 1 to 8) and dimension (from 1 to 3), -# calculate a child cell's position relative to its parent cell. -# -# Essentially calculates the following -# dim=1 dim=2 dim=3 -# child x y z -# 1 - - - -# 2 + - - -# 3 - + - -# 4 + + - -# 5 - - + -# 6 + - + -# 7 - + + -# 8 + + + -# child_sign(child::Int, dim::Int) = 1 - 2 * (div(child + 2^(dim - 1) - 1, 2^(dim-1)) % 2) - - -# For each child position (1 to 8) and a given direction (from 1 to 6), return -# neighboring child position. 
-# adjacent_child(child::Int, direction::Int) = [2 2 3 3 5 5; -# 1 1 4 4 6 6; -# 4 4 1 1 7 7; -# 3 3 2 2 8 8; -# 6 6 7 7 1 1; -# 5 5 8 8 2 2; -# 8 8 5 5 3 3; -# 7 7 6 6 4 4][child, direction] - - -# For each child position (1 to 8) and a given direction (from 1 to 6), return -# if neighbor is a sibling -# function has_sibling(child::Int, direction::Int) -# return (child_sign(child, div(direction + 1, 2)) * (-1)^(direction - 1)) > 0 -# end - - -# Obtain leaf cells that fulfill a given criterion. -# -# The function `f` is passed the cell id of each leaf cell -# as an argument. -function filter_leaf_cells(f, t::ParallelTree) - filtered = Vector{Int}(undef, length(t)) - count = 0 - for cell_id in 1:length(t) - if is_leaf(t, cell_id) && f(cell_id) - count += 1 - filtered[count] = cell_id - end - end - - return filtered[1:count] -end - - -# Return an array with the ids of all leaf cells -leaf_cells(t::ParallelTree) = filter_leaf_cells((cell_id)->true, t) - # Return an array with the ids of all leaf cells for a given rank leaf_cells_by_rank(t::ParallelTree, rank) = filter_leaf_cells(t) do cell_id @@ -294,121 +160,6 @@ leaf_cells_by_rank(t::ParallelTree, rank) = filter_leaf_cells(t) do cell_id local_leaf_cells(t::ParallelTree) = leaf_cells_by_rank(t, mpi_rank()) -# Count the number of leaf cells. -count_leaf_cells(t::ParallelTree) = length(leaf_cells(t)) - - -# Store cell id in each cell to use for post-AMR analysis -function reset_original_cell_ids!(t::ParallelTree) - t.original_cell_ids[1:length(t)] .= 1:length(t) -end - - -# Refine entire tree by one level -refine!(t::ParallelTree) = refine!(t, leaf_cells(t)) - - -# Refine given cells and rebalance tree. -# -# Note 1: Rebalancing is iterative, i.e., neighboring cells are refined if -# otherwise the 2:1 rule would be violated, which can cause more -# refinements. -# Note 2: Rebalancing currently only considers *Cartesian* neighbors, not diagonal neighbors! -function refine!(t::ParallelTree, cell_ids) - # Reset original cell ids such that each cell knows its current id - reset_original_cell_ids!(t) - - # Refine all requested cells - refined = refine_unbalanced!(t, cell_ids) - refinement_count = length(refined) - - # Iteratively rebalance the tree until it does not change anymore - while length(refined) > 0 - refined = rebalance!(t, refined) - refinement_count += length(refined) - end - - # Determine list of *original* cell ids that were refined - # Note: original_cell_ids contains the cell_id *before* refinement. At - # refinement, the refined cell's original_cell_ids value has its sign flipped - # to easily find it now. - @views refined_original_cells = ( - -t.original_cell_ids[1:length(t)][t.original_cell_ids[1:length(t)] .< 0]) - - # Check if count of refinement cells matches information in original_cell_ids - @assert refinement_count == length(refined_original_cells) ( - "Mismatch in number of refined cells") - - return refined_original_cells -end - - -# Refine all leaf cells with coordinates in a given rectangular box -function refine_box!(t::ParallelTree{NDIMS}, coordinates_min::AbstractArray{Float64}, - coordinates_max::AbstractArray{Float64}) where NDIMS - for dim in 1:NDIMS - @assert coordinates_min[dim] < coordinates_max[dim] "Minimum coordinates are not minimum." 
- end - - # Find all leaf cells within box - cells = filter_leaf_cells(t) do cell_id - return (all(coordinates_min .< t.coordinates[:, cell_id]) && - all(coordinates_max .> t.coordinates[:, cell_id])) - end - - # Refine cells - refine!(t, cells) -end - -# Convenience method for 1D -function refine_box!(t::ParallelTree{1}, coordinates_min::Real, coordinates_max::Real) - return refine_box!(t, [convert(Float64, coordinates_min)], [convert(Float64, coordinates_max)]) -end - - -# For the given cell ids, check if neighbors need to be refined to restore a rebalanced tree. -# -# Note 1: Rebalancing currently only considers *Cartesian* neighbors, not diagonal neighbors! -# Note 2: The current algorithm assumes that a previous refinement step has -# created level differences of at most 2. That is, before the previous -# refinement step, the tree was balanced. -function rebalance!(t::ParallelTree, refined_cell_ids) - # Create buffer for newly refined cells - to_refine = zeros(Int, n_directions(t) * length(refined_cell_ids)) - count = 0 - - # Iterate over cell ids that have previously been refined - for cell_id in refined_cell_ids - # Go over all potential neighbors of child cell - for direction in 1:n_directions(t) - # Continue if refined cell has a neighbor in that direction - if has_neighbor(t, cell_id, direction) - continue - end - - # Continue if refined cell has no coarse neighbor, since that would - # mean it there is no neighbor in that direction at all (domain - # boundary) - if !has_coarse_neighbor(t, cell_id, direction) - continue - end - - # Otherwise, the coarse neighbor exists and is not refined, thus it must - # be marked for refinement - coarse_neighbor_id = t.neighbor_ids[direction, t.parent_ids[cell_id]] - count += 1 - to_refine[count] = coarse_neighbor_id - end - end - - # Finally, refine all marked cells... - refined = refine_unbalanced!(t, unique(to_refine[1:count])) - - # ...and return list of refined cells - return refined -end - - # Refine given cells without rebalancing tree. # # Note: After a call to this method the tree may be unbalanced! @@ -484,196 +235,6 @@ function refine_unbalanced!(t::ParallelTree, cell_ids) return refined end -# Wrap single-cell refinements such that `sort(...)` does not complain -refine_unbalanced!(t::ParallelTree, cell_id::Int) = refine_unbalanced!(t, [cell_id]) - - -# Coarsen entire tree by one level -function coarsen!(t::ParallelTree) - # Special case: if there is only one cell (root), there is nothing to do - if length(t) == 1 - return Int[] - end - - # Get list of unique parent ids for all leaf cells - parent_ids = unique(t.parent_ids[leaf_cells(t)]) - coarsen!(t, parent_ids) -end - - -# Coarsen given *parent* cells (= these cells must have children who are all -# leaf cells) while retaining a balanced tree. -# -# A cell to be coarsened might cause an unbalanced tree if the neighboring cell -# was already refined. Since it is generally not desired that cells are -# coarsened without specifically asking for it, these cells will then *not* be -# coarsened. 
-function coarsen!(t::ParallelTree, cell_ids::AbstractArray{Int}) - # Return early if array is empty - if length(cell_ids) == 0 - return Int[] - end - - # Reset original cell ids such that each cell knows its current id - reset_original_cell_ids!(t) - - # To maximize the number of cells that may be coarsened, start with the cells at the highest level - sorted_by_level = sort(cell_ids, by = i -> t.levels[i]) - - # Keep track of number of cells that were actually coarsened - n_coarsened = 0 - - # Local function to adjust cell ids after some cells have been removed - function adjust_cell_ids!(cell_ids, coarsened_cell_id, count) - for (id, cell_id) in enumerate(cell_ids) - if cell_id > coarsened_cell_id - cell_ids[id] = cell_id - count - end - end - end - - # Iterate backwards over cells to coarsen - while true - # Retrieve next cell or quit - if length(sorted_by_level) > 0 - coarse_cell_id = pop!(sorted_by_level) - else - break - end - - # Ensure that cell has children (violation is an error) - if !has_children(t, coarse_cell_id) - error("cell is leaf and cannot be coarsened to: $coarse_cell_id") - end - - # Ensure that all child cells are leaf cells (violation is an error) - for child in 1:n_children_per_cell(t) - if has_child(t, coarse_cell_id, child) - if !is_leaf(t, t.child_ids[child, coarse_cell_id]) - error("cell $coarse_cell_id has child cell at position $child that is not a leaf cell") - end - end - end - - # Check if coarse cell has refined neighbors that would prevent coarsening - skip = false - # Iterate over all children (which are to be removed) - for child in 1:n_children_per_cell(t) - # Continue if child does not exist - if !has_child(t, coarse_cell_id, child) - continue - end - child_id = t.child_ids[child, coarse_cell_id] - - # Go over all neighbors of child cell. If it has a neighbor that is *not* - # a sibling and that is not a leaf cell, we cannot coarsen its parent - # without creating an unbalanced tree. - for direction in 1:n_directions(t) - # Continue if neighbor would be a sibling - if has_sibling(child, direction) - continue - end - - # Continue if child cell has no neighbor in that direction - if !has_neighbor(t, child_id, direction) - continue - end - neighbor_id = t.neighbor_ids[direction, child_id] - - if !has_children(t, neighbor_id) - continue - end - - # If neighbor is not a sibling, is existing, and has children, do not coarsen - skip = true - break - end - end - # Skip if a neighboring cell prevents coarsening - if skip - continue - end - - # Flip sign of cell to be coarsened to such that we can easily find it - t.original_cell_ids[coarse_cell_id] = -t.original_cell_ids[coarse_cell_id] - - # If a coarse cell has children that are all leaf cells, they must follow - # immediately due to depth-first ordering of the tree - count = n_children(t, coarse_cell_id) - @assert count == n_children_per_cell(t) "cell $coarse_cell_id does not have all child cells" - remove_shift!(t, coarse_cell_id + 1, coarse_cell_id + count) - - # Take into account shifts in tree that alters cell ids - adjust_cell_ids!(sorted_by_level, coarse_cell_id, count) - - # Keep track of number of coarsened cells - n_coarsened += 1 - end - - # Determine list of *original* cell ids that were coarsened to - # Note: original_cell_ids contains the cell_id *before* coarsening. At - # coarsening, the coarsened parent cell's original_cell_ids value has its sign flipped - # to easily find it now. 
- @views coarsened_original_cells = ( - -t.original_cell_ids[1:length(t)][t.original_cell_ids[1:length(t)] .< 0]) - - # Check if count of coarsened cells matches information in original_cell_ids - @assert n_coarsened == length(coarsened_original_cells) ( - "Mismatch in number of coarsened cells") - - return coarsened_original_cells -end - -# Wrap single-cell coarsening such that `sort(...)` does not complain -coarsen!(t::ParallelTree, cell_id::Int) = coarsen!(t::ParallelTree, [cell_id]) - - -# Coarsen all viable parent cells with coordinates in a given rectangular box -function coarsen_box!(t::ParallelTree{NDIMS}, coordinates_min::AbstractArray{Float64}, - coordinates_max::AbstractArray{Float64}) where NDIMS - for dim in 1:NDIMS - @assert coordinates_min[dim] < coordinates_max[dim] "Minimum coordinates are not minimum." - end - - # Find all leaf cells within box - leaves = filter_leaf_cells(t) do cell_id - return (all(coordinates_min .< t.coordinates[:, cell_id]) && - all(coordinates_max .> t.coordinates[:, cell_id])) - end - - # Get list of unique parent ids for all leaf cells - parent_ids = unique(t.parent_ids[leaves]) - - # Filter parent ids to be within box - parents = filter(parent_ids) do cell_id - return (all(coordinates_min .< t.coordinates[:, cell_id]) && - all(coordinates_max .> t.coordinates[:, cell_id])) - end - - # Coarsen cells - coarsen!(t, parents) -end - -# Convenience method for 1D -function coarsen_box!(t::ParallelTree{1}, coordinates_min::Real, coordinates_max::Real) - return coarsen_box!(t, [convert(Float64, coordinates_min)], [convert(Float64, coordinates_max)]) -end - - -# Return coordinates of a child cell based on its relative position to the parent. -function child_coordinates(::ParallelTree{NDIMS}, parent_coordinates, parent_length::Number, child::Int) where NDIMS - # Calculate length of child cells and set up data structure - child_length = parent_length / 2 - coordinates = MVector{NDIMS, Float64}(undef) - - # For each dimension, calculate coordinate as parent coordinate + relative position x length/2 - for d in 1:NDIMS - coordinates[d] = parent_coordinates[d] + child_sign(child, d) * child_length / 2 - end - - return coordinates -end - # Reset range of cells to values that are prone to cause errors as soon as they are used. 
# @@ -693,114 +254,6 @@ function invalidate!(t::ParallelTree, first::Int, last::Int) return nothing end -invalidate!(t::ParallelTree, id::Int) = invalidate!(t, id, id) -invalidate!(t::ParallelTree) = invalidate!(t, 1, length(t)) - - -# Delete connectivity with parents/children/neighbors before cells are erased -function delete_connectivity!(t::ParallelTree, first::Int, last::Int) - @assert first > 0 - @assert first <= last - @assert last <= t.capacity + 1 - - # Iterate over all cells - for cell_id in first:last - # Delete connectivity from parent cell - if has_parent(t, cell_id) - parent_id = t.parent_ids[cell_id] - for child in 1:n_children_per_cell(t) - if t.child_ids[child, parent_id] == cell_id - t.child_ids[child, parent_id] = 0 - break - end - end - end - - # Delete connectivity from child cells - for child in 1:n_children_per_cell(t) - if has_child(t, cell_id, child) - t.parent_ids[t._child_ids[child, cell_id]] = 0 - end - end - - # Delete connectivity from neighboring cells - for direction in 1:n_directions(t) - if has_neighbor(t, cell_id, direction) - t.neighbor_ids[opposite_direction(direction), t.neighbor_ids[direction, cell_id]] = 0 - end - end - end -end - - -# Move connectivity with parents/children/neighbors after cells have been moved -function move_connectivity!(t::ParallelTree, first::Int, last::Int, destination::Int) - @assert first > 0 - @assert first <= last - @assert last <= t.capacity + 1 - @assert destination > 0 - @assert destination <= t.capacity + 1 - - # Strategy - # 1) Loop over moved cells (at target location) - # 2) Check if parent/children/neighbors connections are to a cell that was moved - # a) if cell was moved: apply offset to current cell - # b) if cell was not moved: go to connected cell and update connectivity there - - offset = destination - first - has_moved(n) = (first <= n <= last) - - for source in first:last - target = source + offset - - # Update parent - if has_parent(t, target) - # Get parent cell - parent_id = t.parent_ids[target] - if has_moved(parent_id) - # If parent itself was moved, just update parent id accordingly - t.parent_ids[target] += offset - else - # If parent was not moved, update its corresponding child id - for child in 1:n_children_per_cell(t) - if t.child_ids[child, parent_id] == source - t.child_ids[child, parent_id] = target - end - end - end - end - - # Update children - for child in 1:n_children_per_cell(t) - if has_child(t, target, child) - # Get child cell - child_id = t.child_ids[child, target] - if has_moved(child_id) - # If child itself was moved, just update child id accordingly - t.child_ids[child, target] += offset - else - # If child was not moved, update its parent id - t.parent_ids[child_id] = target - end - end - end - - # Update neighbors - for direction in 1:n_directions(t) - if has_neighbor(t, target, direction) - # Get neighbor cell - neighbor_id = t.neighbor_ids[direction, target] - if has_moved(neighbor_id) - # If neighbor itself was moved, just update neighbor id accordingly - t.neighbor_ids[direction, target] += offset - else - # If neighbor was not moved, update its opposing neighbor id - t.neighbor_ids[opposite_direction(direction), neighbor_id] = target - end - end - end - end -end # Raw copy operation for ranges of cells. diff --git a/src/mesh/serial_tree.jl b/src/mesh/serial_tree.jl new file mode 100644 index 00000000000..8bc697d6b5e --- /dev/null +++ b/src/mesh/serial_tree.jl @@ -0,0 +1,265 @@ + +# Composite type that represents a NDIMS-dimensional tree (serial version). 
+# +# Implements everything required for AbstractContainer. +# +# Note: The way the data structures are set up and the way most algorithms +# work, it is *always* assumed that +# a) we have a balanced tree (= at most one level difference between +# neighboring cells, or 2:1 rule) +# b) we may not have all children (= some children may not exist) +# c) the tree is stored depth-first +# +# However, the way the refinement/coarsening algorithms are currently +# implemented, we only have fully refined cells. That is, a cell either has 2^NDIMS children or +# no children at all (= leaf cell). This restriction is also assumed at +# multiple positions in the refinement/coarsening algorithms. +# +# An exception to the 2:1 rule exists for the low-level `refine_unbalanced!` +# function, which is required for implementing level-wise refinement in a sane +# way. Also, depth-first ordering *might* not by guaranteed during +# refinement/coarsening operations. +mutable struct SerialTree{NDIMS} <: AbstractTree{NDIMS} + parent_ids::Vector{Int} + child_ids::Matrix{Int} + neighbor_ids::Matrix{Int} + levels::Vector{Int} + coordinates::Matrix{Float64} + original_cell_ids::Vector{Int} + + capacity::Int + length::Int + dummy::Int + + center_level_0::SVector{NDIMS, Float64} + length_level_0::Float64 + periodicity::NTuple{NDIMS, Bool} + + function SerialTree{NDIMS}(capacity::Integer) where NDIMS + # Verify that NDIMS is an integer + @assert NDIMS isa Integer + + # Create instance + t = new() + + # Initialize fields with defaults + # Note: length as capacity + 1 is to use `capacity + 1` as temporary storage for swap operations + t.parent_ids = fill(typemin(Int), capacity + 1) + t.child_ids = fill(typemin(Int), 2^NDIMS, capacity + 1) + t.neighbor_ids = fill(typemin(Int), 2*NDIMS, capacity + 1) + t.levels = fill(typemin(Int), capacity + 1) + t.coordinates = fill(NaN, NDIMS, capacity + 1) + t.original_cell_ids = fill(typemin(Int), capacity + 1) + + t.capacity = capacity + t.length = 0 + t.dummy = capacity + 1 + + t.center_level_0 = @SVector fill(NaN, NDIMS) + t.length_level_0 = NaN + + return t + end +end + + +# Constructor for passing the dimension as an argument +SerialTree(::Val{NDIMS}, args...) where NDIMS = SerialTree{NDIMS}(args...) 
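# Editor's note (illustrative usage, not part of the patch): with the `Val` forwarder above and
# the capacity/center/length constructor below, a 2D serial tree with room for 1000 cells,
# centered at the origin with edge length 2.0 and periodic in all directions, could be created as
#
#     t = SerialTree(Val(2), 1000, [0.0, 0.0], 2.0)   # forwards to SerialTree{2}(...)
#     t = SerialTree{2}(1000, [0.0, 0.0], 2.0)        # equivalent direct call
#
# `periodicity` defaults to `true`; a tuple such as `(true, false)` selects it per dimension.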
+ +# Create and initialize tree +function SerialTree{NDIMS}(capacity::Int, center::AbstractArray{Float64}, + length::Real, periodicity=true) where NDIMS + # Create instance + t = SerialTree{NDIMS}(capacity) + + # Initialize root cell + init!(t, center, length, periodicity) + + return t +end + +# Constructor accepting a single number as center (as opposed to an array) for 1D +SerialTree{1}(cap::Int, center::Real, len::Real, periodicity=true) = SerialTree{1}(cap, [convert(Float64, center)], len, periodicity) + + +# Clear tree with deleting data structures, store center and length, and create root cell +function init!(t::SerialTree, center::AbstractArray{Float64}, length::Real, periodicity=true) + clear!(t) + + # Set domain information + t.center_level_0 = center + t.length_level_0 = length + + # Create root cell + t.length += 1 + t.parent_ids[1] = 0 + t.child_ids[:, 1] .= 0 + t.levels[1] = 0 + t.coordinates[:, 1] .= t.center_level_0 + t.original_cell_ids[1] = 0 + + # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor + if all(periodicity) + # Also catches case where periodicity = true + t.neighbor_ids[:, 1] .= 1 + t.periodicity = ntuple(x->true, ndims(t)) + elseif !any(periodicity) + # Also catches case where periodicity = false + t.neighbor_ids[:, 1] .= 0 + t.periodicity = ntuple(x->false, ndims(t)) + else + # Default case if periodicity is an iterable + for dimension in 1:ndims(t) + if periodicity[dimension] + t.neighbor_ids[2 * dimension - 1, 1] = 1 + t.neighbor_ids[2 * dimension - 0, 1] = 1 + else + t.neighbor_ids[2 * dimension - 1, 1] = 0 + t.neighbor_ids[2 * dimension - 0, 1] = 0 + end + end + + t.periodicity = Tuple(periodicity) + end +end + + +# Convenience output for debugging +function Base.show(io::IO, t::SerialTree{NDIMS}) where NDIMS + l = t.length + println(io, '*'^20) + println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])") + println(io, "transpose(t.child_ids[:, 1:l]) = $(transpose(t.child_ids[:, 1:l]))") + println(io, "transpose(t.neighbor_ids[:, 1:l]) = $(transpose(t.neighbor_ids[:, 1:l]))") + println(io, "t.levels[1:l] = $(t.levels[1:l])") + println(io, "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))") + println(io, "t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])") + println(io, "t.capacity = $(t.capacity)") + println(io, "t.length = $(t.length)") + println(io, "t.dummy = $(t.dummy)") + println(io, "t.center_level_0 = $(t.center_level_0)") + println(io, "t.length_level_0 = $(t.length_level_0)") + println(io, '*'^20) +end + + +# Refine given cells without rebalancing tree. +# +# Note: After a call to this method the tree may be unbalanced! 
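# Illustrative aside (not part of the committed file; assumes the constructors
# defined earlier in this file): the 2:1 rule can indeed be broken here, e.g.
#
#   t = SerialTree(Val(1), 10, 0.0, 1.0)
#   refine_unbalanced!(t, [1])   # root -> two level-1 cells
#   refine_unbalanced!(t, [2])   # left level-1 cell -> two level-2 cells
#   refine_unbalanced!(t, [4])   # level-2 cell next to the right half -> level-3 cells
#
# after which a level-3 cell is geometrically adjacent to the untouched level-1
# right half of the domain, i.e. neighboring levels differ by more than one.
# The higher-level `refine!` (see test/test_manual.jl further down) is meant to
# keep the tree balanced.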
+function refine_unbalanced!(t::SerialTree, cell_ids) + # Store actual ids refined cells (shifted due to previous insertions) + refined = zeros(Int, length(cell_ids)) + + # Loop over all cells that are to be refined + for (count, original_cell_id) in enumerate(sort(unique(cell_ids))) + # Determine actual cell id, taking into account previously inserted cells + n_children = n_children_per_cell(t) + cell_id = original_cell_id + (count - 1) * n_children + refined[count] = cell_id + + @assert !has_children(t, cell_id) "Non-leaf cell $cell_id cannot be refined" + + # Insert new cells directly behind parent (depth-first) + insert!(t, cell_id + 1, n_children) + + # Flip sign of refined cell such that we can easily find it later + t.original_cell_ids[cell_id] = -t.original_cell_ids[cell_id] + + # Initialize child cells + for child in 1:n_children + # Set child information based on parent + child_id = cell_id + child + t.parent_ids[child_id] = cell_id + t.child_ids[child, cell_id] = child_id + t.neighbor_ids[:, child_id] .= 0 + t.child_ids[:, child_id] .= 0 + t.levels[child_id] = t.levels[cell_id] + 1 + t.coordinates[:, child_id] .= child_coordinates( + t, t.coordinates[:, cell_id], length_at_cell(t, cell_id), child) + t.original_cell_ids[child_id] = 0 + + # For determining neighbors, use neighbor connections of parent cell + for direction in 1:n_directions(t) + # If neighbor is a sibling, establish one-sided connectivity + # Note: two-sided is not necessary, as each sibling will do this + if has_sibling(child, direction) + adjacent = adjacent_child(child, direction) + neighbor_id = cell_id + adjacent + + t.neighbor_ids[direction, child_id] = neighbor_id + continue + end + + # Skip if original cell does have no neighbor in direction + if !has_neighbor(t, cell_id, direction) + continue + end + + # Otherwise, check if neighbor has children - if not, skip again + neighbor_id = t.neighbor_ids[direction, cell_id] + if !has_children(t, neighbor_id) + continue + end + + # Check if neighbor has corresponding child and if yes, establish connectivity + adjacent = adjacent_child(child, direction) + if has_child(t, neighbor_id, adjacent) + neighbor_child_id = t.child_ids[adjacent, neighbor_id] + opposite = opposite_direction(direction) + + t.neighbor_ids[direction, child_id] = neighbor_child_id + t.neighbor_ids[opposite, neighbor_child_id] = child_id + end + end + end + end + + return refined +end + + +# Reset range of cells to values that are prone to cause errors as soon as they are used. +# +# Rationale: If an invalid cell is accidentally used, we want to know it as soon as possible. +function invalidate!(t::SerialTree, first::Int, last::Int) + @assert first > 0 + @assert last <= t.capacity + 1 + + # Integer values are set to smallest negative value, floating point values to NaN + t.parent_ids[first:last] .= typemin(Int) + t.child_ids[:, first:last] .= typemin(Int) + t.neighbor_ids[:, first:last] .= typemin(Int) + t.levels[first:last] .= typemin(Int) + t.coordinates[:, first:last] .= NaN + t.original_cell_ids[first:last] .= typemin(Int) + + return nothing +end + + +# Raw copy operation for ranges of cells. 
+# +# This method is used by the higher-level copy operations for AbstractContainer +function raw_copy!(target::SerialTree, source::SerialTree, first::Int, last::Int, destination::Int) + copy_data!(target.parent_ids, source.parent_ids, first, last, destination) + copy_data!(target.child_ids, source.child_ids, first, last, destination, + n_children_per_cell(target)) + copy_data!(target.neighbor_ids, source.neighbor_ids, first, last, + destination, n_directions(target)) + copy_data!(target.levels, source.levels, first, last, destination) + copy_data!(target.coordinates, source.coordinates, first, last, destination, ndims(target)) + copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, destination) +end + + +# Reset data structures by recreating all internal storage containers and invalidating all elements +function reset_data_structures!(t::SerialTree{NDIMS}) where NDIMS + t.parent_ids = Vector{Int}(undef, t.capacity + 1) + t.child_ids = Matrix{Int}(undef, 2^NDIMS, t.capacity + 1) + t.neighbor_ids = Matrix{Int}(undef, 2*NDIMS, t.capacity + 1) + t.levels = Vector{Int}(undef, t.capacity + 1) + t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) + t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) + + invalidate!(t, 1, capacity(t) + 1) +end diff --git a/src/solvers/dg/dg.jl b/src/solvers/dg/dg.jl index 4e3d086c257..c916ebf08ee 100644 --- a/src/solvers/dg/dg.jl +++ b/src/solvers/dg/dg.jl @@ -20,7 +20,7 @@ abstract type AbstractDg{NDIMS, POLYDEG, MeshType} <: AbstractSolver{NDIMS} end @inline ndofs(dg::AbstractDg) = dg.n_elements * nnodes(dg)^ndims(dg) @inline uses_mpi(::AbstractDg{NDIMS, POLYDEG, TreeMesh{ParallelTree{NDIMS}}}) where {NDIMS, POLYDEG}= Val(true) -@inline uses_mpi(::AbstractDg{NDIMS, POLYDEG, TreeMesh{Tree{NDIMS}}}) where {NDIMS, POLYDEG} = Val(false) +@inline uses_mpi(::AbstractDg{NDIMS, POLYDEG, TreeMesh{SerialTree{NDIMS}}}) where {NDIMS, POLYDEG} = Val(false) """ get_node_coords(x, dg::AbstractDg, indices...) 
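(Editorial aside, not part of the patch series.) The `uses_mpi` methods changed in the hunk above act as a compile-time switch: solvers built on a TreeMesh{ParallelTree{NDIMS}} yield Val(true), solvers built on a TreeMesh{SerialTree{NDIMS}} yield Val(false), and downstream methods such as rhs!(dg, t_stage, ::Val{true}) are then selected by dispatch rather than by runtime branching. A minimal, self-contained sketch of that pattern with toy stand-in types (not Trixi's actual API):

  abstract type AbstractToyMesh end
  struct ToySerialMesh   <: AbstractToyMesh end
  struct ToyParallelMesh <: AbstractToyMesh end

  # Trait: encode "is this an MPI run?" in the type domain
  uses_mpi(::ToySerialMesh)   = Val(false)
  uses_mpi(::ToyParallelMesh) = Val(true)

  # The generic entry point forwards to the trait; the compiler then picks
  # the serial or the parallel method statically
  rhs!(mesh) = rhs!(mesh, uses_mpi(mesh))
  rhs!(mesh, ::Val{false}) = "serial right-hand side"
  rhs!(mesh, ::Val{true})  = "MPI-parallel right-hand side"

  rhs!(ToySerialMesh())    # -> "serial right-hand side"
  rhs!(ToyParallelMesh())  # -> "MPI-parallel right-hand side"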
From 786cc74c5120a0ab82df77d5ac3c12767cdbb4fb Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 28 Sep 2020 08:49:21 +0200 Subject: [PATCH 52/81] Refactor get_restart_mesh_filename --- src/mesh/mesh.jl | 5 +++-- src/mesh/parallel.jl | 36 +++++++++++++++++++++++++----------- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/mesh/mesh.jl b/src/mesh/mesh.jl index 6694bf343f8..261c74fbb5c 100644 --- a/src/mesh/mesh.jl +++ b/src/mesh/mesh.jl @@ -130,7 +130,7 @@ function load_mesh(restart_filename, mpi_parallel::Val{false}) @timeit timer() "creation" mesh = TreeMesh(SerialTree{ndims_}, n_cells_max) # Determine mesh filename - filename = get_restart_mesh_filename(restart_filename) + filename = get_restart_mesh_filename(restart_filename, Val(false)) mesh.current_filename = filename mesh.unsaved_changes = false @@ -158,7 +158,8 @@ end # Obtain the mesh filename from a restart file -function get_restart_mesh_filename(restart_filename) +get_restart_mesh_filename(restart_filename) = get_restart_mesh_filename(restart_filename, mpi_parallel()) +function get_restart_mesh_filename(restart_filename, mpi_parallel::Val{false}) # Get directory name dirname, _ = splitdir(restart_filename) diff --git a/src/mesh/parallel.jl b/src/mesh/parallel.jl index 14671ee8f7a..a310350f748 100644 --- a/src/mesh/parallel.jl +++ b/src/mesh/parallel.jl @@ -46,17 +46,7 @@ function load_mesh(restart_filename, mpi_parallel::Val{true}) @timeit timer() "creation" mesh = TreeMesh(ParallelTree{ndims_}, n_cells_max) # Determine mesh filename - if is_mpi_root() - filename = get_restart_mesh_filename(restart_filename) - buffer = Vector{UInt8}(filename) - MPI.Bcast!(Ref(length(buffer)), mpi_root(), mpi_comm()) - MPI.Bcast!(buffer, mpi_root(), mpi_comm()) - else # non-root ranks - count = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm()) - buffer = Vector{UInt8}(undef, count[]) - MPI.Bcast!(buffer, mpi_root(), mpi_comm()) - filename = String(buffer) - end + filename = get_restart_mesh_filename(restart_filename, Val(true)) mesh.current_filename = filename mesh.unsaved_changes = false @@ -112,3 +102,27 @@ function load_mesh(restart_filename, mpi_parallel::Val{true}) return mesh end +function get_restart_mesh_filename(restart_filename, mpi_parallel::Val{true}) + # Get directory name + dirname, _ = splitdir(restart_filename) + + if is_mpi_root() + # Read mesh filename from restart file + mesh_file = "" + h5open(restart_filename, "r") do file + mesh_file = read(attrs(file)["mesh_file"]) + end + + buffer = Vector{UInt8}(mesh_file) + MPI.Bcast!(Ref(length(buffer)), mpi_root(), mpi_comm()) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) + else # non-root ranks + count = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm()) + buffer = Vector{UInt8}(undef, count[]) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) + mesh_file = String(buffer) + end + + # Construct and return filename + return joinpath(dirname, mesh_file) +end From 1fa802faaab8f26ec5d6c24bf3b3dbf5de8273ca Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 28 Sep 2020 08:52:39 +0200 Subject: [PATCH 53/81] Add MIME"text/plain" to multi-line `Base.show` methods --- src/mesh/parallel_tree.jl | 2 +- src/mesh/serial_tree.jl | 2 +- src/solvers/dg/2d/containers.jl | 2 +- src/solvers/dg/3d/containers.jl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesh/parallel_tree.jl b/src/mesh/parallel_tree.jl index 62e46c5f550..a9c6d71b624 100644 --- a/src/mesh/parallel_tree.jl +++ b/src/mesh/parallel_tree.jl @@ -128,7 +128,7 @@ end # Convenience 
output for debugging -function Base.show(io::IO, t::ParallelTree{NDIMS}) where NDIMS +function Base.show(io::IO, ::MIME"text/plain", t::ParallelTree{NDIMS}) where NDIMS l = t.length println(io, '*'^20) println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])") diff --git a/src/mesh/serial_tree.jl b/src/mesh/serial_tree.jl index 8bc697d6b5e..7f3296fe32a 100644 --- a/src/mesh/serial_tree.jl +++ b/src/mesh/serial_tree.jl @@ -125,7 +125,7 @@ end # Convenience output for debugging -function Base.show(io::IO, t::SerialTree{NDIMS}) where NDIMS +function Base.show(io::IO, ::MIME"text/plain", t::SerialTree{NDIMS}) where NDIMS l = t.length println(io, '*'^20) println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])") diff --git a/src/solvers/dg/2d/containers.jl b/src/solvers/dg/2d/containers.jl index 95d83de3a61..9444c3ddbb8 100644 --- a/src/solvers/dg/2d/containers.jl +++ b/src/solvers/dg/2d/containers.jl @@ -161,7 +161,7 @@ nmortars(l2mortars::L2MortarContainer2D) = length(l2mortars.orientations) # Allow printing container contents -function Base.show(io::IO, c::L2MortarContainer2D{NVARS, POLYDEG}) where {NVARS, POLYDEG} +function Base.show(io::IO, ::MIME"text/plain", c::L2MortarContainer2D{NVARS, POLYDEG}) where {NVARS, POLYDEG} println(io, '*'^20) for idx in CartesianIndices(c.u_upper) println(io, "c.u_upper[$idx] = $(c.u_upper[idx])") diff --git a/src/solvers/dg/3d/containers.jl b/src/solvers/dg/3d/containers.jl index 6b1f83eb101..c70473a45e0 100644 --- a/src/solvers/dg/3d/containers.jl +++ b/src/solvers/dg/3d/containers.jl @@ -151,7 +151,7 @@ nmortars(l2mortars::L2MortarContainer3D) = length(l2mortars.orientations) # Allow printing container contents -function Base.show(io::IO, c::L2MortarContainer3D{NVARS, POLYDEG}) where {NVARS, POLYDEG} +function Base.show(io::IO, ::MIME"text/plain", c::L2MortarContainer3D{NVARS, POLYDEG}) where {NVARS, POLYDEG} println(io, '*'^20) for idx in CartesianIndices(c.u_upper_left) println(io, "c.u_upper_left[$idx] = $(c.u_upper_left[idx])") From 223b08132fafefdf4dd30efea6c58eb0b3ecb79b Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 28 Sep 2020 08:54:38 +0200 Subject: [PATCH 54/81] Avoid constructing another `Val(false)` Co-authored-by: Hendrik Ranocha --- src/solvers/dg/2d/dg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index ee3a5dac5fe..4303ce8542e 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -857,7 +857,7 @@ function integrate(func, u, dg::Dg2D, uses_mpi::Val{false}; normalize=true) u_local = get_node_vars(u, dg, i, j, element_id) return func(u_local) end - return integrate(func_wrapped, dg, Val(false), u; normalize=normalize) + return integrate(func_wrapped, dg, uses_mpi, u; normalize=normalize) end integrate(u, dg::Dg2D; normalize=true) = integrate(identity, u, dg; normalize=normalize) From 3c392ca10e632dcffa0a31b0a9d76d74c95abb5f Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 28 Sep 2020 08:57:15 +0200 Subject: [PATCH 55/81] Further improve potential for overlapping communication & computation --- src/solvers/dg/2d/parallel.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl index 7ca3a1080ce..eaabae820e5 100644 --- a/src/solvers/dg/2d/parallel.jl +++ b/src/solvers/dg/2d/parallel.jl @@ -3,15 +3,15 @@ function rhs!(dg::Dg2D, t_stage, uses_mpi::Val{true}) # Start to receive MPI data @timeit timer() "start MPI receive" start_mpi_receive!(dg) 
- # Reset u_t - @timeit timer() "reset ∂u/∂t" dg.elements.u_t .= 0 - # Prolong solution to MPI interfaces @timeit timer() "prolong2mpiinterfaces" prolong2mpiinterfaces!(dg) # Start to send MPI data @timeit timer() "start MPI send" start_mpi_send!(dg) + # Reset u_t + @timeit timer() "reset ∂u/∂t" dg.elements.u_t .= 0 + # Calculate volume integral @timeit timer() "volume integral" calc_volume_integral!(dg) From d635cbd2c279e069a332e422ba65fa31c0f77512 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 28 Sep 2020 08:57:57 +0200 Subject: [PATCH 56/81] Update docs/src/parallelization.md Co-authored-by: Hendrik Ranocha --- docs/src/parallelization.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/src/parallelization.md b/docs/src/parallelization.md index 64843146b8b..3909cd2c6b7 100644 --- a/docs/src/parallelization.md +++ b/docs/src/parallelization.md @@ -61,7 +61,7 @@ To start Trixi in parallel with MPI, there are three options: julia> MPI.install_mpiexecjl(destdir="/somewhere/in/your/PATH") ``` - Then, to execute a Trixi in parallel, execute the following command from your + Then, to execute Trixi in parallel, execute the following command from your command line: ```bash mpiexecjl -n 3 julia --project=. -e 'using Trixi; Trixi.run("examples/2d/parameters.toml")' @@ -96,4 +96,3 @@ To start Trixi in parallel with MPI, there are three options: [available](https://github.com/tmux/tmux/wiki/Getting-Started) and once you get the hang of it, developing Trixi in parallel becomes much smoother this way. - From b84fe79099bc6d0ffc3b62a3697be341a4481a3c Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 28 Sep 2020 16:21:54 +0200 Subject: [PATCH 57/81] Remove unused and non-canonical overload --- src/mesh/abstract_tree.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/mesh/abstract_tree.jl b/src/mesh/abstract_tree.jl index acb3516ca41..91ee3e9b920 100644 --- a/src/mesh/abstract_tree.jl +++ b/src/mesh/abstract_tree.jl @@ -1,8 +1,7 @@ abstract type AbstractTree{NDIMS} <: AbstractContainer end # Type traits to obtain dimension -@inline Base.ndims(::Type{AbstractTree{NDIMS}}) where NDIMS = NDIMS -@inline Base.ndims(t::AbstractTree{NDIMS}) where NDIMS = NDIMS +@inline Base.ndims(::AbstractTree{NDIMS}) where NDIMS = NDIMS # Auxiliary methods to allow semantic queries on the tree From b1f1057b231cff4c37af0817b511fdf6bf07e668 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Thu, 1 Oct 2020 06:37:41 +0200 Subject: [PATCH 58/81] Add 1D tests to Travis --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 49a3d6d038b..960b5db1f4a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,6 +32,7 @@ env: global: - COVERALLS_PARALLEL=true jobs: + - TRIXI_TEST=1D - TRIXI_TEST=2D - TRIXI_TEST=3D - TRIXI_TEST=misc From 33dc1b089c164d4a01bb0afb972f138148698a18 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 2 Oct 2020 22:57:47 +0200 Subject: [PATCH 59/81] Adapt 1D solver to new mesh type infrastructur --- src/solvers/dg/1d/amr.jl | 2 ++ src/solvers/dg/1d/dg.jl | 67 ++++++++++++++++++++++++++++------------ src/solvers/dg/3d/dg.jl | 2 +- 3 files changed, 50 insertions(+), 21 deletions(-) diff --git a/src/solvers/dg/1d/amr.jl b/src/solvers/dg/1d/amr.jl index e2ad5385c9e..86ae261c8ef 100644 --- a/src/solvers/dg/1d/amr.jl +++ b/src/solvers/dg/1d/amr.jl @@ -60,6 +60,7 @@ function refine!(dg::Dg1D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, # Update DG instance with new data 
dg.elements = elements dg.n_elements = n_elements + dg.n_elements_global = n_elements dg.interfaces = interfaces dg.n_interfaces = n_interfaces dg.boundaries = boundaries @@ -166,6 +167,7 @@ function coarsen!(dg::Dg1D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, # Update DG instance with new data dg.elements = elements dg.n_elements = n_elements + dg.n_elements_global = n_elements dg.interfaces = interfaces dg.n_interfaces = n_interfaces dg.boundaries = boundaries diff --git a/src/solvers/dg/1d/dg.jl b/src/solvers/dg/1d/dg.jl index da3b13a0d31..626338a03dd 100644 --- a/src/solvers/dg/1d/dg.jl +++ b/src/solvers/dg/1d/dg.jl @@ -1,10 +1,10 @@ # Main DG data structure that contains all relevant data for the DG solver -mutable struct Dg1D{Eqn<:AbstractEquation, NVARS, POLYDEG, +mutable struct Dg1D{Eqn<:AbstractEquation, MeshType, NVARS, POLYDEG, SurfaceFlux, VolumeFlux, InitialConditions, SourceTerms, BoundaryConditions, VolumeIntegralType, ShockIndicatorVariable, VectorNnodes, MatrixNnodes, MatrixNnodes2, InverseVandermondeLegendre, MortarMatrix, - VectorAnalysisNnodes, AnalysisVandermonde} <: AbstractDg{1, POLYDEG} + VectorAnalysisNnodes, AnalysisVandermonde} <: AbstractDg{1, POLYDEG, MeshType} equations::Eqn surface_flux_function::SurfaceFlux @@ -62,6 +62,8 @@ mutable struct Dg1D{Eqn<:AbstractEquation, NVARS, POLYDEG, amr_alpha_min::Float64 amr_alpha_smooth::Bool + n_elements_global::Int + element_variables::Dict{Symbol, Union{Vector{Float64}, Vector{Int}}} cache::Dict{Symbol, Any} thread_cache::Any # to make fully-typed output more readable @@ -70,7 +72,7 @@ end # Convenience constructor to create DG solver instance -function Dg1D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, volume_flux_function, initial_conditions, source_terms, mesh::TreeMesh{NDIMS}, POLYDEG) where {NDIMS, NVARS} +function Dg1D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, volume_flux_function, initial_conditions, source_terms, mesh::TreeMesh1D, POLYDEG) where {NDIMS, NVARS} # Get cells for which an element needs to be created (i.e., all leaf cells) leaf_cell_ids = leaf_cells(mesh.tree) @@ -155,6 +157,9 @@ function Dg1D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v amr_indicator = Symbol(parameter("amr_indicator", "n/a", valid=["n/a", "gauss", "blast_wave"])) + # Set global number of elements + n_elements_global = n_elements + # Initialize storage for element variables element_variables = Dict{Symbol, Union{Vector{Float64}, Vector{Int}}}() @@ -186,8 +191,29 @@ function Dg1D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v # Store initial state integrals for conservation error calculation initial_state_integrals = Vector{Float64}() + # Convert all performance-critical fields to StaticArrays types + nodes = SVector{POLYDEG+1}(nodes) + weights = SVector{POLYDEG+1}(weights) + inverse_weights = SVector{POLYDEG+1}(inverse_weights) + lhat = SMatrix{POLYDEG+1,2}(lhat) + dhat = SMatrix{POLYDEG+1,POLYDEG+1}(dhat) + dsplit = SMatrix{POLYDEG+1,POLYDEG+1}(dsplit) + dsplit_transposed = SMatrix{POLYDEG+1,POLYDEG+1}(dsplit_transposed) + amr_refine_right = SMatrix{POLYDEG+1,POLYDEG+1}(amr_refine_right) + amr_refine_left = SMatrix{POLYDEG+1,POLYDEG+1}(amr_refine_left) + amr_coarsen_right = SMatrix{POLYDEG+1,POLYDEG+1}(amr_coarsen_right) + amr_coarsen_left = SMatrix{POLYDEG+1,POLYDEG+1}(amr_coarsen_left) + analysis_nodes = SVector{analysis_polydeg+1}(analysis_nodes) + analysis_weights = SVector{analysis_polydeg+1}(analysis_weights) + analysis_weights_volume = 
SVector{analysis_polydeg+1}(analysis_weights_volume) + # Create actual DG solver instance - dg = Dg1D( + dg = Dg1D{typeof(equation), typeof(mesh), NVARS, POLYDEG, + typeof(surface_flux_function), typeof(volume_flux_function), typeof(initial_conditions), + typeof(source_terms), typeof(boundary_conditions), + typeof(volume_integral_type), typeof(shock_indicator_variable), + typeof(nodes), typeof(dhat), typeof(lhat), typeof(inverse_vandermonde_legendre), + typeof(amr_refine_right), typeof(analysis_nodes), typeof(analysis_vandermonde)}( equation, surface_flux_function, volume_flux_function, initial_conditions, source_terms, @@ -195,18 +221,19 @@ function Dg1D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v interfaces, n_interfaces, boundaries, n_boundaries, n_boundaries_per_direction, n_l2mortars, - Tuple(boundary_conditions), - SVector{POLYDEG+1}(nodes), SVector{POLYDEG+1}(weights), SVector{POLYDEG+1}(inverse_weights), - inverse_vandermonde_legendre, SMatrix{POLYDEG+1,2}(lhat), + boundary_conditions, + nodes, weights, inverse_weights, + inverse_vandermonde_legendre, lhat, volume_integral_type, - SMatrix{POLYDEG+1,POLYDEG+1}(dhat), SMatrix{POLYDEG+1,POLYDEG+1}(dsplit), SMatrix{POLYDEG+1,POLYDEG+1}(dsplit_transposed), - SMatrix{POLYDEG+1,POLYDEG+1}(amr_refine_right), SMatrix{POLYDEG+1,POLYDEG+1}(amr_refine_left), - SMatrix{POLYDEG+1,POLYDEG+1}(amr_coarsen_right), SMatrix{POLYDEG+1,POLYDEG+1}(amr_coarsen_left), - SVector{analysis_polydeg+1}(analysis_nodes), SVector{analysis_polydeg+1}(analysis_weights), SVector{analysis_polydeg+1}(analysis_weights_volume), + dhat, dsplit, dsplit_transposed, + amr_refine_right, amr_refine_left, + amr_coarsen_right, amr_coarsen_left, + analysis_nodes, analysis_weights, analysis_weights_volume, analysis_vandermonde, analysis_total_volume, analysis_quantities, save_analysis, analysis_filename, shock_indicator_variable, shock_alpha_max, shock_alpha_min, shock_alpha_smooth, amr_indicator, amr_alpha_max, amr_alpha_min, amr_alpha_smooth, + n_elements_global, element_variables, cache, thread_cache, initial_state_integrals) @@ -236,7 +263,7 @@ end # Count the number of interfaces that need to be created -function count_required_interfaces(mesh::TreeMesh{1}, cell_ids) +function count_required_interfaces(mesh::TreeMesh1D, cell_ids) count = 0 # Iterate over all cells @@ -261,7 +288,7 @@ end # Count the number of boundaries that need to be created -function count_required_boundaries(mesh::TreeMesh{1}, cell_ids) +function count_required_boundaries(mesh::TreeMesh1D, cell_ids) count = 0 # Iterate over all cells @@ -290,7 +317,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_elements(cell_ids, mesh::TreeMesh{1}, ::Val{NVARS}, ::Val{POLYDEG}) where {NVARS, POLYDEG} +function init_elements(cell_ids, mesh::TreeMesh1D, ::Val{NVARS}, ::Val{POLYDEG}) where {NVARS, POLYDEG} # Initialize container n_elements = length(cell_ids) elements = ElementContainer1D{NVARS, POLYDEG}(n_elements) @@ -328,7 +355,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function init_interfaces(cell_ids, mesh::TreeMesh{1}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} +function init_interfaces(cell_ids, mesh::TreeMesh1D, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} # Initialize container n_interfaces = count_required_interfaces(mesh, cell_ids) interfaces = InterfaceContainer1D{NVARS, POLYDEG}(n_interfaces) @@ -344,7 +371,7 @@ end # # NVARS: number of variables # POLYDEG: polynomial degree -function 
init_boundaries(cell_ids, mesh::TreeMesh{1}, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} +function init_boundaries(cell_ids, mesh::TreeMesh1D, ::Val{NVARS}, ::Val{POLYDEG}, elements) where {NVARS, POLYDEG} # Initialize container n_boundaries = count_required_boundaries(mesh, cell_ids) boundaries = BoundaryContainer1D{NVARS, POLYDEG}(n_boundaries) @@ -357,7 +384,7 @@ end # Initialize connectivity between elements and interfaces -function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh{1}) +function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh1D) # Construct cell -> element mapping for easier algorithm implementation tree = mesh.tree c2e = zeros(Int, length(tree)) @@ -412,7 +439,7 @@ end # Initialize connectivity between elements and boundaries -function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh{1}) +function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh1D) # Reset boundaries count count = 0 @@ -476,7 +503,7 @@ function init_boundary_connectivity!(elements, boundaries, mesh::TreeMesh{1}) return SVector(counts_per_direction) end -function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh{1}) +function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh1D) # "eval is evil" # This is a temporary hack until we have switched to a library based approach # with pure Julia code instead of parameter files. @@ -505,7 +532,7 @@ function init_boundary_conditions(n_boundaries_per_direction, mesh::TreeMesh{1}) end end - return boundary_conditions + return Tuple(boundary_conditions) end diff --git a/src/solvers/dg/3d/dg.jl b/src/solvers/dg/3d/dg.jl index 45acf099630..ebd6b1c58e8 100644 --- a/src/solvers/dg/3d/dg.jl +++ b/src/solvers/dg/3d/dg.jl @@ -240,7 +240,7 @@ function Dg3D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v l2mortars, n_l2mortars, boundary_conditions, nodes, weights, inverse_weights, - inverse_vandermonde_legendre, SMatrix{POLYDEG+1,2}(lhat), + inverse_vandermonde_legendre, lhat, volume_integral_type, dhat, dsplit, dsplit_transposed, mortar_forward_upper, mortar_forward_lower, From 04537ba4264c989f1107d1179928a6a1ee6a978f Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 2 Oct 2020 23:24:24 +0200 Subject: [PATCH 60/81] Ensure correct return values (l2, linf) on all ranks --- src/run.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/run.jl b/src/run.jl index 9a89685b5a2..a8800e4b35f 100644 --- a/src/run.jl +++ b/src/run.jl @@ -432,6 +432,12 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function println() end + # Distribute l2_errors from root such that all ranks have correct return value + if is_parallel() + l2_error = convert(typeof(l2_error), MPI.Bcast!(collect(l2_error), mpi_root(), mpi_comm())) + linf_error = convert(typeof(linf_error), MPI.Bcast!(collect(linf_error), mpi_root(), mpi_comm())) + end + # Return error norms for EOC calculation return l2_error, linf_error, varnames_cons(solver.equations) end From 2b54b04c4802c6f0fc4f31ef9f71c7f2b6113116 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 2 Oct 2020 23:24:45 +0200 Subject: [PATCH 61/81] First attempt at MPI-parallel Trixi tests --- test/Project.toml | 1 + test/runtests.jl | 8 ++++ test/test_examples_parallel_2d.jl | 79 +++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+) create mode 100644 test/test_examples_parallel_2d.jl diff --git a/test/Project.toml b/test/Project.toml index 
a2cd2f8d848..1a807f83808 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,3 +1,4 @@ [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/runtests.jl b/test/runtests.jl index 9d9d4c3926e..be730000e40 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,8 +1,10 @@ using Test +using MPI: mpiexec # run tests on Travis CI in parallel const TRIXI_TEST = get(ENV, "TRIXI_TEST", "all") const ON_APPVEYOR = lowercase(get(ENV, "APPVEYOR", "false")) == "true" +const TRIXI_MPI_NPROCS = 3 @time @testset "Trixi.jl tests" begin @time if TRIXI_TEST == "all" || TRIXI_TEST == "1D" @@ -25,4 +27,10 @@ const ON_APPVEYOR = lowercase(get(ENV, "APPVEYOR", "false")) == "true" @time if (TRIXI_TEST == "all" && !ON_APPVEYOR) || TRIXI_TEST == "paper-self-gravitating-gas-dynamics" include("test_paper-self-gravitating-gas-dynamics.jl") end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "parallel_2d" + mpiexec() do cmd + run(`$cmd -n $TRIXI_MPI_NPROCS $(Base.julia_cmd()) test_examples_parallel_2d.jl`) + end + end end diff --git a/test/test_examples_parallel_2d.jl b/test/test_examples_parallel_2d.jl new file mode 100644 index 00000000000..ccde292d517 --- /dev/null +++ b/test/test_examples_parallel_2d.jl @@ -0,0 +1,79 @@ +module TestExamplesParallel2D + +using Test +using Trixi + +include("test_trixi.jl") + +# Start with a clean environment: remove Trixi output directory if it exists +outdir = "out" +Trixi.is_mpi_root() && isdir(outdir) && rm(outdir, recursive=true) + +# pathof(Trixi) returns /path/to/Trixi/src/Trixi.jl, dirname gives the parent directory +const EXAMPLES_DIR = joinpath(pathof(Trixi) |> dirname |> dirname, "examples", "2d") + +# Run basic tests +@testset "Examples 2D" begin + @testset "parameters.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters.toml"), + l2 = [9.144681765639205e-6], + linf = [6.437440532547356e-5]) + end + @testset "parameters.toml with polydeg=1" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters.toml"), + l2 = [0.05264106093598111], + linf = [0.08754218386076518], + polydeg=1) + end + @testset "parameters_ec.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_ec.toml"), + l2 = [0.06159341742582756, 0.05012484425381723, 0.05013298724507752, 0.22537740506116724], + linf = [0.29912627861573327, 0.30886767304359375, 0.3088108573487326, 1.0657556075017878]) + end + @testset "parameters_density_wave.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_density_wave.toml"), + l2 = [0.001060077845747576, 0.00010600778457107525, 0.00021201556914875742, 2.6501946139091318e-5], + linf = [0.0065356386867677085, 0.0006535638688170142, 0.0013071277374487877, 0.0001633909674296774], + extra_analysis_quantities=["l2_error_primitive", "linf_error_primitive"], t_end=0.5) + end + @testset "parameters_ec_mhd.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_ec_mhd.toml"), + l2 = [0.03607862694368351, 0.04281395008247395, 0.04280207686965749, 0.025746770192645763, 0.1611518499414067, 0.017455917249117023, 0.017456981264942977, 0.02688321120361229, 0.00015024027267648003], + linf = [0.23502083666166018, 0.3156846367743936, 0.31227895161037256, 0.2118146956106238, 0.9743049414302711, 0.09050624115026618, 0.09131633488909774, 0.15693063355520998, 0.0038394720095667593]) + end + @testset "parameters_hyp_diff_harmonic_nonperiodic.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, 
"parameters_hyp_diff_harmonic_nonperiodic.toml"), + l2 = [8.618132353932638e-8, 5.619399844708813e-7, 5.619399845476024e-7], + linf = [1.124861862326869e-6, 8.622436471483752e-6, 8.622436469707395e-6]) + end + @testset "parameters_hyp_diff_llf.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_hyp_diff_llf.toml"), + l2 = [0.00015687751088073104, 0.0010259867353397119, 0.0010259867353398994], + linf = [0.001198695640053704, 0.006423873515701395, 0.006423873515686296]) + end + @testset "parameters_hyp_diff_nonperiodic.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_hyp_diff_nonperiodic.toml"), + l2 = [8.523077654037775e-6, 2.877932365308637e-5, 5.454942769137812e-5], + linf = [5.484978959957587e-5, 0.00014544895979200218, 0.000324491268921534]) + end + @testset "parameters_hyp_diff_upwind.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_hyp_diff_upwind.toml"), + l2 = [5.868147556488962e-6, 3.8051792732628014e-5, 3.8051792732620214e-5], + linf = [3.70196549871471e-5, 0.0002072058411455302, 0.00020720584114464202]) + end + @testset "parameters_nonperiodic.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_nonperiodic.toml"), + l2 = [2.3652137675654753e-6, 2.1386731303685556e-6, 2.138673130413185e-6, 6.009920290578574e-6], + linf = [1.4080448659026246e-5, 1.7581818010814487e-5, 1.758181801525538e-5, 5.9568540361709665e-5]) + end + @testset "parameters_source_terms.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_source_terms.toml"), + l2 = [8.517783186497567e-7, 1.2350199409361865e-6, 1.2350199409828616e-6, 4.277884398786315e-6], + linf = [8.357934254688004e-6, 1.0326389653148027e-5, 1.0326389654924384e-5, 4.4961900057316484e-5]) + end +end + +# Clean up afterwards: delete Trixi output directory +Trixi.is_mpi_root() && @test_nowarn rm(outdir, recursive=true) + +end #module From d91a7ff1cba907b358684b9f11a93f7d7c2c3ff8 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sat, 3 Oct 2020 16:52:31 +0200 Subject: [PATCH 62/81] Enable parallel 2D tests in Travis --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 4e2877ee528..3b4e1f02e63 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,6 +37,7 @@ env: - TRIXI_TEST=3D - TRIXI_TEST=misc - TRIXI_TEST=paper-self-gravitating-gas-dynamics + - TRIXI_TEST=parallel_2d notifications: webhooks: https://coveralls.io/webhook email: false From 18a65d4ef467394c53e052830fb33dad128a3e3a Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sat, 3 Oct 2020 16:56:03 +0200 Subject: [PATCH 63/81] Fix AMR for 1D (hopefully) --- src/solvers/dg/1d/amr.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/solvers/dg/1d/amr.jl b/src/solvers/dg/1d/amr.jl index 86ae261c8ef..d629b8f332f 100644 --- a/src/solvers/dg/1d/amr.jl +++ b/src/solvers/dg/1d/amr.jl @@ -1,8 +1,8 @@ # This file contains functions that are related to the AMR capabilities of the DG solver # Refine elements in the DG solver based on a list of cell_ids that should be refined -function refine!(dg::Dg1D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, - cells_to_refine::AbstractArray{Int}) where {Eqn, NVARS, POLYDEG} +function refine!(dg::Dg1D{Eqn, MeshType, NVARS, POLYDEG}, mesh::TreeMesh, + cells_to_refine::AbstractArray{Int}) where {Eqn, MeshType, NVARS, POLYDEG} # Return early if there is nothing to do if isempty(cells_to_refine) return @@ -98,8 +98,8 @@ end # Coarsen elements in the DG solver based on a list of cell_ids that should be removed -function 
coarsen!(dg::Dg1D{Eqn, NVARS, POLYDEG}, mesh::TreeMesh, - child_cells_to_coarsen::AbstractArray{Int}) where {Eqn, NVARS, POLYDEG} +function coarsen!(dg::Dg1D{Eqn, MeshType, NVARS, POLYDEG}, mesh::TreeMesh, + child_cells_to_coarsen::AbstractArray{Int}) where {Eqn, MeshType, NVARS, POLYDEG} # Return early if there is nothing to do if isempty(child_cells_to_coarsen) return From 11e3d5b517d7335cfb5205f8173ce551220eedb6 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sat, 3 Oct 2020 17:11:47 +0200 Subject: [PATCH 64/81] Update manual tests --- test/test_manual.jl | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/test/test_manual.jl b/test/test_manual.jl index 1ad79bdd9ee..8bb098f1af0 100644 --- a/test/test_manual.jl +++ b/test/test_manual.jl @@ -9,16 +9,16 @@ isdir(outdir) && rm(outdir, recursive=true) # Run various manual (= non-parameter-file-triggered tests) @testset "Manual tests" begin - @testset "Tree" begin + @testset "SerialTree" begin @testset "constructors" begin - @test_nowarn Trixi.Tree(Val(1), 10, 0.0, 1.0) + @test_nowarn Trixi.SerialTree(Val(1), 10, 0.0, 1.0) end @testset "helper functions" begin - t = Trixi.Tree(Val(1), 10, 0.0, 1.0) + t = Trixi.SerialTree(Val(1), 10, 0.0, 1.0) @test_nowarn show(t) @test Trixi.ndims(t) == 1 - @test Trixi.ndims(Trixi.Tree{1}) == 1 + @test Trixi.ndims(Trixi.SerialTree{1}) == 1 @test Trixi.has_any_neighbor(t, 1, 1) == true @test Trixi.isperiodic(t, 1) == true @test Trixi.n_children_per_cell(t) == 2 @@ -27,7 +27,7 @@ isdir(outdir) && rm(outdir, recursive=true) end @testset "refine!/coarsen!" begin - t = Trixi.Tree(Val(1), 10, 0.0, 1.0) + t = Trixi.SerialTree(Val(1), 10, 0.0, 1.0) @test Trixi.refine!(t) == [1] @test Trixi.coarsen!(t) == [1] @test Trixi.refine!(t) == [1] @@ -41,6 +41,12 @@ isdir(outdir) && rm(outdir, recursive=true) end end + @testset "ParallelTree" begin + @testset "constructors" begin + @test_nowarn Trixi.ParallelTree(Val(1), 10, 0.0, 1.0) + end + end + @testset "interpolation" begin @testset "nodes and weights" begin @test Trixi.gauss_nodes_weights(1) == ([0.0], [2.0]) From 4fd55698ec53353d568a4a43f399dda90fe9a103 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sun, 4 Oct 2020 06:15:57 +0200 Subject: [PATCH 65/81] Disable module precompilation for MPI tests --- test/runtests.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index be730000e40..1a9c39efa0a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -29,8 +29,10 @@ const TRIXI_MPI_NPROCS = 3 end @time if TRIXI_TEST == "all" || TRIXI_TEST == "parallel_2d" + # Based on `runtests.jl` from `MPI.jl` and `PencilArrays.jl` + # Precompilation disabled to prevent race conditions when loading packages mpiexec() do cmd - run(`$cmd -n $TRIXI_MPI_NPROCS $(Base.julia_cmd()) test_examples_parallel_2d.jl`) + run(`$cmd -n $TRIXI_MPI_NPROCS $(Base.julia_cmd()) --compiled-modules=no test_examples_parallel_2d.jl`) end end end From 68d10e1db1c334034f41443ff18d354410e7a286 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sun, 4 Oct 2020 06:21:24 +0200 Subject: [PATCH 66/81] Remove test for non-existent `ndims` method --- test/test_manual.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_manual.jl b/test/test_manual.jl index 8bb098f1af0..eb8dcd2db8c 100644 --- a/test/test_manual.jl +++ b/test/test_manual.jl @@ -18,7 +18,6 @@ isdir(outdir) && rm(outdir, recursive=true) t = Trixi.SerialTree(Val(1), 10, 0.0, 1.0) @test_nowarn show(t) @test 
Trixi.ndims(t) == 1 - @test Trixi.ndims(Trixi.SerialTree{1}) == 1 @test Trixi.has_any_neighbor(t, 1, 1) == true @test Trixi.isperiodic(t, 1) == true @test Trixi.n_children_per_cell(t) == 2 From 7eb678cc6d3d5590b0beb1b81ccc414cdaa92266 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 9 Oct 2020 06:23:13 +0200 Subject: [PATCH 67/81] Disable MHD test in parallel since calc_mpi_interface_flux is not yet implemented for non-conservative terms --- test/runtests.jl | 1 + test/test_examples_parallel_2d.jl | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 1a9c39efa0a..9fe9ae3d04f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -29,6 +29,7 @@ const TRIXI_MPI_NPROCS = 3 end @time if TRIXI_TEST == "all" || TRIXI_TEST == "parallel_2d" + println("wololo") # Based on `runtests.jl` from `MPI.jl` and `PencilArrays.jl` # Precompilation disabled to prevent race conditions when loading packages mpiexec() do cmd diff --git a/test/test_examples_parallel_2d.jl b/test/test_examples_parallel_2d.jl index ccde292d517..61853e54aaf 100644 --- a/test/test_examples_parallel_2d.jl +++ b/test/test_examples_parallel_2d.jl @@ -36,11 +36,12 @@ const EXAMPLES_DIR = joinpath(pathof(Trixi) |> dirname |> dirname, "examples", " linf = [0.0065356386867677085, 0.0006535638688170142, 0.0013071277374487877, 0.0001633909674296774], extra_analysis_quantities=["l2_error_primitive", "linf_error_primitive"], t_end=0.5) end - @testset "parameters_ec_mhd.toml" begin - test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_ec_mhd.toml"), - l2 = [0.03607862694368351, 0.04281395008247395, 0.04280207686965749, 0.025746770192645763, 0.1611518499414067, 0.017455917249117023, 0.017456981264942977, 0.02688321120361229, 0.00015024027267648003], - linf = [0.23502083666166018, 0.3156846367743936, 0.31227895161037256, 0.2118146956106238, 0.9743049414302711, 0.09050624115026618, 0.09131633488909774, 0.15693063355520998, 0.0038394720095667593]) - end + # MHD + MPI not yet implemented + # @testset "parameters_ec_mhd.toml" begin + # test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_ec_mhd.toml"), + # l2 = [0.03607862694368351, 0.04281395008247395, 0.04280207686965749, 0.025746770192645763, 0.1611518499414067, 0.017455917249117023, 0.017456981264942977, 0.02688321120361229, 0.00015024027267648003], + # linf = [0.23502083666166018, 0.3156846367743936, 0.31227895161037256, 0.2118146956106238, 0.9743049414302711, 0.09050624115026618, 0.09131633488909774, 0.15693063355520998, 0.0038394720095667593]) + # end @testset "parameters_hyp_diff_harmonic_nonperiodic.toml" begin test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_hyp_diff_harmonic_nonperiodic.toml"), l2 = [8.618132353932638e-8, 5.619399844708813e-7, 5.619399845476024e-7], From 98468a38f6d4b8f7784add05ae1a5aeb8cf47b47 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 9 Oct 2020 18:18:47 +0200 Subject: [PATCH 68/81] Remove wololo --- test/runtests.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 9fe9ae3d04f..1a9c39efa0a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -29,7 +29,6 @@ const TRIXI_MPI_NPROCS = 3 end @time if TRIXI_TEST == "all" || TRIXI_TEST == "parallel_2d" - println("wololo") # Based on `runtests.jl` from `MPI.jl` and `PencilArrays.jl` # Precompilation disabled to prevent race conditions when loading packages mpiexec() do cmd From 2c021bad3c60283cf3082f6d83b8e1a5bdd7d579 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper 
Date: Fri, 9 Oct 2020 19:46:52 +0200 Subject: [PATCH 69/81] Restrict MPI ranks to 2 or 3 for testing --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 1a9c39efa0a..1430011a95a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,7 +4,7 @@ using MPI: mpiexec # run tests on Travis CI in parallel const TRIXI_TEST = get(ENV, "TRIXI_TEST", "all") const ON_APPVEYOR = lowercase(get(ENV, "APPVEYOR", "false")) == "true" -const TRIXI_MPI_NPROCS = 3 +const TRIXI_MPI_NPROCS = clamp(Sys.CPU_THREADS, 2, 3) @time @testset "Trixi.jl tests" begin @time if TRIXI_TEST == "all" || TRIXI_TEST == "1D" From a2ea737213363eda5e58bdba82fdd3c92fed2ab2 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 9 Oct 2020 22:24:42 +0200 Subject: [PATCH 70/81] Improve coverage on `show` for mortar container --- test/test_manual.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_manual.jl b/test/test_manual.jl index eb8dcd2db8c..7294e4a3bde 100644 --- a/test/test_manual.jl +++ b/test/test_manual.jl @@ -193,9 +193,9 @@ isdir(outdir) && rm(outdir, recursive=true) @testset "DG L2 mortar container debug output" begin c2d = Trixi.L2MortarContainer2D{1, 1}(1) - @test isnothing(show(c2d)) + @test isnothing(display(c2d)) c3d = Trixi.L2MortarContainer3D{1, 1}(1) - @test isnothing(show(c3d)) + @test isnothing(display(c3d)) end end From 4d43e51c452a4863209504f339976784efc2bece Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 9 Oct 2020 22:35:24 +0200 Subject: [PATCH 71/81] Improve TreeMesh coverage --- test/test_manual.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/test_manual.jl b/test/test_manual.jl index 7294e4a3bde..0956dd0dc54 100644 --- a/test/test_manual.jl +++ b/test/test_manual.jl @@ -46,6 +46,12 @@ isdir(outdir) && rm(outdir, recursive=true) end end + @testset "TreeMesh" begin + @testset "constructors" begin + Trixi.TreeMesh{Trixi.SerialTree{1}}(1, 5.0, 2.0) isa Trixi.TreeMesh + end + end + @testset "interpolation" begin @testset "nodes and weights" begin @test Trixi.gauss_nodes_weights(1) == ([0.0], [2.0]) From bd3874f4e60f3ab0bc063155ba3b436d7182edff Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 9 Oct 2020 22:35:32 +0200 Subject: [PATCH 72/81] Remove unused methods --- src/parallel/parallel.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/parallel/parallel.jl b/src/parallel/parallel.jl index 188b700698e..970af283cd0 100644 --- a/src/parallel/parallel.jl +++ b/src/parallel/parallel.jl @@ -47,19 +47,14 @@ const MPI_IS_ROOT = Ref(true) @inline mpi_comm() = MPI.COMM_WORLD -@inline mpi_rank(comm) = MPI.Comm_rank(comm) @inline mpi_rank() = MPI_RANK[] -@inline n_mpi_ranks(comm) = MPI.Comm_size(comm) @inline n_mpi_ranks() = MPI_SIZE[] -@inline is_parallel(comm) = n_mpi_ranks(comm) > 1 @inline is_parallel() = MPI_IS_PARALLEL[] -@inline is_serial(comm) = !is_parallel(comm) @inline is_serial() = MPI_IS_SERIAL[] -@inline is_mpi_root(comm) = is_serial() || mpi_rank(comm) == 0 @inline is_mpi_root() = MPI_IS_ROOT[] @inline mpi_root() = 0 From fa329ce90791f7cdc477e4dadc001c84a59e23b2 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 9 Oct 2020 22:38:21 +0200 Subject: [PATCH 73/81] Improve coverage for `show` of ParallelTree --- test/test_manual.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/test_manual.jl b/test/test_manual.jl index 0956dd0dc54..81ad0086e1d 100644 --- 
a/test/test_manual.jl +++ b/test/test_manual.jl @@ -16,7 +16,7 @@ isdir(outdir) && rm(outdir, recursive=true) @testset "helper functions" begin t = Trixi.SerialTree(Val(1), 10, 0.0, 1.0) - @test_nowarn show(t) + @test isnothing(display(t)) @test Trixi.ndims(t) == 1 @test Trixi.has_any_neighbor(t, 1, 1) == true @test Trixi.isperiodic(t, 1) == true @@ -44,6 +44,11 @@ isdir(outdir) && rm(outdir, recursive=true) @testset "constructors" begin @test_nowarn Trixi.ParallelTree(Val(1), 10, 0.0, 1.0) end + + @testset "helper functions" begin + t = Trixi.ParallelTree(Val(1), 10, 0.0, 1.0) + @test isnothing(display(t)) + end end @testset "TreeMesh" begin From e5f9a31bf7f3effa7efe7eca03c89c5682488ca3 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 9 Oct 2020 22:41:59 +0200 Subject: [PATCH 74/81] Add parallel restart test --- test/test_examples_parallel_2d.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/test_examples_parallel_2d.jl b/test/test_examples_parallel_2d.jl index 61853e54aaf..a9154ddb676 100644 --- a/test/test_examples_parallel_2d.jl +++ b/test/test_examples_parallel_2d.jl @@ -72,6 +72,14 @@ const EXAMPLES_DIR = joinpath(pathof(Trixi) |> dirname |> dirname, "examples", " l2 = [8.517783186497567e-7, 1.2350199409361865e-6, 1.2350199409828616e-6, 4.277884398786315e-6], linf = [8.357934254688004e-6, 1.0326389653148027e-5, 1.0326389654924384e-5, 4.4961900057316484e-5]) end + @testset "parameters.toml with restart and t_end=2" begin + Trixi.run(joinpath(EXAMPLES_DIR, "parameters.toml")) + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters.toml"), + l2 = [1.2148032444677485e-5], + linf = [6.495644794757283e-5], + t_end = 2, + restart = true, restart_filename = "out/restart_000040.h5") + end end # Clean up afterwards: delete Trixi output directory From f296a8f98b637267e1c97c09c6036cef3f998e51 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 9 Oct 2020 22:42:15 +0200 Subject: [PATCH 75/81] Test for reset_data_structures! 
for ParallelTree --- test/test_manual.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_manual.jl b/test/test_manual.jl index 81ad0086e1d..bedbf0be8f7 100644 --- a/test/test_manual.jl +++ b/test/test_manual.jl @@ -48,6 +48,7 @@ isdir(outdir) && rm(outdir, recursive=true) @testset "helper functions" begin t = Trixi.ParallelTree(Val(1), 10, 0.0, 1.0) @test isnothing(display(t)) + @test isnothing(Trixi.reset_data_structures!(t)) end end From f754fb8f5e79bec55bb0988371bee962644abb00 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 9 Oct 2020 22:44:17 +0200 Subject: [PATCH 76/81] Comment AMR-specific I/O for parallel cse --- src/solvers/dg/2d/parallel.jl | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl index eaabae820e5..ab49015e73b 100644 --- a/src/solvers/dg/2d/parallel.jl +++ b/src/solvers/dg/2d/parallel.jl @@ -380,22 +380,22 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso " " * " PID × #ranks: " * @sprintf("%10.8e s", runtime_relative * n_mpi_ranks())) - # Level information (only show for AMR) - if parameter("amr_interval", 0)::Int > 0 && is_mpi_root() - levels = Vector{Int}(undef, dg.n_elements) - for element_id in 1:dg.n_elements - levels[element_id] = mesh.tree.levels[dg.elements.cell_ids[element_id]] - end - min_level = minimum(levels) - max_level = maximum(levels) - - mpi_println(" #elements: " * @sprintf("% 14d", dg.n_elements)) - for level = max_level:-1:min_level+1 - mpi_println(" ├── level $level: " * @sprintf("% 14d", count(x->x==level, levels))) - end - mpi_println(" └── level $min_level: " * @sprintf("% 14d", count(x->x==min_level, levels))) - end - mpi_println() + # Level information (only show for AMR) #TODO MPI add when AMR is enabled + # if parameter("amr_interval", 0)::Int > 0 && is_mpi_root() + # levels = Vector{Int}(undef, dg.n_elements) + # for element_id in 1:dg.n_elements + # levels[element_id] = mesh.tree.levels[dg.elements.cell_ids[element_id]] + # end + # min_level = minimum(levels) + # max_level = maximum(levels) + + # mpi_println(" #elements: " * @sprintf("% 14d", dg.n_elements)) + # for level = max_level:-1:min_level+1 + # mpi_println(" ├── level $level: " * @sprintf("% 14d", count(x->x==level, levels))) + # end + # mpi_println(" └── level $min_level: " * @sprintf("% 14d", count(x->x==min_level, levels))) + # end + # mpi_println() # Open file for appending and store time step and time information if dg.save_analysis && is_mpi_root() From 346239a9aac2db1c4f8ea993bd148f78b74362e8 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sun, 11 Oct 2020 05:52:33 +0200 Subject: [PATCH 77/81] Add additional tests and comment out unused methods for MPI + MHD and MPI + Euler-gravity --- src/solvers/dg/2d/parallel.jl | 161 +++++++++++++++--------------- test/test_examples_parallel_2d.jl | 5 + 2 files changed, 86 insertions(+), 80 deletions(-) diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl index ab49015e73b..2b8eaaab26b 100644 --- a/src/solvers/dg/2d/parallel.jl +++ b/src/solvers/dg/2d/parallel.jl @@ -578,74 +578,74 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso end end - # Magnetic energy - if :energy_magnetic in dg.analysis_quantities - e_magnetic = integrate(dg, dg.elements.u) do i, j, element_id, dg, u - cons = get_node_vars(u, dg, i, j, element_id) - return energy_magnetic(cons, equations(dg)) - end - if 
is_mpi_root() - print(" ∑e_magnetic: ") - @printf(" % 10.8e", e_magnetic) - dg.save_analysis && @printf(f, " % 10.8e", e_magnetic) - println() - end - end + # Magnetic energy #TODO MPI add when MHD is enabled + # if :energy_magnetic in dg.analysis_quantities + # e_magnetic = integrate(dg, dg.elements.u) do i, j, element_id, dg, u + # cons = get_node_vars(u, dg, i, j, element_id) + # return energy_magnetic(cons, equations(dg)) + # end + # if is_mpi_root() + # print(" ∑e_magnetic: ") + # @printf(" % 10.8e", e_magnetic) + # dg.save_analysis && @printf(f, " % 10.8e", e_magnetic) + # println() + # end + # end - # Potential energy - if :energy_potential in dg.analysis_quantities - # FIXME: This should be implemented properly for multiple coupled solvers - @assert !isnothing(solver_gravity) "Only works if gravity solver is supplied" - @assert dg.initial_conditions == initial_conditions_jeans_instability "Only works with Jeans instability setup" - - e_potential = integrate(dg, dg.elements.u, solver_gravity.elements.u) do i, j, element_id, dg, u_euler, u_gravity - cons_euler = get_node_vars(u_euler, dg, i, j, element_id) - cons_gravity = get_node_vars(u_gravity, solver_gravity, i, j, element_id) - # OBS! subtraction is specific to Jeans instability test where rho_0 = 1.5e7 - return (cons_euler[1] - 1.5e7) * cons_gravity[1] - end - if is_mpi_root() - print(" ∑e_pot: ") - @printf(" % 10.8e", e_potential) - dg.save_analysis && @printf(f, " % 10.8e", e_potential) - println() - end - end + # Potential energy #TODO MPI add when Euler-gravity is enabled + # if :energy_potential in dg.analysis_quantities + # # FIXME: This should be implemented properly for multiple coupled solvers + # @assert !isnothing(solver_gravity) "Only works if gravity solver is supplied" + # @assert dg.initial_conditions == initial_conditions_jeans_instability "Only works with Jeans instability setup" + + # e_potential = integrate(dg, dg.elements.u, solver_gravity.elements.u) do i, j, element_id, dg, u_euler, u_gravity + # cons_euler = get_node_vars(u_euler, dg, i, j, element_id) + # cons_gravity = get_node_vars(u_gravity, solver_gravity, i, j, element_id) + # # OBS! 
subtraction is specific to Jeans instability test where rho_0 = 1.5e7 + # return (cons_euler[1] - 1.5e7) * cons_gravity[1] + # end + # if is_mpi_root() + # print(" ∑e_pot: ") + # @printf(" % 10.8e", e_potential) + # dg.save_analysis && @printf(f, " % 10.8e", e_potential) + # println() + # end + # end - # Solenoidal condition ∇ ⋅ B = 0 - if :l2_divb in dg.analysis_quantities || :linf_divb in dg.analysis_quantities - l2_divb, linf_divb = calc_mhd_solenoid_condition(dg, time) - end - if is_mpi_root() - # L2 norm of ∇ ⋅ B - if :l2_divb in dg.analysis_quantities - print(" L2 ∇ ⋅B: ") - @printf(" % 10.8e", l2_divb) - dg.save_analysis && @printf(f, " % 10.8e", l2_divb) - println() - end - # Linf norm of ∇ ⋅ B - if :linf_divb in dg.analysis_quantities - print(" Linf ∇ ⋅B: ") - @printf(" % 10.8e", linf_divb) - dg.save_analysis && @printf(f, " % 10.8e", linf_divb) - println() - end - end + # Solenoidal condition ∇ ⋅ B = 0 #TODO MPI add when MHD is enabled + # if :l2_divb in dg.analysis_quantities || :linf_divb in dg.analysis_quantities + # l2_divb, linf_divb = calc_mhd_solenoid_condition(dg, time) + # end + # if is_mpi_root() + # # L2 norm of ∇ ⋅ B + # if :l2_divb in dg.analysis_quantities + # print(" L2 ∇ ⋅B: ") + # @printf(" % 10.8e", l2_divb) + # dg.save_analysis && @printf(f, " % 10.8e", l2_divb) + # println() + # end + # # Linf norm of ∇ ⋅ B + # if :linf_divb in dg.analysis_quantities + # print(" Linf ∇ ⋅B: ") + # @printf(" % 10.8e", linf_divb) + # dg.save_analysis && @printf(f, " % 10.8e", linf_divb) + # println() + # end + # end - # Cross helicity - if :cross_helicity in dg.analysis_quantities - h_c = integrate(dg, dg.elements.u) do i, j, element_id, dg, u - cons = get_node_vars(u, dg, i, j, element_id) - return cross_helicity(cons, equations(dg)) - end - if is_mpi_root() - print(" ∑H_c: ") - @printf(" % 10.8e", h_c) - dg.save_analysis && @printf(f, " % 10.8e", h_c) - println() - end - end + # Cross helicity #TODO MPI add when MHD is enabled + # if :cross_helicity in dg.analysis_quantities + # h_c = integrate(dg, dg.elements.u) do i, j, element_id, dg, u + # cons = get_node_vars(u, dg, i, j, element_id) + # return cross_helicity(cons, equations(dg)) + # end + # if is_mpi_root() + # print(" ∑H_c: ") + # @printf(" % 10.8e", h_c) + # dg.save_analysis && @printf(f, " % 10.8e", h_c) + # println() + # end + # end if is_mpi_root() println("-"^80) @@ -681,21 +681,22 @@ function calc_error_norms(func, dg::Dg2D, t, uses_mpi::Val{true}) end -function calc_mhd_solenoid_condition(dg::Dg2D, t, mpi_parallel::Val{true}) - l2_divb, linf_divb = calc_mhd_solenoid_condition(func, dg, t, Val(false)) - - # Since the local L2 norm is already normalized and square-rooted, we need to undo this first - global_l2_divb = Vector(l2_divb.^2 .* dg.analysis_total_volume) - global_linf_divb = Vector(linf_divb) - MPI.Reduce!(global_l2_divb, +, mpi_root(), mpi_comm()) - MPI.Reduce!(global_linf_divb, max, mpi_root(), mpi_comm()) - l2_divb = convert(typeof(l2_divb), global_l2_divb) - linf_divb = convert(typeof(linf_divb), global_linf_divb) - - l2_divb = @. 
sqrt(l2_divb / dg.analysis_total_volume) - - return l2_divb, linf_divb -end +#TODO MPI add when MHD is enabled +# function calc_mhd_solenoid_condition(dg::Dg2D, t, mpi_parallel::Val{true}) +# l2_divb, linf_divb = calc_mhd_solenoid_condition(func, dg, t, Val(false)) +# +# # Since the local L2 norm is already normalized and square-rooted, we need to undo this first +# global_l2_divb = Vector(l2_divb.^2 .* dg.analysis_total_volume) +# global_linf_divb = Vector(linf_divb) +# MPI.Reduce!(global_l2_divb, +, mpi_root(), mpi_comm()) +# MPI.Reduce!(global_linf_divb, max, mpi_root(), mpi_comm()) +# l2_divb = convert(typeof(l2_divb), global_l2_divb) +# linf_divb = convert(typeof(linf_divb), global_linf_divb) +# +# l2_divb = @. sqrt(l2_divb / dg.analysis_total_volume) +# +# return l2_divb, linf_divb +# end # OBS! Global results are only calculated on MPI root diff --git a/test/test_examples_parallel_2d.jl b/test/test_examples_parallel_2d.jl index a9154ddb676..6873928f99a 100644 --- a/test/test_examples_parallel_2d.jl +++ b/test/test_examples_parallel_2d.jl @@ -36,6 +36,11 @@ const EXAMPLES_DIR = joinpath(pathof(Trixi) |> dirname |> dirname, "examples", " linf = [0.0065356386867677085, 0.0006535638688170142, 0.0013071277374487877, 0.0001633909674296774], extra_analysis_quantities=["l2_error_primitive", "linf_error_primitive"], t_end=0.5) end + @testset "parameters_vortex.toml" begin + test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_vortex.toml"), + l2 = [3.6343138447409784e-6, 0.0032111379843728876, 0.0032111482778261658, 0.004545715889714643], + linf = [7.901869034399045e-5, 0.030511158864742205, 0.030451936462313256, 0.04361908901631395]) + end # MHD + MPI not yet implemented # @testset "parameters_ec_mhd.toml" begin # test_trixi_run(joinpath(EXAMPLES_DIR, "parameters_ec_mhd.toml"), From 73c02a599b3268f207dd128ebc1a0e6bfa628192 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sun, 11 Oct 2020 07:49:20 +0200 Subject: [PATCH 78/81] Fix `integrate` and `calc_error_norms` for MPI --- src/solvers/dg/2d/dg.jl | 2 +- src/solvers/dg/2d/parallel.jl | 11 +++++++---- src/solvers/solvers.jl | 3 ++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index 4303ce8542e..5eb8f04f69b 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -852,7 +852,7 @@ state_integrals = integrate(dg.elements.u, dg) """ integrate(func, u, dg::Dg2D; normalize=true) = integrate(func, u, dg, uses_mpi(dg); normalize=normalize) -function integrate(func, u, dg::Dg2D, uses_mpi::Val{false}; normalize=true) +function integrate(func, u, dg::Dg2D, uses_mpi; normalize=true) func_wrapped = function(i, j, element_id, dg, u) u_local = get_node_vars(u, dg, i, j, element_id) return func(u_local) diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl index 2b8eaaab26b..f14605b3bae 100644 --- a/src/solvers/dg/2d/parallel.jl +++ b/src/solvers/dg/2d/parallel.jl @@ -419,7 +419,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso end # Calculate L2/Linf errors, which are also returned by analyze_solution - l2_error, linf_error = calc_error_norms(dg, time) + l2_error, linf_error = calc_error_norms(dg, time, Val(true)) if is_mpi_root() # L2 error @@ -451,7 +451,10 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso # Store initial state integrals at first invocation if isempty(dg.initial_state_integrals) dg.initial_state_integrals = zeros(nvariables(equation)) - 
dg.initial_state_integrals .= state_integrals + if is_mpi_root() + # Only set on MPI root; all other ranks do not get any value from `integrate` + dg.initial_state_integrals .= state_integrals + end end if is_mpi_root() @@ -480,7 +483,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso # L2/L∞ errors of the primitive variables if :l2_error_primitive in dg.analysis_quantities || :linf_error_primitive in dg.analysis_quantities - l2_error_prim, linf_error_prim = calc_error_norms(cons2prim, dg, time) + l2_error_prim, linf_error_prim = calc_error_norms(cons2prim, dg, time, Val(true)) if is_mpi_root() print(" Variable: ") @@ -699,7 +702,7 @@ end # end -# OBS! Global results are only calculated on MPI root +# OBS! Global results are only calculated on MPI root, all other domains receive `nothing` function integrate(func, dg::Dg2D, uses_mpi::Val{true}, args...; normalize=true) integral = integrate(func, dg, Val(false), args...; normalize=normalize) integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm()) diff --git a/src/solvers/solvers.jl b/src/solvers/solvers.jl index 5f1050fcb55..53be5f97a7a 100644 --- a/src/solvers/solvers.jl +++ b/src/solvers/solvers.jl @@ -57,7 +57,8 @@ the problem encapsulated by `solver` at time `t`, where `func` is called as `fun """ function calc_error_norms end -@inline calc_error_norms(solver::AbstractSolver, t) = calc_error_norms(cons2cons, solver, t) +@inline calc_error_norms(solver::AbstractSolver, t) = calc_error_norms(solver, t, uses_mpi(solver)) +@inline calc_error_norms(solver::AbstractSolver, t, uses_mpi) = calc_error_norms(cons2cons, solver, t, uses_mpi) #################################################################################################### From 166f06dec6c1f16ba4fb1e2a9d9e82e8766f3540 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sun, 11 Oct 2020 08:14:34 +0200 Subject: [PATCH 79/81] Fix calc_error_norms for MPI/non-2D runs --- src/solvers/dg/2d/parallel.jl | 1 + src/solvers/solvers.jl | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl index f14605b3bae..ed7bdf34fee 100644 --- a/src/solvers/dg/2d/parallel.jl +++ b/src/solvers/dg/2d/parallel.jl @@ -667,6 +667,7 @@ end # OBS! 
Global results are only calculated on MPI root +@inline calc_error_norms(dg::Dg2D, t, uses_mpi) = calc_error_norms(cons2cons, dg, t, uses_mpi) function calc_error_norms(func, dg::Dg2D, t, uses_mpi::Val{true}) l2_error, linf_error = calc_error_norms(func, dg, t, Val(false)) diff --git a/src/solvers/solvers.jl b/src/solvers/solvers.jl index 53be5f97a7a..5f1050fcb55 100644 --- a/src/solvers/solvers.jl +++ b/src/solvers/solvers.jl @@ -57,8 +57,7 @@ the problem encapsulated by `solver` at time `t`, where `func` is called as `fun """ function calc_error_norms end -@inline calc_error_norms(solver::AbstractSolver, t) = calc_error_norms(solver, t, uses_mpi(solver)) -@inline calc_error_norms(solver::AbstractSolver, t, uses_mpi) = calc_error_norms(cons2cons, solver, t, uses_mpi) +@inline calc_error_norms(solver::AbstractSolver, t) = calc_error_norms(cons2cons, solver, t) #################################################################################################### From e8ce36c1d27eaa484c2cc4011871a99166ed1c9d Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sun, 11 Oct 2020 09:01:20 +0200 Subject: [PATCH 80/81] Sort Travis jobs by descending run time (such that longer-running jobs start first) --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3b4e1f02e63..829b6bfeff8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,12 +32,12 @@ env: global: - COVERALLS_PARALLEL=true jobs: - - TRIXI_TEST=1D - TRIXI_TEST=2D - TRIXI_TEST=3D - - TRIXI_TEST=misc - TRIXI_TEST=paper-self-gravitating-gas-dynamics - TRIXI_TEST=parallel_2d + - TRIXI_TEST=1D + - TRIXI_TEST=misc notifications: webhooks: https://coveralls.io/webhook email: false From a7011af5a6516dfd7d9e21968a9255e52793a7c6 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Sun, 11 Oct 2020 14:38:17 +0200 Subject: [PATCH 81/81] Prefix MPI-related methods with `mpi_` --- src/auxiliary/auxiliary.jl | 2 +- src/io/parallel.jl | 12 ++++---- src/mesh/mesh.jl | 8 +++--- src/mesh/parallel.jl | 12 ++++---- src/parallel/parallel.jl | 12 ++++---- src/run.jl | 30 ++++++++++---------- src/run_euler_gravity.jl | 4 +-- src/solvers/dg/2d/dg.jl | 16 +++++------ src/solvers/dg/2d/parallel.jl | 46 +++++++++++++++---------------- test/test_examples_parallel_2d.jl | 4 +-- 10 files changed, 73 insertions(+), 73 deletions(-) diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index 14d10bb0234..6e288c55c7e 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -19,7 +19,7 @@ function parse_parameters_file(filename, mpi_parallel::Val{false}) parameters[:default]["parameters_file"] = filename end function parse_parameters_file(filename, mpi_parallel::Val{true}) - if is_mpi_root() + if mpi_isroot() buffer = read(filename) MPI.Bcast!(Ref(length(buffer)), mpi_root(), mpi_comm()) MPI.Bcast!(buffer, mpi_root(), mpi_comm()) diff --git a/src/io/parallel.jl b/src/io/parallel.jl index c55f934e677..90bc821fa0a 100644 --- a/src/io/parallel.jl +++ b/src/io/parallel.jl @@ -9,7 +9,7 @@ function load_restart_file!(dg::AbstractDg, restart_filename, mpi_parallel::Val{ element_size = nnodes(dg)^ndims(dg) node_counts = convert(Vector{Cint}, collect(dg.n_elements_by_rank)) * Cint(element_size) - if is_mpi_root() + if mpi_isroot() # Open file h5open(restart_filename, "r") do file # Read attributes to perform some sanity checks @@ -68,10 +68,10 @@ function save_restart_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, varnames = varnames_cons(equations(dg)) # Only 
write from MPI root (poor man's version of parallel I/O) - if is_mpi_root() + if mpi_isroot() # Create output directory (if it does not exist) output_directory = parameter("output_directory", "out") - if is_mpi_root() + if mpi_isroot() mkpath(output_directory) end @@ -141,7 +141,7 @@ function save_solution_file(dg::AbstractDg, mesh::TreeMesh, time, dt, timestep, end # Only write from MPI root (poor man's version of parallel I/O) - if is_mpi_root() + if mpi_isroot() # Create output directory (if it does not exist) output_directory = parameter("output_directory", "out") mkpath(output_directory) @@ -208,7 +208,7 @@ end function save_mesh_file(mesh::TreeMesh, timestep, mpi_parallel::Val{true}) # Create output directory (if it does not exist) output_directory = parameter("output_directory", "out") - is_mpi_root() && mkpath(output_directory) + mpi_isroot() && mkpath(output_directory) # Determine file name based on existence of meaningful time step if timestep >= 0 @@ -218,7 +218,7 @@ function save_mesh_file(mesh::TreeMesh, timestep, mpi_parallel::Val{true}) end # Since the mesh is replicated on all ranks, only save from MPI root - if !is_mpi_root() + if !mpi_isroot() return filename * ".h5" end diff --git a/src/mesh/mesh.jl b/src/mesh/mesh.jl index 261c74fbb5c..f41f8f4bd73 100644 --- a/src/mesh/mesh.jl +++ b/src/mesh/mesh.jl @@ -74,7 +74,7 @@ function generate_mesh() periodicity = parameter("periodicity", true) # Create mesh - if is_parallel() + if mpi_isparallel() tree_type = ParallelTree{ndims_} else tree_type = SerialTree{ndims_} @@ -90,7 +90,7 @@ function generate_mesh() # Apply refinement patches @timeit timer() "refinement patches" for patch in parameter("refinement_patches", []) - is_parallel() && error("non-uniform meshes not supported in parallel") + mpi_isparallel() && error("non-uniform meshes not supported in parallel") if patch["type"] == "box" refine_box!(mesh.tree, patch["coordinates_min"], patch["coordinates_max"]) else @@ -100,7 +100,7 @@ function generate_mesh() # Apply coarsening patches @timeit timer() "coarsening patches" for patch in parameter("coarsening_patches", []) - is_parallel() && error("non-uniform meshes not supported in parallel") + mpi_isparallel() && error("non-uniform meshes not supported in parallel") if patch["type"] == "box" coarsen_box!(mesh.tree, patch["coordinates_min"], patch["coordinates_max"]) else @@ -109,7 +109,7 @@ function generate_mesh() end # Partition mesh - if is_parallel() + if mpi_isparallel() partition!(mesh) end diff --git a/src/mesh/parallel.jl b/src/mesh/parallel.jl index a310350f748..fca6ad1fd84 100644 --- a/src/mesh/parallel.jl +++ b/src/mesh/parallel.jl @@ -2,10 +2,10 @@ function partition!(mesh) # Determine number of leaf cells per rank leaves = leaf_cells(mesh.tree) - @assert length(leaves) > n_mpi_ranks() - n_leaves_per_rank = OffsetArray(fill(div(length(leaves), n_mpi_ranks()), n_mpi_ranks()), - 0:(n_mpi_ranks() - 1)) - for d in 0:(rem(length(leaves), n_mpi_ranks()) - 1) + @assert length(leaves) > mpi_nranks() + n_leaves_per_rank = OffsetArray(fill(div(length(leaves), mpi_nranks()), mpi_nranks()), + 0:(mpi_nranks() - 1)) + for d in 0:(rem(length(leaves), mpi_nranks()) - 1) n_leaves_per_rank[d] += 1 end @assert sum(n_leaves_per_rank) == length(leaves) @@ -51,7 +51,7 @@ function load_mesh(restart_filename, mpi_parallel::Val{true}) mesh.unsaved_changes = false # Read mesh file - if is_mpi_root() + if mpi_isroot() h5open(filename, "r") do file # Set domain information mesh.tree.center_level_0 = read(attrs(file)["center_level_0"]) @@ 
-106,7 +106,7 @@ function get_restart_mesh_filename(restart_filename, mpi_parallel::Val{true}) # Get directory name dirname, _ = splitdir(restart_filename) - if is_mpi_root() + if mpi_isroot() # Read mesh filename from restart file mesh_file = "" h5open(restart_filename, "r") do file diff --git a/src/parallel/parallel.jl b/src/parallel/parallel.jl index 970af283cd0..6c44c28ff45 100644 --- a/src/parallel/parallel.jl +++ b/src/parallel/parallel.jl @@ -49,15 +49,15 @@ const MPI_IS_ROOT = Ref(true) @inline mpi_rank() = MPI_RANK[] -@inline n_mpi_ranks() = MPI_SIZE[] +@inline mpi_nranks() = MPI_SIZE[] -@inline is_parallel() = MPI_IS_PARALLEL[] +@inline mpi_isparallel() = MPI_IS_PARALLEL[] -@inline is_serial() = MPI_IS_SERIAL[] +@inline mpi_isserial() = MPI_IS_SERIAL[] -@inline is_mpi_root() = MPI_IS_ROOT[] +@inline mpi_isroot() = MPI_IS_ROOT[] @inline mpi_root() = 0 -@inline mpi_println(args...) = is_mpi_root() && println(args...) -@inline mpi_print(args...) = is_mpi_root() && print(args...) +@inline mpi_println(args...) = mpi_isroot() && println(args...) +@inline mpi_print(args...) = mpi_isroot() && print(args...) diff --git a/src/run.jl b/src/run.jl index a8800e4b35f..6b4fed6b5b6 100644 --- a/src/run.jl +++ b/src/run.jl @@ -85,14 +85,14 @@ function init_simulation() if restart mpi_print("Loading mesh... ") @timeit timer() "mesh loading" mesh = load_mesh(restart_filename) - is_parallel() && MPI.Barrier(mpi_comm()) + mpi_isparallel() && MPI.Barrier(mpi_comm()) mpi_println("done") else mpi_print("Creating mesh... ") @timeit timer() "mesh creation" mesh = generate_mesh() mesh.current_filename = save_mesh_file(mesh) mesh.unsaved_changes = false - is_parallel() && MPI.Barrier(mpi_comm()) + mpi_isparallel() && MPI.Barrier(mpi_comm()) mpi_println("done") end @@ -100,14 +100,14 @@ function init_simulation() mpi_print("Initializing system of equations... ") equations_name = parameter("equations") equations = make_equations(equations_name, ndims_) - is_parallel() && MPI.Barrier(mpi_comm()) + mpi_isparallel() && MPI.Barrier(mpi_comm()) mpi_println("done") # Initialize solver mpi_print("Initializing solver... ") solver_name = parameter("solver", valid=["dg"]) solver = make_solver(solver_name, equations, mesh) - is_parallel() && MPI.Barrier(mpi_comm()) + mpi_isparallel() && MPI.Barrier(mpi_comm()) mpi_println("done") # Sanity checks @@ -128,7 +128,7 @@ function init_simulation() if restart mpi_print("Loading restart file...") time, step = load_restart_file!(solver, restart_filename) - is_parallel() && MPI.Barrier(mpi_comm()) + mpi_isparallel() && MPI.Barrier(mpi_comm()) mpi_println("done") else mpi_print("Applying initial conditions... 
") @@ -136,7 +136,7 @@ function init_simulation() time = t_start step = 0 set_initial_conditions!(solver, time) - is_parallel() && MPI.Barrier(mpi_comm()) + mpi_isparallel() && MPI.Barrier(mpi_comm()) mpi_println("done") # If AMR is enabled, adapt mesh and re-apply ICs @@ -205,7 +205,7 @@ function init_simulation() | time integration: $(get_name(time_integration_function)) | restart interval: $restart_interval | solution interval: $solution_interval - | #MPI ranks: $(n_mpi_ranks()) + | #MPI ranks: $(mpi_nranks()) | #threads/rank: $(Threads.nthreads()) | | Solver (local) @@ -319,7 +319,7 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function if solver.equations isa AbstractHyperbolicDiffusionEquations resid = maximum(abs, view(solver.elements.u_t, 1, .., :)) - if is_parallel() + if mpi_isparallel() resid = MPI.Allreduce!(Ref(resid), max, mpi_comm())[] end @@ -337,9 +337,9 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function # Analyze solution errors if analysis_interval > 0 && (step % analysis_interval == 0 || finalstep) # Calculate absolute and relative runtime - if is_parallel() + if mpi_isparallel() total_dofs = MPI.Reduce!(Ref(ndofs(solver)), +, mpi_root(), mpi_comm()) - total_dofs = is_mpi_root() ? total_dofs[] : -1 + total_dofs = mpi_isroot() ? total_dofs[] : -1 else total_dofs = ndofs(solver) end @@ -361,7 +361,7 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function mpi_println("-"^80) mpi_println() end - elseif alive_interval > 0 && step % alive_interval == 0 && is_mpi_root() + elseif alive_interval > 0 && step % alive_interval == 0 && mpi_isroot() runtime_absolute = (time_ns() - loop_start_time) / 10^9 @printf("#t/s: %6d | dt: %.4e | Sim. time: %.4e | Run time: %.4e s\n", step, dt, time, runtime_absolute) @@ -427,13 +427,13 @@ function run_simulation(mesh, solver, time_parameters, time_integration_function end # Print timer information - if is_mpi_root() + if mpi_isroot() print_timer(timer(), title="Trixi.jl", allocations=true, linechars=:ascii, compact=false) println() end # Distribute l2_errors from root such that all ranks have correct return value - if is_parallel() + if mpi_isparallel() l2_error = convert(typeof(l2_error), MPI.Bcast!(collect(l2_error), mpi_root(), mpi_comm())) linf_error = convert(typeof(linf_error), MPI.Bcast!(collect(linf_error), mpi_root(), mpi_comm())) end @@ -453,7 +453,7 @@ refinement level will be increased by 1. Parameters can be overriden by specifyi additional keyword arguments, which are passed to the respective call to `run`.. """ function convtest(parameters_file, iterations; parameters...) - if is_mpi_root() + if mpi_isroot() @assert(iterations > 1, "Number of iterations must be bigger than 1 for a convergence analysis") end @@ -486,7 +486,7 @@ function convtest(parameters_file, iterations; parameters...) 
eocs = Dict(kind => log.(error[2:end, :] ./ error[1:end-1, :]) ./ log(1 / 2) for (kind, error) in errorsmatrix) - if is_mpi_root() + if mpi_isroot() for (kind, error) in errorsmatrix println(kind) diff --git a/src/run_euler_gravity.jl b/src/run_euler_gravity.jl index 11aae83f16c..77ab67576d0 100644 --- a/src/run_euler_gravity.jl +++ b/src/run_euler_gravity.jl @@ -1,5 +1,5 @@ function init_simulation_euler_gravity() - is_parallel() && error("coupled simulations are not yet tested for parallel runs") # TODO parallel + mpi_isparallel() && error("coupled simulations are not yet tested for parallel runs") # TODO parallel # Print startup message print_startup_message() @@ -207,7 +207,7 @@ end function run_simulation_euler_gravity(mesh, solvers, time_parameters, time_integration_function) - is_parallel() && error("coupled simulations are not yet tested for parallel runs") # TODO parallel + mpi_isparallel() && error("coupled simulations are not yet tested for parallel runs") # TODO parallel @unpack time, step, t_end, cfl, n_steps_max, save_final_solution, save_final_restart, diff --git a/src/solvers/dg/2d/dg.jl b/src/solvers/dg/2d/dg.jl index 5eb8f04f69b..503423e7b38 100644 --- a/src/solvers/dg/2d/dg.jl +++ b/src/solvers/dg/2d/dg.jl @@ -91,7 +91,7 @@ end # Convenience constructor to create DG solver instance function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, volume_flux_function, initial_conditions, source_terms, mesh::TreeMesh, POLYDEG) where {NDIMS, NVARS} # Get local cells for which an element needs to be created (i.e., all leaf cells) - if is_parallel() + if mpi_isparallel() leaf_cell_ids = local_leaf_cells(mesh.tree) else leaf_cell_ids = leaf_cells(mesh.tree) @@ -120,7 +120,7 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v n_ecmortars = nmortars(ecmortars) # Sanity checks - if isperiodic(mesh.tree) && n_l2mortars == 0 && n_ecmortars == 0 && is_serial() + if isperiodic(mesh.tree) && n_l2mortars == 0 && n_ecmortars == 0 && mpi_isserial() @assert n_interfaces == 2*n_elements ("For 2D and periodic domains and conforming elements, " * "n_surf must be the same as 2*n_elem") end @@ -215,7 +215,7 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v amr_alpha_smooth = parameter("amr_alpha_smooth", false) # Set up MPI neighbor connectivity and communication data structures - if is_parallel() + if mpi_isparallel() (mpi_neighbor_ranks, mpi_neighbor_interfaces) = init_mpi_neighbor_connectivity(elements, mpi_interfaces, mesh) (mpi_send_buffers, @@ -225,16 +225,16 @@ function Dg2D(equation::AbstractEquation{NDIMS, NVARS}, surface_flux_function, v Val(NDIMS), Val(NVARS), Val(POLYDEG)) # Determine local and total number of elements - n_elements_by_rank = Vector{Int}(undef, n_mpi_ranks()) + n_elements_by_rank = Vector{Int}(undef, mpi_nranks()) n_elements_by_rank[mpi_rank() + 1] = n_elements MPI.Allgather!(n_elements_by_rank, 1, mpi_comm()) - n_elements_by_rank = OffsetArray(n_elements_by_rank, 0:(n_mpi_ranks() - 1)) + n_elements_by_rank = OffsetArray(n_elements_by_rank, 0:(mpi_nranks() - 1)) n_elements_global = MPI.Allreduce(n_elements, +, mpi_comm()) @assert n_elements_global == sum(n_elements_by_rank) "error in total number of elements" # Determine the global element id of the first element first_element_global_id = MPI.Exscan(n_elements, +, mpi_comm()) - if is_mpi_root() + if mpi_isroot() # With Exscan, the result on the first rank is undefined first_element_global_id = 1 else @@ -378,7 +378,7 @@ function 
count_required_interfaces(mesh::TreeMesh2D, cell_ids) end # Skip if neighbor is on different rank -> create MPI interface instead - if is_parallel() && !is_own_cell(mesh.tree, neighbor_cell_id) + if mpi_isparallel() && !is_own_cell(mesh.tree, neighbor_cell_id) continue end @@ -584,7 +584,7 @@ function init_interface_connectivity!(elements, interfaces, mesh::TreeMesh2D) end # Skip if neighbor is on different rank -> create MPI interface instead - if is_parallel() && !is_own_cell(mesh.tree, neighbor_cell_id) + if mpi_isparallel() && !is_own_cell(mesh.tree, neighbor_cell_id) continue end diff --git a/src/solvers/dg/2d/parallel.jl b/src/solvers/dg/2d/parallel.jl index ed7bdf34fee..4c7eb07f046 100644 --- a/src/solvers/dg/2d/parallel.jl +++ b/src/solvers/dg/2d/parallel.jl @@ -72,7 +72,7 @@ function count_required_mpi_interfaces(mesh::TreeMesh2D, cell_ids) end # Skip if neighbor is on this rank -> create regular interface instead - if is_parallel() && is_own_cell(mesh.tree, neighbor_cell_id) + if mpi_isparallel() && is_own_cell(mesh.tree, neighbor_cell_id) continue end @@ -128,7 +128,7 @@ function init_mpi_interface_connectivity!(elements, mpi_interfaces, mesh::TreeMe end # Skip if neighbor is on this MPI rank -> create regular interface instead - if is_parallel() && is_own_cell(mesh.tree, neighbor_cell_id) + if mpi_isparallel() && is_own_cell(mesh.tree, neighbor_cell_id) continue end @@ -378,10 +378,10 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso " PID: " * @sprintf("%10.8e s", runtime_relative)) mpi_println(" sim. time: " * @sprintf("%10.8e", time) * " " * - " PID × #ranks: " * @sprintf("%10.8e s", runtime_relative * n_mpi_ranks())) + " PID × #ranks: " * @sprintf("%10.8e s", runtime_relative * mpi_nranks())) # Level information (only show for AMR) #TODO MPI add when AMR is enabled - # if parameter("amr_interval", 0)::Int > 0 && is_mpi_root() + # if parameter("amr_interval", 0)::Int > 0 && mpi_isroot() # levels = Vector{Int}(undef, dg.n_elements) # for element_id in 1:dg.n_elements # levels[element_id] = mesh.tree.levels[dg.elements.cell_ids[element_id]] @@ -398,7 +398,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso # mpi_println() # Open file for appending and store time step and time information - if dg.save_analysis && is_mpi_root() + if dg.save_analysis && mpi_isroot() f = open(dg.analysis_filename, "a") @printf(f, "% 9d", step) @printf(f, " %10.8e", time) @@ -407,7 +407,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso # Calculate and print derived quantities (error norms, entropy etc.) 
# Variable names required for L2 error, Linf error, and conservation error - if is_mpi_root() + if mpi_isroot() if any(q in dg.analysis_quantities for q in (:l2_error, :linf_error, :conservation_error, :residual)) print(" Variable: ") @@ -421,7 +421,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso # Calculate L2/Linf errors, which are also returned by analyze_solution l2_error, linf_error = calc_error_norms(dg, time, Val(true)) - if is_mpi_root() + if mpi_isroot() # L2 error if :l2_error in dg.analysis_quantities print(" L2 error: ") @@ -451,13 +451,13 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso # Store initial state integrals at first invocation if isempty(dg.initial_state_integrals) dg.initial_state_integrals = zeros(nvariables(equation)) - if is_mpi_root() + if mpi_isroot() # Only set on MPI root; all other ranks do not get any value from `integrate` dg.initial_state_integrals .= state_integrals end end - if is_mpi_root() + if mpi_isroot() print(" |∑U - ∑U₀|: ") for v in 1:nvariables(equation) err = abs(state_integrals[v] - dg.initial_state_integrals[v]) @@ -475,8 +475,8 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso # Calculate maximum absolute value of Uₜ res = maximum(abs, view(dg.elements.u_t, v, :, :, :)) res = MPI.Reduce!(Ref(res), max, mpi_root(), mpi_comm()) - is_mpi_root() && @printf(" % 10.8e", res[]) - is_mpi_root() && dg.save_analysis && @printf(f, " % 10.8e", res[]) + mpi_isroot() && @printf(" % 10.8e", res[]) + mpi_isroot() && dg.save_analysis && @printf(f, " % 10.8e", res[]) end mpi_println() end @@ -485,7 +485,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso if :l2_error_primitive in dg.analysis_quantities || :linf_error_primitive in dg.analysis_quantities l2_error_prim, linf_error_prim = calc_error_norms(cons2prim, dg, time, Val(true)) - if is_mpi_root() + if mpi_isroot() print(" Variable: ") for v in 1:nvariables(equation) @printf(" %-14s", varnames_prim(equation)[v]) @@ -517,7 +517,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso # Entropy time derivative if :dsdu_ut in dg.analysis_quantities dsdu_ut = calc_entropy_timederivative(dg, time) - if is_mpi_root() + if mpi_isroot() print(" ∑∂S/∂U ⋅ Uₜ: ") @printf(" % 10.8e", dsdu_ut) dg.save_analysis && @printf(f, " % 10.8e", dsdu_ut) @@ -531,7 +531,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso cons = get_node_vars(u, dg, i, j, element_id) return entropy(cons, equations(dg)) end - if is_mpi_root() + if mpi_isroot() print(" ∑S: ") @printf(" % 10.8e", s) dg.save_analysis && @printf(f, " % 10.8e", s) @@ -545,7 +545,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso cons = get_node_vars(u, dg, i, j, element_id) return energy_total(cons, equations(dg)) end - if is_mpi_root() + if mpi_isroot() print(" ∑e_total: ") @printf(" % 10.8e", e_total) dg.save_analysis && @printf(f, " % 10.8e", e_total) @@ -559,7 +559,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso cons = get_node_vars(u, dg, i, j, element_id) return energy_kinetic(cons, equations(dg)) end - if is_mpi_root() + if mpi_isroot() print(" ∑e_kinetic: ") @printf(" % 10.8e", e_kinetic) dg.save_analysis && @printf(f, " % 10.8e", e_kinetic) @@ -573,7 +573,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso cons = get_node_vars(u, dg, i, j, element_id) 
return energy_internal(cons, equations(dg)) end - if is_mpi_root() + if mpi_isroot() print(" ∑e_internal: ") @printf(" % 10.8e", e_internal) dg.save_analysis && @printf(f, " % 10.8e", e_internal) @@ -587,7 +587,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso # cons = get_node_vars(u, dg, i, j, element_id) # return energy_magnetic(cons, equations(dg)) # end - # if is_mpi_root() + # if mpi_isroot() # print(" ∑e_magnetic: ") # @printf(" % 10.8e", e_magnetic) # dg.save_analysis && @printf(f, " % 10.8e", e_magnetic) @@ -607,7 +607,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso # # OBS! subtraction is specific to Jeans instability test where rho_0 = 1.5e7 # return (cons_euler[1] - 1.5e7) * cons_gravity[1] # end - # if is_mpi_root() + # if mpi_isroot() # print(" ∑e_pot: ") # @printf(" % 10.8e", e_potential) # dg.save_analysis && @printf(f, " % 10.8e", e_potential) @@ -619,7 +619,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso # if :l2_divb in dg.analysis_quantities || :linf_divb in dg.analysis_quantities # l2_divb, linf_divb = calc_mhd_solenoid_condition(dg, time) # end - # if is_mpi_root() + # if mpi_isroot() # # L2 norm of ∇ ⋅ B # if :l2_divb in dg.analysis_quantities # print(" L2 ∇ ⋅B: ") @@ -642,7 +642,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso # cons = get_node_vars(u, dg, i, j, element_id) # return cross_helicity(cons, equations(dg)) # end - # if is_mpi_root() + # if mpi_isroot() # print(" ∑H_c: ") # @printf(" % 10.8e", h_c) # dg.save_analysis && @printf(f, " % 10.8e", h_c) @@ -650,7 +650,7 @@ function analyze_solution(dg::Dg2D, mesh::TreeMesh, time, dt, step, runtime_abso # end # end - if is_mpi_root() + if mpi_isroot() println("-"^80) println() @@ -708,7 +708,7 @@ function integrate(func, dg::Dg2D, uses_mpi::Val{true}, args...; normalize=true) integral = integrate(func, dg, Val(false), args...; normalize=normalize) integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm()) - return is_mpi_root() ? integral[] : integral + return mpi_isroot() ? integral[] : integral end diff --git a/test/test_examples_parallel_2d.jl b/test/test_examples_parallel_2d.jl index 6873928f99a..013be763820 100644 --- a/test/test_examples_parallel_2d.jl +++ b/test/test_examples_parallel_2d.jl @@ -7,7 +7,7 @@ include("test_trixi.jl") # Start with a clean environment: remove Trixi output directory if it exists outdir = "out" -Trixi.is_mpi_root() && isdir(outdir) && rm(outdir, recursive=true) +Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) # pathof(Trixi) returns /path/to/Trixi/src/Trixi.jl, dirname gives the parent directory const EXAMPLES_DIR = joinpath(pathof(Trixi) |> dirname |> dirname, "examples", "2d") @@ -88,6 +88,6 @@ const EXAMPLES_DIR = joinpath(pathof(Trixi) |> dirname |> dirname, "examples", " end # Clean up afterwards: delete Trixi output directory -Trixi.is_mpi_root() && @test_nowarn rm(outdir, recursive=true) +Trixi.mpi_isroot() && @test_nowarn rm(outdir, recursive=true) end #module
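
The patches above rely on one recurring idiom for global quantities such as state integrals: every rank computes its local contribution, the contributions are summed onto the MPI root with `MPI.Reduce!`, and only the root prints or stores the result (the other ranks do not receive a usable value). The following stand-alone Julia sketch mirrors that pattern outside of Trixi; the variable names are made up for illustration, and it assumes MPI.jl is installed (run with, e.g., `mpiexec -n 4 julia reduce_sketch.jl`).

using MPI

MPI.Init()

comm = MPI.COMM_WORLD
root = 0
isroot = MPI.Comm_rank(comm) == root

# Each rank computes its local share of the global integral (dummy value here)
local_integral = 1.0 * (MPI.Comm_rank(comm) + 1)

# Sum all local contributions onto the root rank; this mirrors
# `integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())` used above
buf = Ref(local_integral)
MPI.Reduce!(buf, +, root, comm)

# Only the root holds the global result, just like
# `return mpi_isroot() ? integral[] : integral` in the patched `integrate`
if isroot
    println("global integral = ", buf[])
end

MPI.Finalize()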
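
The complementary idiom, used in `run.jl` above so that `run` returns correct error norms on every rank, broadcasts the root's values back to all ranks after the root-only computation. Again a minimal sketch with made-up placeholder values, assuming MPI.jl:

using MPI

MPI.Init()

comm = MPI.COMM_WORLD
root = 0

# After a root-only reduction, only the root knows the "real" errors;
# the other ranks still carry placeholder values
l2_error = MPI.Comm_rank(comm) == root ? [1.0e-3, 2.0e-3] : zeros(2)

# Broadcast the root's buffer in place so that every rank ends up with the same
# values, mirroring `MPI.Bcast!(collect(l2_error), mpi_root(), mpi_comm())` above
MPI.Bcast!(l2_error, root, comm)

println("rank ", MPI.Comm_rank(comm), ": l2_error = ", l2_error)

MPI.Finalize()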