Commit

Merge 23b646c into 819e4b7
efaulhaber authored Jun 27, 2024
2 parents 819e4b7 + 23b646c commit 1708a10
Showing 8 changed files with 187 additions and 52 deletions.
2 changes: 2 additions & 0 deletions Project.toml
@@ -4,12 +4,14 @@ authors = ["Erik Faulhaber <erik.faulhaber@uni-koeln.de>"]
version = "0.3.2-dev"

[deps]
Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"

[compat]
Atomix = "0.1"
LinearAlgebra = "1"
Polyester = "0.7.5"
Reexport = "1"
1 change: 1 addition & 0 deletions src/PointNeighbors.jl
@@ -2,6 +2,7 @@ module PointNeighbors

using Reexport: @reexport

using Atomix: Atomix
using LinearAlgebra: dot
using Polyester: @batch
@reexport using StaticArrays: SVector
2 changes: 2 additions & 0 deletions src/cell_lists/dictionary.jl
@@ -25,6 +25,8 @@ struct DictionaryCellList{NDIMS} <: AbstractCellList
end
end

supported_update_strategies(::DictionaryCellList) = (:semi_parallel, :serial)

function Base.empty!(cell_list::DictionaryCellList)
Base.empty!(cell_list.hashtable)

17 changes: 17 additions & 0 deletions src/cell_lists/full_grid.jl
@@ -39,6 +39,12 @@ struct FullGridCellList{C, LI, MC} <: AbstractCellList
end
end

function supported_update_strategies(::FullGridCellList{<:DynamicVectorOfVectors})
(:parallel, :semi_parallel, :serial)
end

supported_update_strategies(::FullGridCellList) = (:semi_parallel, :serial)
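
These trait methods advertise which update strategies a cell list supports, ordered best-first; the `GridNeighborhoodSearch` constructor later picks `first(...)` as the default. A minimal query sketch, assuming the package is loaded (`supported_update_strategies` is an internal trait, so it may need to be qualified with `PointNeighbors.`):

```julia
using PointNeighbors

# `DictionaryCellList` cannot be updated fully in parallel
supported_update_strategies(DictionaryCellList{2}())  # (:semi_parallel, :serial)

# The default `DynamicVectorOfVectors` backend enables the fully parallel update
cell_list = FullGridCellList(; min_corner = (0.0, 0.0), max_corner = (1.0, 1.0),
                             search_radius = 0.1)
first(supported_update_strategies(cell_list))  # :parallel, chosen as the default
```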

function FullGridCellList(; min_corner, max_corner, search_radius = 0.0,
periodicity = false, backend = DynamicVectorOfVectors{Int32},
max_points_per_cell = 100)
@@ -113,6 +119,17 @@ function push_cell!(cell_list::FullGridCellList{Nothing}, cell, particle)
throw(ArgumentError("`search_radius` is not defined for this cell list"))
end

@inline function push_cell_atomic!(cell_list::FullGridCellList, cell, particle)
(; cells) = cell_list

# `push!(cell_list[cell], particle)`, but for all backends.
# The atomic version of `pushat!` uses atomics to avoid race conditions when `pushat!`
# is used in a parallel loop.
pushat_atomic!(cells, cell_index(cell_list, cell), particle)

return cell_list
end
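
For illustration, a hypothetical parallel insertion loop in which the atomic variant matters; `coordinates` and the surrounding setup are stand-ins, not part of this commit:

```julia
# Hypothetical sketch: two threads may push into the same cell concurrently,
# which would race with a plain `push_cell!`.
Threads.@threads for point in axes(coordinates, 2)
    cell = cell_coords(view(coordinates, :, point), neighborhood_search)
    push_cell_atomic!(cell_list, cell, point)
end
```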

function deleteat_cell!(cell_list::FullGridCellList, cell, i)
(; cells) = cell_list

155 changes: 117 additions & 38 deletions src/nhs_grid.jl
@@ -2,7 +2,7 @@
GridNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0,
periodic_box = nothing,
cell_list = DictionaryCellList{NDIMS}(),
threaded_update = true)
update_strategy = nothing)
Simple grid-based neighborhood search with uniform search radius.
The domain is divided into a regular grid.
@@ -36,10 +36,20 @@ since not sorting makes our implementation a lot faster (although less parallelizable)
[`PeriodicBox`](@ref).
- `cell_list`: The cell list that maps a cell index to a list of points inside
the cell. By default, a [`DictionaryCellList`](@ref) is used.
- `threaded_update = true`: Can be used to deactivate thread parallelization in the
neighborhood search update. This can be one of the largest
sources of variations between simulations with different
thread numbers due to neighbor ordering changes.
- `update_strategy = nothing`: Strategy to parallelize `update!`. Available options are:
- `nothing`: Automatically choose the best available option.
- `:parallel`: Fully parallel update by using atomic operations to avoid race
conditions when adding points into the same cell. This is not
available for all cell list implementations, but is the default when
available.
- `:semi_parallel`: Loop over all cells in parallel to mark cells with points that now
belong to a different cell. Then, move points of affected cells
serially. This is available for all cell list implementations and is
the default when `:parallel` is not available.
- `:serial`: Deactivate parallelization in the neighborhood search update.
Parallel update can be one of the largest sources of variations
between simulations with different thread numbers due to neighbor
ordering changes.
## References
- M. Chalela, E. Sillero, L. Pereyra, M.A. Garcia, J.B. Cabral, M. Lares, M. Merchán.
@@ -51,25 +61,44 @@ since not sorting makes our implementation a lot faster (although less parallelizable)
In: Computer Graphics Forum 30.1 (2011), pages 99–112.
[doi: 10.1111/J.1467-8659.2010.01832.X](https://doi.org/10.1111/J.1467-8659.2010.01832.X)
"""
struct GridNeighborhoodSearch{NDIMS, ELTYPE, CL, PB, UB} <: AbstractNeighborhoodSearch
cell_list :: CL
search_radius :: ELTYPE
periodic_box :: PB
n_cells :: NTuple{NDIMS, Int} # Required to calculate periodic cell index
cell_size :: NTuple{NDIMS, ELTYPE} # Required to calculate cell index
update_buffer :: UB # Multithreaded buffer for `update!`
threaded_update :: Bool
struct GridNeighborhoodSearch{NDIMS, update_strategy,
ELTYPE, CL, PB, UB} <: AbstractNeighborhoodSearch
# Store `update_strategy` as type parameter to dispatch `update!`
cell_list :: CL
search_radius :: ELTYPE
periodic_box :: PB
n_cells :: NTuple{NDIMS, Int} # Required to calculate periodic cell index
cell_size :: NTuple{NDIMS, ELTYPE} # Required to calculate cell index
update_buffer :: UB # Multithreaded buffer for `update!`

function GridNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0,
periodic_box = nothing,
cell_list = DictionaryCellList{NDIMS}(),
threaded_update = true) where {NDIMS}
update_strategy = nothing) where {NDIMS}
ELTYPE = typeof(search_radius)

# Create update buffer and initialize it with empty vectors
update_buffer = DynamicVectorOfVectors{index_type(cell_list)}(max_outer_length = Threads.nthreads(),
max_inner_length = n_points)
push!(update_buffer, (NTuple{NDIMS, Int}[] for _ in 1:Threads.nthreads())...)
if isnothing(update_strategy) && Threads.nthreads() == 1
# Use serial update on one thread to avoid a second loop over all particles
# when `:parallel` is picked.
update_strategy = :serial
elseif isnothing(update_strategy)
# Automatically choose best available update option for this cell list
update_strategy = first(supported_update_strategies(cell_list))
elseif !(update_strategy in supported_update_strategies(cell_list))
throw(ArgumentError("$update_strategy is not a valid update strategy for " *
"this cell list. Available options are " *
"$(supported_update_strategies(cell_list))"))
end

if update_strategy in (:semi_parallel, :serial)
# Create update buffer and initialize it with empty vectors
update_buffer = DynamicVectorOfVectors{index_type(cell_list)}(max_outer_length = Threads.nthreads(),
max_inner_length = n_points)
push!(update_buffer, (NTuple{NDIMS, Int}[] for _ in 1:Threads.nthreads())...)
else
# No update buffer needed for fully parallel update
update_buffer = nothing
end

if search_radius < eps() || isnothing(periodic_box)
# No periodicity
@@ -91,13 +120,15 @@ struct GridNeighborhoodSearch{NDIMS, ELTYPE, CL, PB, UB} <: AbstractNeighborhoodSearch
end
end

new{NDIMS, ELTYPE, typeof(cell_list), typeof(periodic_box),
new{NDIMS, update_strategy, ELTYPE,
typeof(cell_list), typeof(periodic_box),
typeof(update_buffer)}(cell_list, search_radius, periodic_box, n_cells,
cell_size, update_buffer, threaded_update)
cell_size, update_buffer)
end
end

@inline Base.ndims(::GridNeighborhoodSearch{NDIMS}) where {NDIMS} = NDIMS
@inline update_strategy(::GridNeighborhoodSearch{<:Any, strategy}) where {strategy} = strategy
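A behavior sketch for the selection logic above, assuming the default `DictionaryCellList` and more than one thread (the accessor may need qualification if not exported):

```julia
nhs = GridNeighborhoodSearch{2}(; search_radius = 0.1, n_points = 100)
update_strategy(nhs)  # :semi_parallel (the best strategy a `DictionaryCellList` supports)

# Requesting an unsupported strategy throws:
GridNeighborhoodSearch{2}(; search_radius = 0.1, n_points = 100,
                          update_strategy = :parallel)
# ERROR: ArgumentError: parallel is not a valid update strategy for this cell list.
#        Available options are (:semi_parallel, :serial)
```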

function initialize!(neighborhood_search::GridNeighborhoodSearch,
x::AbstractMatrix, y::AbstractMatrix)
@@ -137,19 +168,24 @@ function update_grid!(neighborhood_search::GridNeighborhoodSearch{NDIMS},
update_grid!(neighborhood_search, i -> extract_svector(y, Val(NDIMS), i))
end

# Modify the existing cell lists by moving points into their new cells
function update_grid!(neighborhood_search::GridNeighborhoodSearch, coords_fun)
(; cell_list, update_buffer, threaded_update) = neighborhood_search
# Serial and semi-parallel update
function update_grid!(neighborhood_search::Union{GridNeighborhoodSearch{<:Any, :serial},
GridNeighborhoodSearch{<:Any,
:semi_parallel}},
coords_fun::Function)
(; cell_list, update_buffer) = neighborhood_search

# Empty each thread's list
@threaded for i in eachindex(update_buffer)
emptyat!(update_buffer, i)
end

# Find all cells containing points that now belong to another cell
mark_changed_cell!(neighborhood_search, cell_list, coords_fun, Val(threaded_update))
# Find all cells containing points that now belong to another cell.
# This loop is threaded for `update_strategy == :semi_parallel`.
mark_changed_cells!(neighborhood_search, coords_fun)

# Iterate over all marked cells and move the points into their new cells
# Iterate over all marked cells and move the points into their new cells.
# This is always a serial loop (hence "semi-parallel").
for j in eachindex(update_buffer)
for cell_index in update_buffer[j]
points = cell_list[cell_index]
@@ -182,26 +218,24 @@ end
# The type annotation is to make Julia specialize on the type of the function.
# Otherwise, unspecialized code will cause a lot of allocations and heavily impact performance.
# See https://docs.julialang.org/en/v1/manual/performance-tips/#Be-aware-of-when-Julia-avoids-specializing
@inline function mark_changed_cell!(neighborhood_search, cell_list, coords_fun::T,
threaded_update::Val{true}) where {T}
@inline function mark_changed_cells!(neighborhood_search::GridNeighborhoodSearch{<:Any,
:semi_parallel},
coords_fun::T) where {T}
# `each_cell_index(cell_list)` might return a `KeySet`, which has to be `collect`ed
# first to be able to be used in a threaded loop. This function takes care of that.
@threaded for cell_index in each_cell_index_threadable(cell_list)
@threaded for cell_index in each_cell_index_threadable(neighborhood_search.cell_list)
mark_changed_cell!(neighborhood_search, cell_index, coords_fun)
end
end

@inline function mark_changed_cell!(neighborhood_search, cell_list, coords_fun::T,
threaded_update::Val{false}) where {T}
for cell_index in each_cell_index(cell_list)
@inline function mark_changed_cells!(neighborhood_search::GridNeighborhoodSearch{<:Any,
:serial},
coords_fun::T) where {T}
for cell_index in each_cell_index(neighborhood_search.cell_list)
mark_changed_cell!(neighborhood_search, cell_index, coords_fun)
end
end

# Use this function barrier and unpack inside to avoid passing closures to Polyester.jl
# with `@batch` (`@threaded`).
# Otherwise, `@threaded` does not work here with Julia ARM on macOS.
# See https://github.com/JuliaSIMD/Polyester.jl/issues/88.
@inline function mark_changed_cell!(neighborhood_search, cell_index, coords_fun)
(; cell_list, update_buffer) = neighborhood_search

Expand All @@ -219,6 +253,50 @@ end
end
end

# Fully parallel update with atomic push
function update_grid!(neighborhood_search::GridNeighborhoodSearch{<:Any, :parallel},
coords_fun::Function)
(; cell_list) = neighborhood_search

# Note that we need two separate loops for adding and removing points.
# `push_cell_atomic!` only guarantees thread-safety when different threads push
# simultaneously, but not when `deleteat_cell!` is called at the same time.

# Add points to new cells
@threaded for cell_index in each_cell_index_threadable(cell_list)
for point in cell_list[cell_index]
cell_coords_ = cell_coords(coords_fun(point), neighborhood_search)

if !is_correct_cell(cell_list, cell_coords_, cell_index)
# Add point to new cell or create cell if it does not exist
push_cell_atomic!(cell_list, cell_coords_, point)
end
end
end

# Remove points from old cells
@threaded for cell_index in each_cell_index_threadable(cell_list)
points = cell_list[cell_index]

# WARNING!!!
# The `DynamicVectorOfVectors` requires this loop to be **in descending order**.
# `deleteat_cell!(..., i)` will change the order of points that come after `i`.
for i in reverse(eachindex(points))
point = points[i]
cell_coords_ = cell_coords(coords_fun(point), neighborhood_search)

if !is_correct_cell(cell_list, cell_coords_, cell_index)
# Remove moved point from this cell
deleteat_cell!(cell_list, cell_index, i)
end
end
end

return neighborhood_search
end
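
To illustrate the warning above outside the package, here is a self-contained sketch with a swap-with-last removal standing in for the assumed `deleteat_cell!` semantics; ascending iteration would skip swapped-in elements and run past the shrunken end, while descending iteration only revisits indices that remain valid:

```julia
# Stand-in for a swap-with-last removal (assumed `deleteat_cell!` behavior)
swap_remove!(v, i) = (v[i] = v[end]; pop!(v); v)

v = [10, 20, 30, 40]

# Remove every element > 15. Descending order is safe because a removal only
# reorders elements at indices >= i, which were already visited.
for i in reverse(eachindex(v))
    v[i] > 15 && swap_remove!(v, i)
end

v == [10]  # true
```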

# 1D
@inline function eachneighbor(coords, neighborhood_search::GridNeighborhoodSearch{1})
cell = cell_coords(coords, neighborhood_search)
@@ -295,10 +373,11 @@ end

function copy_neighborhood_search(nhs::GridNeighborhoodSearch, search_radius, n_points;
eachpoint = 1:n_points)
(; periodic_box, threaded_update) = nhs
(; periodic_box) = nhs

cell_list = copy_cell_list(nhs.cell_list, search_radius, periodic_box)

return GridNeighborhoodSearch{ndims(nhs)}(; search_radius, n_points, periodic_box,
cell_list, threaded_update)
cell_list,
update_strategy = update_strategy(nhs))
end
18 changes: 8 additions & 10 deletions src/nhs_precomputed.jl
@@ -1,6 +1,6 @@
@doc raw"""
PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0,
periodic_box = nothing, threaded_update = true)
periodic_box = nothing, update_strategy = nothing)
Neighborhood search with precomputed neighbor lists. A list of all neighbors is computed
for each point during initialization and update.
@@ -20,10 +20,9 @@ initialization and update.
with [`copy_neighborhood_search`](@ref).
- `periodic_box = nothing`: In order to use a (rectangular) periodic domain, pass a
[`PeriodicBox`](@ref).
- `threaded_update = true`: Can be used to deactivate thread parallelization in the
neighborhood search update. This can be one of the largest
sources of variations between simulations with different
thread numbers due to neighbor ordering changes.
- `update_strategy`: Strategy to parallelize `update!` of the internally used
`GridNeighborhoodSearch`. See [`GridNeighborhoodSearch`](@ref)
for available options.
"""
struct PrecomputedNeighborhoodSearch{NDIMS, NHS, NL, PB}
neighborhood_search :: NHS
@@ -32,10 +31,9 @@ struct PrecomputedNeighborhoodSearch{NDIMS, NHS, NL, PB}

function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0,
periodic_box = nothing,
threaded_update = true) where {NDIMS}
update_strategy = nothing) where {NDIMS}
nhs = GridNeighborhoodSearch{NDIMS}(; search_radius, n_points,
periodic_box = periodic_box,
threaded_update = threaded_update)
periodic_box, update_strategy)

neighbor_lists = Vector{Vector{Int}}()

@@ -116,8 +114,8 @@ end

function copy_neighborhood_search(nhs::PrecomputedNeighborhoodSearch,
search_radius, n_points; eachpoint = 1:n_points)
threaded_update = nhs.neighborhood_search.threaded_update
update_strategy_ = update_strategy(nhs.neighborhood_search)
return PrecomputedNeighborhoodSearch{ndims(nhs)}(; search_radius, n_points,
periodic_box = nhs.periodic_box,
threaded_update)
update_strategy = update_strategy_)
end
17 changes: 17 additions & 0 deletions src/vector_of_vectors.jl
@@ -66,6 +66,23 @@ end
return vov
end

@inline function pushat_atomic!(vov::DynamicVectorOfVectors, i, value)
(; backend, lengths) = vov

@boundscheck checkbounds(vov, i)

# Increment the column length with an atomic add to avoid race conditions.
# Store the new value since it might be changed immediately afterwards by another
# thread.
new_length = Atomix.@atomic lengths[i] += 1

# We can write here without race conditions, since the atomic add guarantees
# that `new_length` is different for each thread.
backend[new_length, i] = value

return vov
end
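
The reservation pattern above can be reproduced standalone (assuming Atomix.jl is available): the atomic increment returns the updated length, which doubles as a slot index that no other thread can obtain:

```julia
using Atomix

lengths = zeros(Int, 1)    # column lengths; a single "cell" here
backend = zeros(Int, 128)  # fixed-capacity storage for that cell

Threads.@threads for value in 1:100
    # Atomic fetch-add: every thread receives a distinct `new_length`
    new_length = Atomix.@atomic lengths[1] += 1
    backend[new_length] = value
end

sort(backend[1:lengths[1]]) == 1:100  # true, no insertions were lost
```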

# `deleteat!(vov[i], j)`
@inline function deleteatat!(vov::DynamicVectorOfVectors, i, j)
(; backend, lengths) = vov