Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collider optimization #978

Merged
merged 13 commits into from
Oct 9, 2024
41 changes: 39 additions & 2 deletions include/manifold/vec_view.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
#pragma once

#include <cstddef>
#include <limits>
#include <type_traits>
#include <vector>

#include "manifold/optional_assert.h"

Expand All @@ -31,8 +34,13 @@
using Iter = T *;
using IterC = const T *;

VecView() : ptr_(nullptr), size_(0) {}

VecView(T *ptr, size_t size) : ptr_(ptr), size_(size) {}

VecView(const std::vector<std::remove_cv_t<T>> &v)
: ptr_(v.data()), size_(v.size()) {}

VecView(const VecView &other) {
ptr_ = other.ptr_;
size_ = other.size_;
Expand Down Expand Up @@ -94,6 +102,37 @@

bool empty() const { return size_ == 0; }

VecView<T> view(size_t offset = 0,
size_t length = std::numeric_limits<size_t>::max()) {
if (length == std::numeric_limits<size_t>::max())
length = this->size_ - offset;
ASSERT(length >= 0, std::out_of_range("Vec::view out of range"));
ASSERT(offset + length <= this->size_ && offset >= 0,
std::out_of_range("Vec::view out of range"));
return VecView<T>(this->ptr_ + offset, length);
}

VecView<const T> cview(
size_t offset = 0,
size_t length = std::numeric_limits<size_t>::max()) const {
if (length == std::numeric_limits<size_t>::max())
length = this->size_ - offset;
ASSERT(length >= 0, std::out_of_range("Vec::cview out of range"));
ASSERT(offset + length <= this->size_ && offset >= 0,
std::out_of_range("Vec::cview out of range"));
return VecView<const T>(this->ptr_ + offset, length);
}

VecView<const T> view(

Check warning on line 126 in include/manifold/vec_view.h

View check run for this annotation

Codecov / codecov/patch

include/manifold/vec_view.h#L126

Added line #L126 was not covered by tests
size_t offset = 0,
size_t length = std::numeric_limits<size_t>::max()) const {
return cview(offset, length);
}

T *data() { return this->ptr_; }

const T *data() const { return this->ptr_; }

#ifdef MANIFOLD_DEBUG
void Dump() const {
std::cout << "Vec = " << std::endl;
Expand All @@ -107,8 +146,6 @@
protected:
T *ptr_ = nullptr;
size_t size_ = 0;

VecView() = default;
};

} // namespace manifold
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -141,5 +141,5 @@ if(TRACY_ENABLE)
GIT_PROGRESS TRUE
)
FetchContent_MakeAvailable(tracy)
target_link_libraries(manifold INTERFACE TracyClient)
target_link_libraries(manifold PUBLIC TracyClient)
endif()
118 changes: 52 additions & 66 deletions src/collider.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
#include <intrin.h>
#endif

#ifdef MANIFOLD_PAR
#include <tbb/combinable.h>
#endif

namespace manifold {

namespace collider_internal {
Expand Down Expand Up @@ -155,18 +159,18 @@
};

template <typename T, const bool selfCollision, typename Recorder>
struct FindCollisions {
struct FindCollision {
VecView<const T> queries;
VecView<const Box> nodeBBox_;
VecView<const std::pair<int, int>> internalChildren_;
Recorder recorder;

int RecordCollision(int node, const int queryIdx) {
inline int RecordCollision(int node, const int queryIdx, SparseIndices& ind) {
bool overlaps = nodeBBox_[node].DoesOverlap(queries[queryIdx]);
if (overlaps && IsLeaf(node)) {
int leafIdx = Node2Leaf(node);
if (!selfCollision || leafIdx != queryIdx) {
recorder.record(queryIdx, leafIdx);
recorder.record(queryIdx, leafIdx, ind);
}
}
return overlaps && IsInternal(node); // Should traverse into node
Expand All @@ -179,15 +183,14 @@
int top = -1;
// Depth-first search
int node = kRoot;
// same implies that this query do not have any collision
if (recorder.earlyexit(queryIdx)) return;
SparseIndices& ind = recorder.local();
while (1) {
int internal = Node2Internal(node);
int child1 = internalChildren_[internal].first;
int child2 = internalChildren_[internal].second;

int traverse1 = RecordCollision(child1, queryIdx);
int traverse2 = RecordCollision(child2, queryIdx);
int traverse1 = RecordCollision(child1, queryIdx, ind);
int traverse2 = RecordCollision(child2, queryIdx, ind);

if (!traverse1 && !traverse2) {
if (top < 0) break; // done
Expand All @@ -199,48 +202,37 @@
}
}
}
recorder.end(queryIdx);
}
};

struct CountCollisions {
VecView<int> counts;
VecView<char> empty;
void record(int queryIdx, int _leafIdx) { counts[queryIdx]++; }
bool earlyexit(int _queryIdx) { return false; }
void end(int queryIdx) {
if (counts[queryIdx] == 0) empty[queryIdx] = 1;
}
};

template <const bool inverted>
struct SeqCollisionRecorder {
SparseIndices& queryTri_;
void record(int queryIdx, int leafIdx) const {
inline void record(int queryIdx, int leafIdx, SparseIndices& ind) const {

Check warning on line 211 in src/collider.h

View check run for this annotation

Codecov / codecov/patch

src/collider.h#L211

Added line #L211 was not covered by tests
if (inverted)
queryTri_.Add(leafIdx, queryIdx);
ind.Add(leafIdx, queryIdx);
else
queryTri_.Add(queryIdx, leafIdx);
ind.Add(queryIdx, leafIdx);
}
bool earlyexit(int queryIdx) const { return false; }
void end(int queryIdx) const {}
SparseIndices& local() { return queryTri_; }
};

#ifdef MANIFOLD_PAR
template <const bool inverted>
struct ParCollisionRecorder {
SparseIndices& queryTri;
VecView<int> counts;
VecView<char> empty;
void record(int queryIdx, int leafIdx) {
int pos = counts[queryIdx]++;
tbb::combinable<SparseIndices>& store;
inline void record(int queryIdx, int leafIdx, SparseIndices& ind) const {
// Add may invoke something in parallel, and it may return in
// another thread, making thread local unsafe
// we need to explicitly forbid parallelization by passing a flag
if (inverted)
queryTri.Set(pos, leafIdx, queryIdx);
ind.Add(leafIdx, queryIdx, true);
else
queryTri.Set(pos, queryIdx, leafIdx);
ind.Add(queryIdx, leafIdx, true);
}
bool earlyexit(int queryIdx) const { return empty[queryIdx] == 1; }
void end(int queryIdx) const {}
SparseIndices& local() { return store.local(); }
};
#endif

struct BuildInternalBoxes {
VecView<Box> nodeBBox_;
Expand Down Expand Up @@ -331,44 +323,38 @@

template <const bool selfCollision = false, const bool inverted = false,
typename T>
SparseIndices Collisions(const VecView<const T>& queriesIn) const {
void Collisions(const VecView<const T>& queriesIn,
SparseIndices& queryTri) const {
ZoneScoped;
using collider_internal::FindCollisions;
// note that the length is 1 larger than the number of queries so the last
// element can store the sum when using exclusive scan
if (queriesIn.size() < collider_internal::kSequentialThreshold) {
SparseIndices queryTri;
for_each_n(
ExecutionPolicy::Seq, countAt(0), queriesIn.size(),
FindCollisions<T, selfCollision,
collider_internal::SeqCollisionRecorder<inverted>>{
queriesIn, nodeBBox_, internalChildren_, {queryTri}});
return queryTri;
} else {
// compute the number of collisions to determine the size for allocation
// and offset, this avoids the need for atomic
Vec<int> counts(queriesIn.size() + 1, 0);
Vec<char> empty(queriesIn.size(), 0);
using collider_internal::FindCollision;
#ifdef MANIFOLD_PAR
if (queriesIn.size() > collider_internal::kSequentialThreshold) {
tbb::combinable<SparseIndices> store;
for_each_n(
ExecutionPolicy::Par, countAt(0), queriesIn.size(),
FindCollisions<T, selfCollision, collider_internal::CountCollisions>{
queriesIn, nodeBBox_, internalChildren_, {counts, empty}});
// compute start index for each query and total count
manifold::exclusive_scan(counts.begin(), counts.end(), counts.begin(), 0,
std::plus<int>());
if (counts.back() == 0) return SparseIndices(0);
SparseIndices queryTri(counts.back());
// actually recording collisions
for_each_n(
ExecutionPolicy::Par, countAt(0), queriesIn.size(),
FindCollisions<T, selfCollision,
collider_internal::ParCollisionRecorder<inverted>>{
queriesIn,
nodeBBox_,
internalChildren_,
{queryTri, counts, empty}});
return queryTri;
FindCollision<T, selfCollision,
collider_internal::ParCollisionRecorder<inverted>>{
queriesIn, nodeBBox_, internalChildren_, {store}});

std::vector<SparseIndices> tmp;
store.combine_each(
[&](SparseIndices& ind) { tmp.emplace_back(std::move(ind)); });
queryTri.FromIndices(tmp);
return;
}
#endif
for_each_n(ExecutionPolicy::Seq, countAt(0), queriesIn.size(),
FindCollision<T, selfCollision,
collider_internal::SeqCollisionRecorder<inverted>>{
queriesIn, nodeBBox_, internalChildren_, {queryTri}});
}

template <const bool selfCollision = false, const bool inverted = false,
typename T>
SparseIndices Collisions(const VecView<const T>& queriesIn) const {
SparseIndices result;
Collisions<selfCollision, inverted, T>(queriesIn, result);
return result;
}

static uint32_t MortonCode(vec3 position, Box bBox) {
Expand Down
19 changes: 11 additions & 8 deletions src/polygon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,8 @@ class EarClip {
// values < -precision so they will never affect validity. The first
// totalCost is designed to give priority to sharper angles. Any cost < (-1
// - precision) has satisfied the Delaunay condition.
double EarCost(double precision, const IdxCollider &collider) const {
double EarCost(double precision, const IdxCollider &collider,
SparseIndices &toTest) const {
vec2 openSide = left->pos - right->pos;
const vec2 center = 0.5 * (left->pos + right->pos);
const double scale = 4 / glm::dot(openSide, openSide);
Expand All @@ -506,7 +507,7 @@ class EarClip {
earBox.push_back({vec3(center.x - radius, center.y - radius, 0),
vec3(center.x + radius, center.y + radius, 0)});
earBox.back().Union(vec3(pos, 0));
const SparseIndices toTest = collider.collider.Collisions(earBox.cview());
collider.collider.Collisions(earBox.cview(), toTest);

const int lid = left->mesh_idx;
const int rid = right->mesh_idx;
Expand All @@ -522,7 +523,7 @@ class EarClip {
totalCost = std::max(totalCost, cost);
}
}

toTest.Clear();
return totalCost;
}

Expand Down Expand Up @@ -799,7 +800,8 @@ class EarClip {

// Recalculate the cost of the Vert v ear, updating it in the queue by
// removing and reinserting it.
void ProcessEar(VertItr v, const IdxCollider &collider) {
void ProcessEar(VertItr v, const IdxCollider &collider,
SparseIndices &buffer) {
if (v->ear != earsQueue_.end()) {
earsQueue_.erase(v->ear);
v->ear = earsQueue_.end();
Expand All @@ -808,7 +810,7 @@ class EarClip {
v->cost = kBest;
v->ear = earsQueue_.insert(v);
} else if (v->IsConvex(2 * precision_)) {
v->cost = v->EarCost(precision_, collider);
v->cost = v->EarCost(precision_, collider, buffer);
v->ear = earsQueue_.insert(v);
} else {
v->cost = 1; // not used, but marks reflex verts for debug
Expand Down Expand Up @@ -866,8 +868,9 @@ class EarClip {
int numTri = -2;
earsQueue_.clear();

SparseIndices buffer;
auto QueueVert = [&](VertItr v) {
ProcessEar(v, vertCollider);
ProcessEar(v, vertCollider, buffer);
++numTri;
v->PrintVert();
};
Expand All @@ -890,8 +893,8 @@ class EarClip {
ClipEar(v);
--numTri;

ProcessEar(v->left, vertCollider);
ProcessEar(v->right, vertCollider);
ProcessEar(v->left, vertCollider, buffer);
ProcessEar(v->right, vertCollider, buffer);
// This is a backup vert that is used if the queue is empty (geometrically
// invalid polygon), to ensure manifoldness.
v = v->right;
Expand Down
20 changes: 18 additions & 2 deletions src/sparse.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,22 @@
SparseIndices() = default;
SparseIndices(size_t size) { data_ = Vec<char>(size * sizeof(int64_t)); }

void Clear() { data_.clear(false); }

Check warning on line 63 in src/sparse.h

View check run for this annotation

Codecov / codecov/patch

src/sparse.h#L63

Added line #L63 was not covered by tests

void FromIndices(const std::vector<SparseIndices>& indices) {

Check warning on line 65 in src/sparse.h

View check run for this annotation

Codecov / codecov/patch

src/sparse.h#L65

Added line #L65 was not covered by tests
std::vector<size_t> sizes;
size_t total_size = 0;

Check warning on line 67 in src/sparse.h

View check run for this annotation

Codecov / codecov/patch

src/sparse.h#L67

Added line #L67 was not covered by tests
for (const auto& ind : indices) {
sizes.push_back(total_size);
total_size += ind.data_.size();

Check warning on line 70 in src/sparse.h

View check run for this annotation

Codecov / codecov/patch

src/sparse.h#L69-L70

Added lines #L69 - L70 were not covered by tests
}
data_ = Vec<char>(total_size);
for_each_n(ExecutionPolicy::Par, countAt(0), indices.size(), [&](size_t i) {
std::copy(indices[i].data_.begin(), indices[i].data_.end(),
data_.begin() + sizes[i]);
});

Check warning on line 76 in src/sparse.h

View check run for this annotation

Codecov / codecov/patch

src/sparse.h#L72-L76

Added lines #L72 - L76 were not covered by tests
}

size_t size() const { return data_.size() / sizeof(int64_t); }

Vec<int> Copy(bool use_q) const {
Expand Down Expand Up @@ -130,8 +146,8 @@
data_.size() / sizeof(int32_t));
}

inline void Add(int p, int q) {
for (unsigned int i = 0; i < sizeof(int64_t); ++i) data_.push_back(-1);
inline void Add(int p, int q, bool seq = false) {
data_.extend(sizeof(int64_t), seq);
Set(size() - 1, p, q);
}

Expand Down
Loading
Loading