Various Exact Boolean parallelizations and optimizations.

From patch D11780 from Erik Abrahamsson.
It parallelizes making the vertices, destruction of map entries,
finding if the result is PWN, finding triangle adjacencies,
and finding the ambient cell.
The latter needs a parallel_reduce from tbb, so added one into
BLI_task.hh so that if WITH_TBB is false, the code will still work.

On Erik's 6-core machine, the elapsed time went from 17.5s to 11.8s
(33% faster) on an intersection of two spheres with 3.1M faces.
On Howard's 24-core machine, the elapsed time went from 18.7s to 10.8s
for the same test.
This commit is contained in:
Erik Abrahamsson 2021-07-05 18:09:36 -04:00 committed by Howard Trickey
parent cf17f7e0cc
commit ceff86aafe
5 changed files with 286 additions and 97 deletions

View File

@ -38,6 +38,7 @@
#include "BLI_mesh_boolean.hh" #include "BLI_mesh_boolean.hh"
#include "BLI_mesh_intersect.hh" #include "BLI_mesh_intersect.hh"
#include "BLI_span.hh" #include "BLI_span.hh"
#include "BLI_task.hh"
namespace blender::meshintersect { namespace blender::meshintersect {
@ -309,22 +310,38 @@ static IMesh meshes_to_imesh(Span<const Mesh *> meshes,
clean_obmat(*obmats[mi]); clean_obmat(*obmats[mi]);
r_info->to_target_transform[mi] = inv_target_mat * objn_mat; r_info->to_target_transform[mi] = inv_target_mat * objn_mat;
/* Skip the matrix multiplication for each point when there is no transform for a mesh, Vector<Vert *> verts(me->totvert);
* for example when the first mesh is already in the target space. (Note the logic directly Span<MVert> mverts = Span(me->mvert, me->totvert);
* above, which uses an identity matrix with a null input transform). */
/* Allocate verts
* Skip the matrix multiplication for each point when there is no transform for a mesh,
* for example when the first mesh is already in the target space. (Note the logic
* directly above, which uses an identity matrix with a null input transform). */
if (obmats[mi] == nullptr) { if (obmats[mi] == nullptr) {
for (const MVert &vert : Span(me->mvert, me->totvert)) { threading::parallel_for(mverts.index_range(), 2048, [&](IndexRange range) {
const float3 co = float3(vert.co); float3 co;
r_info->mesh_to_imesh_vert[v] = arena.add_or_find_vert(mpq3(co.x, co.y, co.z), v); for (int i : range) {
++v; co = float3(mverts[i].co);
} mpq3 mco = mpq3(co.x, co.y, co.z);
double3 dco(mco[0].get_d(), mco[1].get_d(), mco[2].get_d());
verts[i] = new Vert(mco, dco, NO_INDEX, i);
}
});
} }
else { else {
for (const MVert &vert : Span(me->mvert, me->totvert)) { threading::parallel_for(mverts.index_range(), 2048, [&](IndexRange range) {
const float3 co = r_info->to_target_transform[mi] * float3(vert.co); float3 co;
r_info->mesh_to_imesh_vert[v] = arena.add_or_find_vert(mpq3(co.x, co.y, co.z), v); for (int i : range) {
++v; co = r_info->to_target_transform[mi] * float3(mverts[i].co);
} mpq3 mco = mpq3(co.x, co.y, co.z);
double3 dco(mco[0].get_d(), mco[1].get_d(), mco[2].get_d());
verts[i] = new Vert(mco, dco, NO_INDEX, i);
}
});
}
for (int i : mverts.index_range()) {
r_info->mesh_to_imesh_vert[v] = arena.add_or_find_vert(verts[i]);
++v;
} }
for (const MPoly &poly : Span(me->mpoly, me->totpoly)) { for (const MPoly &poly : Span(me->mpoly, me->totpoly)) {

View File

@ -225,6 +225,7 @@ class IMeshArena : NonCopyable, NonMovable {
*/ */
const Vert *add_or_find_vert(const mpq3 &co, int orig); const Vert *add_or_find_vert(const mpq3 &co, int orig);
const Vert *add_or_find_vert(const double3 &co, int orig); const Vert *add_or_find_vert(const double3 &co, int orig);
const Vert *add_or_find_vert(Vert *vert);
Face *add_face(Span<const Vert *> verts, Face *add_face(Span<const Vert *> verts,
int orig, int orig,

View File

@ -28,6 +28,7 @@
# define NOMINMAX # define NOMINMAX
# define TBB_MIN_MAX_CLEANUP # define TBB_MIN_MAX_CLEANUP
# endif # endif
# include "tbb/parallel_reduce.h"
# include <tbb/blocked_range.h> # include <tbb/blocked_range.h>
# include <tbb/parallel_for.h> # include <tbb/parallel_for.h>
# include <tbb/parallel_for_each.h> # include <tbb/parallel_for_each.h>
@ -76,6 +77,27 @@ void parallel_for(IndexRange range, int64_t grain_size, const Function &function
#endif #endif
} }
template<typename Value, typename Function, typename Reduction>
Value parallel_reduce(IndexRange range,
int64_t grain_size,
const Value &identity,
const Function &function,
const Reduction &reduction)
{
#ifdef WITH_TBB
return tbb::parallel_reduce(
tbb::blocked_range<int64_t>(range.first(), range.one_after_last(), grain_size),
identity,
[&](const tbb::blocked_range<int64_t> &subrange, const Value &ident) {
return function(IndexRange(subrange.begin(), subrange.size()), ident);
},
reduction);
#else
UNUSED_VARS(grain_size, reduction);
return function(range, identity);
#endif
}
/** See #BLI_task_isolate for a description of what isolating a task means. */ /** See #BLI_task_isolate for a description of what isolating a task means. */
template<typename Function> void isolate_task(const Function &function) template<typename Function> void isolate_task(const Function &function)
{ {

View File

@ -21,6 +21,7 @@
#ifdef WITH_GMP #ifdef WITH_GMP
# include <algorithm> # include <algorithm>
# include <atomic>
# include <fstream> # include <fstream>
# include <iostream> # include <iostream>
@ -50,6 +51,7 @@
# include "BLI_mesh_boolean.hh" # include "BLI_mesh_boolean.hh"
# ifdef WITH_TBB # ifdef WITH_TBB
# include "tbb/parallel_reduce.h"
# include "tbb/spin_mutex.h" # include "tbb/spin_mutex.h"
# endif # endif
@ -201,9 +203,14 @@ TriMeshTopology::TriMeshTopology(const IMesh &tm)
BLI_assert(edges != nullptr); BLI_assert(edges != nullptr);
} }
edges->append_non_duplicates(e); edges->append_non_duplicates(e);
auto createf = [t](Vector<int> **pvec) { *pvec = new Vector<int>{t}; };
auto modifyf = [t](Vector<int> **pvec) { (*pvec)->append_non_duplicates(t); }; auto p = edge_tri_.lookup_ptr(Edge(v, vnext));
this->edge_tri_.add_or_modify(Edge(v, vnext), createf, modifyf); if (p == nullptr) {
edge_tri_.add_new(e, new Vector<int>{t});
}
else {
(*p)->append_non_duplicates(t);
}
} }
} }
/* Debugging. */ /* Debugging. */
@ -228,9 +235,18 @@ TriMeshTopology::TriMeshTopology(const IMesh &tm)
TriMeshTopology::~TriMeshTopology() TriMeshTopology::~TriMeshTopology()
{ {
for (const Vector<int> *vec : edge_tri_.values()) { Vector<Vector<int> *> values;
delete vec;
/* Deconstructing is faster in parallel, so it is worth building an array of things to delete. */
for (auto item : edge_tri_.values()) {
values.append(item);
} }
threading::parallel_for(values.index_range(), 256, [&](IndexRange range) {
for (int i : range) {
delete values[i];
}
});
} }
/** A Patch is a maximal set of triangles that share manifold edges only. */ /** A Patch is a maximal set of triangles that share manifold edges only. */
@ -719,6 +735,18 @@ static PatchesInfo find_patches(const IMesh &tm, const TriMeshTopology &tmtopo)
PatchesInfo pinfo(ntri); PatchesInfo pinfo(ntri);
/* Algorithm: Grow patches across manifold edges as long as there are unassigned triangles. */ /* Algorithm: Grow patches across manifold edges as long as there are unassigned triangles. */
Stack<int> cur_patch_grow; Stack<int> cur_patch_grow;
/* Create an Array containing indices of adjacent faces. */
Array<std::array<int, 3>> t_others(tm.face_size());
threading::parallel_for(tm.face_index_range(), 2048, [&](IndexRange range) {
for (int t : range) {
const Face &tri = *tm.face(t);
for (int i = 0; i < 3; ++i) {
Edge e(tri[i], tri[(i + 1) % 3]);
t_others[t][i] = tmtopo.other_tri_if_manifold(e, t);
}
}
});
for (int t : tm.face_index_range()) { for (int t : tm.face_index_range()) {
if (pinfo.tri_patch(t) == -1) { if (pinfo.tri_patch(t) == -1) {
cur_patch_grow.push(t); cur_patch_grow.push(t);
@ -739,7 +767,7 @@ static PatchesInfo find_patches(const IMesh &tm, const TriMeshTopology &tmtopo)
const Face &tri = *tm.face(tcand); const Face &tri = *tm.face(tcand);
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
Edge e(tri[i], tri[(i + 1) % 3]); Edge e(tri[i], tri[(i + 1) % 3]);
int t_other = tmtopo.other_tri_if_manifold(e, tcand); int t_other = t_others[tcand][i];
if (dbg_level > 1) { if (dbg_level > 1) {
std::cout << " edge " << e << " generates t_other=" << t_other << "\n"; std::cout << " edge " << e << " generates t_other=" << t_other << "\n";
} }
@ -953,12 +981,8 @@ static void sort_by_signed_triangle_index(Vector<int> &g,
* To accommodate this: * To accommodate this:
* If extra_tri is non-null, then an index of EXTRA_TRI_INDEX should use it for the triangle. * If extra_tri is non-null, then an index of EXTRA_TRI_INDEX should use it for the triangle.
*/ */
static Array<int> sort_tris_around_edge(const IMesh &tm, static Array<int> sort_tris_around_edge(
const TriMeshTopology &tmtopo, const IMesh &tm, const Edge e, const Span<int> tris, const int t0, const Face *extra_tri)
const Edge e,
const Span<int> tris,
const int t0,
const Face *extra_tri)
{ {
/* Divide and conquer, quick-sort-like sort. /* Divide and conquer, quick-sort-like sort.
* Pick a triangle t0, then partition into groups: * Pick a triangle t0, then partition into groups:
@ -1023,14 +1047,14 @@ static Array<int> sort_tris_around_edge(const IMesh &tm,
} }
} }
if (g3.size() > 1) { if (g3.size() > 1) {
Array<int> g3sorted = sort_tris_around_edge(tm, tmtopo, e, g3, t0, extra_tri); Array<int> g3sorted = sort_tris_around_edge(tm, e, g3, t0, extra_tri);
std::copy(g3sorted.begin(), g3sorted.end(), g3.begin()); std::copy(g3sorted.begin(), g3sorted.end(), g3.begin());
if (dbg_level > 1) { if (dbg_level > 1) {
std::cout << "g3 sorted: " << g3 << "\n"; std::cout << "g3 sorted: " << g3 << "\n";
} }
} }
if (g4.size() > 1) { if (g4.size() > 1) {
Array<int> g4sorted = sort_tris_around_edge(tm, tmtopo, e, g4, t0, extra_tri); Array<int> g4sorted = sort_tris_around_edge(tm, e, g4, t0, extra_tri);
std::copy(g4sorted.begin(), g4sorted.end(), g4.begin()); std::copy(g4sorted.begin(), g4sorted.end(), g4.begin());
if (dbg_level > 1) { if (dbg_level > 1) {
std::cout << "g4 sorted: " << g4 << "\n"; std::cout << "g4 sorted: " << g4 << "\n";
@ -1076,7 +1100,7 @@ static void find_cells_from_edge(const IMesh &tm,
const Vector<int> *edge_tris = tmtopo.edge_tris(e); const Vector<int> *edge_tris = tmtopo.edge_tris(e);
BLI_assert(edge_tris != nullptr); BLI_assert(edge_tris != nullptr);
Array<int> sorted_tris = sort_tris_around_edge( Array<int> sorted_tris = sort_tris_around_edge(
tm, tmtopo, e, Span<int>(*edge_tris), (*edge_tris)[0], nullptr); tm, e, Span<int>(*edge_tris), (*edge_tris)[0], nullptr);
int n_edge_tris = edge_tris->size(); int n_edge_tris = edge_tris->size();
Array<int> edge_patches(n_edge_tris); Array<int> edge_patches(n_edge_tris);
@ -1338,34 +1362,46 @@ static bool patch_cell_graph_ok(const CellsInfo &cinfo, const PatchesInfo &pinfo
static bool is_pwn(const IMesh &tm, const TriMeshTopology &tmtopo) static bool is_pwn(const IMesh &tm, const TriMeshTopology &tmtopo)
{ {
constexpr int dbg_level = 0; constexpr int dbg_level = 0;
std::atomic<bool> is_pwn = true;
Vector<std::pair<Edge, Vector<int> *>> tris;
for (auto item : tmtopo.edge_tri_map_items()) { for (auto item : tmtopo.edge_tri_map_items()) {
const Edge &edge = item.key; tris.append(std::pair<Edge, Vector<int> *>(item.key, item.value));
int tot_orient = 0; }
/* For each face t attached to edge, add +1 if the edge
* is positively in t, and -1 if negatively in t. */ threading::parallel_for(tris.index_range(), 2048, [&](IndexRange range) {
for (int t : *item.value) { for (int j : range) {
const Face &face = *tm.face(t); const Edge &edge = tris[j].first;
BLI_assert(face.size() == 3); int tot_orient = 0;
for (int i : face.index_range()) { /* For each face t attached to edge, add +1 if the edge
if (face[i] == edge.v0()) { * is positively in t, and -1 if negatively in t. */
if (face[(i + 1) % 3] == edge.v1()) { for (int t : *tris[j].second) {
++tot_orient; const Face &face = *tm.face(t);
} BLI_assert(face.size() == 3);
else { for (int i : face.index_range()) {
BLI_assert(face[(i + 3 - 1) % 3] == edge.v1()); if (face[i] == edge.v0()) {
--tot_orient; if (face[(i + 1) % 3] == edge.v1()) {
++tot_orient;
}
else {
BLI_assert(face[(i + 3 - 1) % 3] == edge.v1());
--tot_orient;
}
} }
} }
} }
} if (tot_orient != 0) {
if (tot_orient != 0) { if (dbg_level > 0) {
if (dbg_level > 0) { std::cout << "edge causing non-pwn: " << edge << "\n";
std::cout << "edge causing non-pwn: " << edge << "\n"; }
is_pwn = false;
# ifdef WITH_TBB
tbb::task::self().cancel_group_execution();
# endif
} }
return false;
} }
} });
return true; return is_pwn.load();
} }
/** /**
@ -1396,8 +1432,7 @@ static int find_cell_for_point_near_edge(mpq3 p,
Array<int> edge_tris(etris->size() + 1); Array<int> edge_tris(etris->size() + 1);
std::copy(etris->begin(), etris->end(), edge_tris.begin()); std::copy(etris->begin(), etris->end(), edge_tris.begin());
edge_tris[edge_tris.size() - 1] = EXTRA_TRI_INDEX; edge_tris[edge_tris.size() - 1] = EXTRA_TRI_INDEX;
Array<int> sorted_tris = sort_tris_around_edge( Array<int> sorted_tris = sort_tris_around_edge(tm, e, edge_tris, edge_tris[0], dummy_tri);
tm, tmtopo, e, edge_tris, edge_tris[0], dummy_tri);
if (dbg_level > 0) { if (dbg_level > 0) {
std::cout << "sorted tris = " << sorted_tris << "\n"; std::cout << "sorted tris = " << sorted_tris << "\n";
} }
@ -1425,6 +1460,11 @@ static int find_cell_for_point_near_edge(mpq3 p,
return c; return c;
} }
static const Vert *max_x_vert(const Vert *a, const Vert *b)
{
return (a->co_exact.x > b->co_exact.x) ? a : b;
}
/** /**
* Find the ambient cell -- that is, the cell that is outside * Find the ambient cell -- that is, the cell that is outside
* all other cells. * all other cells.
@ -1452,39 +1492,64 @@ static int find_ambient_cell(const IMesh &tm,
/* First find a vertex with the maximum x value. */ /* First find a vertex with the maximum x value. */
/* Prefer not to populate the verts in the #IMesh just for this. */ /* Prefer not to populate the verts in the #IMesh just for this. */
const Vert *v_extreme; const Vert *v_extreme;
mpq_class extreme_x;
if (component_patches == nullptr) { if (component_patches == nullptr) {
v_extreme = (*tm.face(0))[0]; v_extreme = threading::parallel_reduce(
extreme_x = v_extreme->co_exact.x; tm.face_index_range(),
for (const Face *f : tm.faces()) { 2048,
for (const Vert *v : *f) { (*tm.face(0))[0],
const mpq_class &x = v->co_exact.x; [&](IndexRange range, const Vert *init) {
if (x > extreme_x) { const Vert *ans = init;
v_extreme = v; for (int i : range) {
extreme_x = x; const Face *f = tm.face(i);
} for (const Vert *v : *f) {
} if (v->co_exact.x > ans->co_exact.x) {
} ans = v;
}
}
}
return ans;
},
max_x_vert);
} }
else { else {
if (dbg_level > 0) { if (dbg_level > 0) {
std::cout << "restrict to patches " << *component_patches << "\n"; std::cout << "restrict to patches " << *component_patches << "\n";
} }
int p0 = (*component_patches)[0]; int p0 = (*component_patches)[0];
v_extreme = (*tm.face(pinfo.patch(p0).tri(0)))[0]; v_extreme = threading::parallel_reduce(
extreme_x = v_extreme->co_exact.x; component_patches->index_range(),
for (int p : *component_patches) { 2048,
for (int t : pinfo.patch(p).tris()) { (*tm.face(pinfo.patch(p0).tri(0)))[0],
const Face *f = tm.face(t); [&](IndexRange range, const Vert *init) {
for (const Vert *v : *f) { const Vert *ans = init;
const mpq_class &x = v->co_exact.x; for (int pi : range) {
if (x > extreme_x) { int p = (*component_patches)[pi];
v_extreme = v; const Vert *tris_ans = threading::parallel_reduce(
extreme_x = x; IndexRange(pinfo.patch(p).tot_tri()),
2048,
init,
[&](IndexRange tris_range, const Vert *t_init) {
const Vert *v_ans = t_init;
for (int i : tris_range) {
int t = pinfo.patch(p).tri(i);
const Face *f = tm.face(t);
for (const Vert *v : *f) {
if (v->co_exact.x > v_ans->co_exact.x) {
v_ans = v;
}
}
}
return v_ans;
},
max_x_vert);
if (tris_ans->co_exact.x > ans->co_exact.x) {
ans = tris_ans;
}
} }
} return ans;
} },
} max_x_vert);
} }
if (dbg_level > 0) { if (dbg_level > 0) {
std::cout << "v_extreme = " << v_extreme << "\n"; std::cout << "v_extreme = " << v_extreme << "\n";
@ -1493,7 +1558,8 @@ static int find_ambient_cell(const IMesh &tm,
* when projected onto the XY plane. That edge is guaranteed to * when projected onto the XY plane. That edge is guaranteed to
* be on the convex hull of the mesh. */ * be on the convex hull of the mesh. */
const Vector<Edge> &edges = tmtopo.vert_edges(v_extreme); const Vector<Edge> &edges = tmtopo.vert_edges(v_extreme);
const mpq_class extreme_y = v_extreme->co_exact.y; const mpq_class &extreme_x = v_extreme->co_exact.x;
const mpq_class &extreme_y = v_extreme->co_exact.y;
Edge ehull; Edge ehull;
mpq_class max_abs_slope = -1; mpq_class max_abs_slope = -1;
for (Edge e : edges) { for (Edge e : edges) {
@ -1514,8 +1580,8 @@ static int find_ambient_cell(const IMesh &tm,
if (dbg_level > 0) { if (dbg_level > 0) {
std::cout << "ehull = " << ehull << " slope = " << max_abs_slope << "\n"; std::cout << "ehull = " << ehull << " slope = " << max_abs_slope << "\n";
} }
/* Sort triangles around ehull, including a dummy triangle that include a known point in ambient /* Sort triangles around ehull, including a dummy triangle that include a known point in
* cell. */ * ambient cell. */
mpq3 p_in_ambient = v_extreme->co_exact; mpq3 p_in_ambient = v_extreme->co_exact;
p_in_ambient.x += 1; p_in_ambient.x += 1;
int c_ambient = find_cell_for_point_near_edge(p_in_ambient, ehull, tm, tmtopo, pinfo, arena); int c_ambient = find_cell_for_point_near_edge(p_in_ambient, ehull, tm, tmtopo, pinfo, arena);
@ -2816,7 +2882,8 @@ static IMesh raycast_patches_boolean(const IMesh &tm,
} }
/** /**
* If \a tri1 and \a tri2 have a common edge (in opposite orientation), * If \a tri1 and \a tri2 have a common edge (in opposite orientation),
* return the indices into \a tri1 and \a tri2 where that common edge starts. Else return (-1,-1). * return the indices into \a tri1 and \a tri2 where that common edge starts. Else return
* (-1,-1).
*/ */
static std::pair<int, int> find_tris_common_edge(const Face &tri1, const Face &tri2) static std::pair<int, int> find_tris_common_edge(const Face &tri1, const Face &tri2)
{ {
@ -3378,8 +3445,8 @@ static void dissolve_verts(IMesh *imesh, const Array<bool> dissolve, IMeshArena
* will have an original edge that is NO_INDEX. * will have an original edge that is NO_INDEX.
* Not all triangulation edges can be removed: if they ended up non-trivially overlapping a real * Not all triangulation edges can be removed: if they ended up non-trivially overlapping a real
* input edge, then we need to keep it. Also, some are necessary to make the output satisfy * input edge, then we need to keep it. Also, some are necessary to make the output satisfy
* the "valid #BMesh" property: we can't produce output faces that have repeated vertices in them, * the "valid #BMesh" property: we can't produce output faces that have repeated vertices in
* or have several disconnected boundaries (e.g., faces with holes). * them, or have several disconnected boundaries (e.g., faces with holes).
*/ */
static IMesh polymesh_from_trimesh_with_dissolve(const IMesh &tm_out, static IMesh polymesh_from_trimesh_with_dissolve(const IMesh &tm_out,
const IMesh &imesh_in, const IMesh &imesh_in,

View File

@ -43,6 +43,7 @@
# include "BLI_set.hh" # include "BLI_set.hh"
# include "BLI_span.hh" # include "BLI_span.hh"
# include "BLI_task.h" # include "BLI_task.h"
# include "BLI_task.hh"
# include "BLI_threads.h" # include "BLI_threads.h"
# include "BLI_vector.hh" # include "BLI_vector.hh"
# include "BLI_vector_set.hh" # include "BLI_vector_set.hh"
@ -51,6 +52,10 @@
# include "BLI_mesh_intersect.hh" # include "BLI_mesh_intersect.hh"
# ifdef WITH_TBB
# include "tbb/parallel_sort.h"
# endif
// # define PERFDEBUG // # define PERFDEBUG
namespace blender::meshintersect { namespace blender::meshintersect {
@ -406,6 +411,11 @@ class IMeshArena::IMeshArenaImpl : NonCopyable, NonMovable {
return add_or_find_vert(mco, co, orig); return add_or_find_vert(mco, co, orig);
} }
const Vert *add_or_find_vert(Vert *vert)
{
return add_or_find_vert_(vert);
}
Face *add_face(Span<const Vert *> verts, int orig, Span<int> edge_origs, Span<bool> is_intersect) Face *add_face(Span<const Vert *> verts, int orig, Span<int> edge_origs, Span<bool> is_intersect)
{ {
Face *f = new Face(verts, next_face_id_++, orig, edge_origs, is_intersect); Face *f = new Face(verts, next_face_id_++, orig, edge_origs, is_intersect);
@ -486,10 +496,9 @@ class IMeshArena::IMeshArenaImpl : NonCopyable, NonMovable {
private: private:
const Vert *add_or_find_vert(const mpq3 &mco, const double3 &dco, int orig) const Vert *add_or_find_vert(const mpq3 &mco, const double3 &dco, int orig)
{ {
/* Don't allocate Vert yet, in case it is already there. */ Vert *vtry = new Vert(mco, dco, NO_INDEX, NO_INDEX);
Vert vtry(mco, dco, NO_INDEX, NO_INDEX);
const Vert *ans; const Vert *ans;
VSetKey vskey(&vtry); VSetKey vskey(vtry);
if (intersect_use_threading) { if (intersect_use_threading) {
# ifdef USE_SPINLOCK # ifdef USE_SPINLOCK
BLI_spin_lock(&lock_); BLI_spin_lock(&lock_);
@ -499,7 +508,9 @@ class IMeshArena::IMeshArenaImpl : NonCopyable, NonMovable {
} }
const VSetKey *lookup = vset_.lookup_key_ptr(vskey); const VSetKey *lookup = vset_.lookup_key_ptr(vskey);
if (!lookup) { if (!lookup) {
vskey.vert = new Vert(mco, dco, next_vert_id_++, orig); vtry->id = next_vert_id_++;
vtry->orig = orig;
vskey.vert = vtry; // new Vert(mco, dco, next_vert_id_++, orig);
vset_.add_new(vskey); vset_.add_new(vskey);
allocated_verts_.append(std::unique_ptr<Vert>(vskey.vert)); allocated_verts_.append(std::unique_ptr<Vert>(vskey.vert));
ans = vskey.vert; ans = vskey.vert;
@ -510,6 +521,45 @@ class IMeshArena::IMeshArenaImpl : NonCopyable, NonMovable {
* This is the intended semantics: if the Vert already * This is the intended semantics: if the Vert already
* exists then we are merging verts and using the first-seen * exists then we are merging verts and using the first-seen
* one as the canonical one. */ * one as the canonical one. */
delete vtry;
ans = lookup->vert;
}
if (intersect_use_threading) {
# ifdef USE_SPINLOCK
BLI_spin_unlock(&lock_);
# else
BLI_mutex_unlock(mutex_);
# endif
}
return ans;
};
const Vert *add_or_find_vert_(Vert *vtry)
{
const Vert *ans;
VSetKey vskey(vtry);
if (intersect_use_threading) {
# ifdef USE_SPINLOCK
BLI_spin_lock(&lock_);
# else
BLI_mutex_lock(mutex_);
# endif
}
const VSetKey *lookup = vset_.lookup_key_ptr(vskey);
if (!lookup) {
vtry->id = next_vert_id_++;
vskey.vert = vtry; // new Vert(mco, dco, next_vert_id_++, orig);
vset_.add_new(vskey);
allocated_verts_.append(std::unique_ptr<Vert>(vskey.vert));
ans = vskey.vert;
}
else {
/* It was a duplicate, so return the existing one.
* Note that the returned Vert may have a different orig.
* This is the intended semantics: if the Vert already
* exists then we are merging verts and using the first-seen
* one as the canonical one. */
delete vtry;
ans = lookup->vert; ans = lookup->vert;
} }
if (intersect_use_threading) { if (intersect_use_threading) {
@ -550,6 +600,11 @@ const Vert *IMeshArena::add_or_find_vert(const mpq3 &co, int orig)
return pimpl_->add_or_find_vert(co, orig); return pimpl_->add_or_find_vert(co, orig);
} }
const Vert *IMeshArena::add_or_find_vert(Vert *vert)
{
return pimpl_->add_or_find_vert(vert);
}
Face *IMeshArena::add_face(Span<const Vert *> verts, Face *IMeshArena::add_face(Span<const Vert *> verts,
int orig, int orig,
Span<int> edge_origs, Span<int> edge_origs,
@ -633,7 +688,11 @@ void IMesh::populate_vert(int max_verts)
* TODO: when all debugged, set fix_order = false. */ * TODO: when all debugged, set fix_order = false. */
const bool fix_order = true; const bool fix_order = true;
if (fix_order) { if (fix_order) {
# ifdef WITH_TBB
tbb::parallel_sort(vert_.begin(), vert_.end(), [](const Vert *a, const Vert *b) {
# else
std::sort(vert_.begin(), vert_.end(), [](const Vert *a, const Vert *b) { std::sort(vert_.begin(), vert_.end(), [](const Vert *a, const Vert *b) {
# endif
if (a->orig != NO_INDEX && b->orig != NO_INDEX) { if (a->orig != NO_INDEX && b->orig != NO_INDEX) {
return a->orig < b->orig; return a->orig < b->orig;
} }
@ -2187,6 +2246,14 @@ IMesh triangulate_polymesh(IMesh &imesh, IMeshArena *arena)
Vector<Face *> face_tris; Vector<Face *> face_tris;
constexpr int estimated_tris_per_face = 3; constexpr int estimated_tris_per_face = 3;
face_tris.reserve(estimated_tris_per_face * imesh.face_size()); face_tris.reserve(estimated_tris_per_face * imesh.face_size());
threading::parallel_for(imesh.face_index_range(), 2048, [&](IndexRange range) {
for (int i : range) {
Face *f = imesh.face(i);
if (!f->plane_populated() && f->size() >= 4) {
f->populate_plane(false);
}
}
});
for (Face *f : imesh.faces()) { for (Face *f : imesh.faces()) {
/* Tessellate face f, following plan similar to #BM_face_calc_tessellation. */ /* Tessellate face f, following plan similar to #BM_face_calc_tessellation. */
int flen = f->size(); int flen = f->size();
@ -2278,12 +2345,22 @@ class TriOverlaps {
if (two_trees_no_self) { if (two_trees_no_self) {
tree_b_ = BLI_bvhtree_new(tm.face_size(), FLT_EPSILON, 8, 6); tree_b_ = BLI_bvhtree_new(tm.face_size(), FLT_EPSILON, 8, 6);
} }
/* Create a Vector containing face shape. */
Vector<int> shapes;
shapes.resize(tm.face_size());
threading::parallel_for(tm.face_index_range(), 2048, [&](IndexRange range) {
for (int t : range) {
shapes[t] = shape_fn(tm.face(t)->orig);
}
});
float bbpts[6]; float bbpts[6];
for (int t : tm.face_index_range()) { for (int t : tm.face_index_range()) {
const BoundingBox &bb = tri_bb[t]; const BoundingBox &bb = tri_bb[t];
copy_v3_v3(bbpts, bb.min); copy_v3_v3(bbpts, bb.min);
copy_v3_v3(bbpts + 3, bb.max); copy_v3_v3(bbpts + 3, bb.max);
int shape = shape_fn(tm.face(t)->orig); int shape = shapes[t];
if (two_trees_no_self) { if (two_trees_no_self) {
if (shape == 0) { if (shape == 0) {
BLI_bvhtree_insert(tree_, t, bbpts, 2); BLI_bvhtree_insert(tree_, t, bbpts, 2);
@ -2575,11 +2652,13 @@ static void calc_subdivided_non_cluster_tris(Array<IMesh> &r_tri_subdivided,
0, overlap_tri_range_tot, &data, calc_subdivided_tri_range_func, &settings); 0, overlap_tri_range_tot, &data, calc_subdivided_tri_range_func, &settings);
/* Now have to put in the triangles that are the same as the input ones, and not in clusters. /* Now have to put in the triangles that are the same as the input ones, and not in clusters.
*/ */
for (int t : tm.face_index_range()) { threading::parallel_for(tm.face_index_range(), 2048, [&](IndexRange range) {
if (r_tri_subdivided[t].face_size() == 0 && clinfo.tri_cluster(t) == NO_INDEX) { for (int t : range) {
r_tri_subdivided[t] = IMesh({tm.face(t)}); if (r_tri_subdivided[t].face_size() == 0 && clinfo.tri_cluster(t) == NO_INDEX) {
r_tri_subdivided[t] = IMesh({tm.face(t)});
}
} }
} });
} }
/** /**
@ -2936,11 +3015,15 @@ IMesh trimesh_nary_intersect(const IMesh &tm_in,
double overlap_time = PIL_check_seconds_timer(); double overlap_time = PIL_check_seconds_timer();
std::cout << "intersect overlaps calculated, time = " << overlap_time - bb_calc_time << "\n"; std::cout << "intersect overlaps calculated, time = " << overlap_time - bb_calc_time << "\n";
# endif # endif
for (int t : tm_clean->face_index_range()) { Array<IMesh> tri_subdivided(tm_clean->face_size(), NoInitialization());
if (tri_ov.first_overlap_index(t) != -1) { threading::parallel_for(tm_clean->face_index_range(), 1024, [&](IndexRange range) {
tm_clean->face(t)->populate_plane(true); for (int t : range) {
if (tri_ov.first_overlap_index(t) != -1) {
tm_clean->face(t)->populate_plane(true);
}
new (static_cast<void *>(&tri_subdivided[t])) IMesh;
} }
} });
# ifdef PERFDEBUG # ifdef PERFDEBUG
double plane_populate = PIL_check_seconds_timer(); double plane_populate = PIL_check_seconds_timer();
std::cout << "planes populated, time = " << plane_populate - overlap_time << "\n"; std::cout << "planes populated, time = " << plane_populate - overlap_time << "\n";
@ -2965,7 +3048,6 @@ IMesh trimesh_nary_intersect(const IMesh &tm_in,
doperfmax(1, clinfo.tot_cluster()); doperfmax(1, clinfo.tot_cluster());
doperfmax(2, tri_ov.overlap().size()); doperfmax(2, tri_ov.overlap().size());
# endif # endif
Array<IMesh> tri_subdivided(tm_clean->face_size());
calc_subdivided_non_cluster_tris(tri_subdivided, *tm_clean, itt_map, clinfo, tri_ov, arena); calc_subdivided_non_cluster_tris(tri_subdivided, *tm_clean, itt_map, clinfo, tri_ov, arena);
# ifdef PERFDEBUG # ifdef PERFDEBUG
double subdivided_tris_time = PIL_check_seconds_timer(); double subdivided_tris_time = PIL_check_seconds_timer();