Mesh: Parallelize remaining topology maps

Similar to [0], but also including the further optimizations from [1],
parallelize index creation for the vertex to edge, vertex to face, and
edge to face topology maps (building the offsets, the first part of
each map, was already parallelized). Vertex to face index creation
went from 9.8 ms to 1.7 ms on average for a 1 million vertex grid.
That's nice, since this map is used for vertex normals after [2].

[0]: 226359ec48
[1]: 98e33adac2
[2]: 5052e0d407
Hans Goudey 2023-11-21 18:22:47 -05:00
parent 75f24a5f14
commit bb80716244
1 changed file with 34 additions and 23 deletions
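
For context, each of these maps is a pair of flat arrays: an offsets
array (prefix-summed counts, one group per element) and an indices
array holding every element's group contiguously. A minimal standalone
sketch of the serial construction being replaced, building a
vertex-to-edge map for a single triangle (plain C++ with illustrative
names, not the Blender API):

    #include <array>
    #include <cstdio>
    #include <vector>

    int main()
    {
      /* A triangle: verts 0, 1, 2 and edges {0,1}, {1,2}, {2,0}. */
      const std::vector<std::array<int, 2>> edges = {{0, 1}, {1, 2}, {2, 0}};
      const int verts_num = 3;

      /* Count the edges touching each vertex, then prefix-sum into offsets. */
      std::vector<int> offsets(verts_num + 1, 0);
      for (const std::array<int, 2> &edge : edges) {
        offsets[edge[0] + 1]++;
        offsets[edge[1] + 1]++;
      }
      for (int vert = 0; vert < verts_num; vert++) {
        offsets[vert + 1] += offsets[vert];
      }

      /* Scatter each edge index into the group of both of its vertices. */
      std::vector<int> indices(offsets.back());
      std::vector<int> counts(verts_num, 0);
      for (int edge_i = 0; edge_i < int(edges.size()); edge_i++) {
        for (const int vert : edges[edge_i]) {
          indices[offsets[vert] + counts[vert]++] = edge_i;
        }
      }

      for (int vert = 0; vert < verts_num; vert++) {
        std::printf("vert %d: edges", vert);
        for (int i = offsets[vert]; i < offsets[vert + 1]; i++) {
          std::printf(" %d", indices[i]);
        }
        std::printf("\n");
      }
      return 0;
    }

Each vertex of the triangle reports its two incident edges (e.g.
"vert 0: edges 0 2"). The per-vertex `counts` cursor is the serial
bookkeeping that the diff below replaces with atomic increments.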

@@ -347,6 +347,25 @@ static Array<int> reverse_indices_in_groups(const Span<int> group_indices,
   return results;
 }
 
+/* A version of #reverse_indices_in_groups that stores face indices instead of corner indices. */
+static void reverse_group_indices_in_groups(const OffsetIndices<int> groups,
+                                            const Span<int> group_to_elem,
+                                            const OffsetIndices<int> offsets,
+                                            MutableSpan<int> results)
+{
+  int *counts = MEM_cnew_array<int>(size_t(offsets.size()), __func__);
+  BLI_SCOPED_DEFER([&]() { MEM_freeN(counts); });
+  threading::parallel_for(groups.index_range(), 1024, [&](const IndexRange range) {
+    for (const int64_t face : range) {
+      for (const int elem : group_to_elem.slice(groups[face])) {
+        const int index_in_group = atomic_fetch_and_add_int32(&counts[elem], 1);
+        results[offsets[elem][index_in_group]] = int(face);
+      }
+    }
+  });
+  sort_small_groups(offsets, 1024, results);
+}
+
 static GroupedSpan<int> gather_groups(const Span<int> group_indices,
                                       const int groups_num,
                                       Array<int> &r_offsets,
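
The atomic counter is what makes the scatter above safe to run from
multiple threads, but it also makes the order within each group depend
on scheduling; the trailing #sort_small_groups call restores a
canonical ascending order. A standalone sketch of the same pattern
with std::atomic and plain std::thread chunking (illustrative names
and sizes, not the Blender API):

    #include <algorithm>
    #include <atomic>
    #include <thread>
    #include <vector>

    /* Scatter each group's index into the groups of its elements, in
     * parallel. `results` must already be sized to `offsets.back()`. */
    static void parallel_reverse_scatter(const std::vector<std::vector<int>> &groups,
                                         const std::vector<int> &offsets,
                                         std::vector<int> &results)
    {
      /* The vector value-initializes the atomics to zero. */
      std::vector<std::atomic<int>> counts(offsets.size() - 1);
      const int threads_num = std::max(1, int(std::thread::hardware_concurrency()));
      const int chunk = (int(groups.size()) + threads_num - 1) / threads_num;
      std::vector<std::thread> threads;
      for (int t = 0; t < threads_num; t++) {
        threads.emplace_back([&, t]() {
          const int begin = t * chunk;
          const int end = std::min(begin + chunk, int(groups.size()));
          for (int group_i = begin; group_i < end; group_i++) {
            for (const int elem : groups[group_i]) {
              /* Atomically claim the next free slot in this element's group. */
              const int index_in_group = counts[elem].fetch_add(1);
              results[offsets[elem] + index_in_group] = group_i;
            }
          }
        });
      }
      for (std::thread &thread : threads) {
        thread.join();
      }
      /* The scatter order depends on thread scheduling; sort each group
       * so the final map is deterministic. */
      for (int elem = 0; elem + 1 < int(offsets.size()); elem++) {
        std::sort(results.begin() + offsets[elem], results.begin() + offsets[elem + 1]);
      }
    }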
@@ -370,16 +389,21 @@ GroupedSpan<int> build_vert_to_edge_map(const Span<int2> edges,
                                         Array<int> &r_indices)
 {
   r_offsets = create_reverse_offsets(edges.cast<int>(), verts_num);
-  r_indices.reinitialize(r_offsets.last());
-  Array<int> counts(verts_num, 0);
+  const OffsetIndices<int> offsets(r_offsets);
+  r_indices.reinitialize(offsets.total_size());
-  for (const int64_t edge_i : edges.index_range()) {
-    for (const int vert : {edges[edge_i][0], edges[edge_i][1]}) {
-      r_indices[r_offsets[vert] + counts[vert]] = int(edge_i);
-      counts[vert]++;
-    }
-  }
-  return {OffsetIndices<int>(r_offsets), r_indices};
+  /* Version of #reverse_indices_in_groups that accounts for storing two indices for each edge. */
+  int *counts = MEM_cnew_array<int>(size_t(offsets.size()), __func__);
+  BLI_SCOPED_DEFER([&]() { MEM_freeN(counts); });
+  threading::parallel_for(edges.index_range(), 1024, [&](const IndexRange range) {
+    for (const int64_t edge : range) {
+      for (const int vert : {edges[edge][0], edges[edge][1]}) {
+        const int index_in_group = atomic_fetch_and_add_int32(&counts[vert], 1);
+        r_indices[offsets[vert][index_in_group]] = int(edge);
+      }
+    }
+  });
+  return {offsets, r_indices};
 }
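
One consumer-side note on the function above: every edge deposits its
index into the groups of both endpoints, so `r_indices` ends up with
exactly two entries per edge. A hypothetical helper on the same layout
that collects a vertex's neighboring vertices (standalone sketch with
plain std types, not the Blender API):

    #include <array>
    #include <vector>

    /* Walk `vert`'s group in a vert-to-edge map and collect the
     * opposite endpoint of every incident edge. */
    static std::vector<int> vert_neighbors(const int vert,
                                           const std::vector<std::array<int, 2>> &edges,
                                           const std::vector<int> &offsets,
                                           const std::vector<int> &indices)
    {
      std::vector<int> neighbors;
      for (int i = offsets[vert]; i < offsets[vert + 1]; i++) {
        const std::array<int, 2> &edge = edges[indices[i]];
        /* Each incident edge contributes the endpoint that is not `vert`. */
        neighbors.push_back(edge[0] == vert ? edge[1] : edge[0]);
      }
      return neighbors;
    }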
@@ -387,13 +411,7 @@ void build_vert_to_face_indices(const OffsetIndices<int> faces,
                                 const OffsetIndices<int> offsets,
                                 MutableSpan<int> r_indices)
 {
-  Array<int> counts(offsets.size(), 0);
-  for (const int64_t face_i : faces.index_range()) {
-    for (const int vert : corner_verts.slice(faces[face_i])) {
-      r_indices[offsets[vert].start() + counts[vert]] = int(face_i);
-      counts[vert]++;
-    }
-  }
+  reverse_group_indices_in_groups(faces, corner_verts, offsets, r_indices);
 }
 
 GroupedSpan<int> build_vert_to_face_map(const OffsetIndices<int> faces,
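
The commit message notes that the vertex-to-face map feeds vertex
normal computation after [2]. As a rough illustration of that
consumer, a standalone sketch that averages face normals over a
vertex's group (a plain unweighted average with illustrative names;
the real computation differs in weighting and data layout):

    #include <array>
    #include <cmath>
    #include <vector>

    /* Average the normals of all faces in `vert`'s group, normalized. */
    static std::array<float, 3> vert_normal(const int vert,
                                            const std::vector<std::array<float, 3>> &face_normals,
                                            const std::vector<int> &offsets,
                                            const std::vector<int> &indices)
    {
      std::array<float, 3> sum = {0.0f, 0.0f, 0.0f};
      for (int i = offsets[vert]; i < offsets[vert + 1]; i++) {
        const std::array<float, 3> &normal = face_normals[indices[i]];
        for (int axis = 0; axis < 3; axis++) {
          sum[axis] += normal[axis];
        }
      }
      const float length = std::sqrt(sum[0] * sum[0] + sum[1] * sum[1] + sum[2] * sum[2]);
      if (length > 0.0f) {
        for (int axis = 0; axis < 3; axis++) {
          sum[axis] /= length;
        }
      }
      return sum;
    }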
@@ -438,14 +456,7 @@ GroupedSpan<int> build_edge_to_face_map(const OffsetIndices<int> faces,
 {
   r_offsets = create_reverse_offsets(corner_edges, edges_num);
   r_indices.reinitialize(r_offsets.last());
-  Array<int> counts(edges_num, 0);
-  for (const int64_t face_i : faces.index_range()) {
-    for (const int edge : corner_edges.slice(faces[face_i])) {
-      r_indices[r_offsets[edge] + counts[edge]] = int(face_i);
-      counts[edge]++;
-    }
-  }
+  reverse_group_indices_in_groups(faces, corner_edges, OffsetIndices<int>(r_offsets), r_indices);
   return {OffsetIndices<int>(r_offsets), r_indices};
 }
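
Many uses of the edge-to-face map only need the group sizes, which
fall straight out of adjacent offsets. A standalone sketch that
classifies boundary and non-manifold edges from the offsets array
alone (illustrative names, not the Blender API):

    #include <vector>

    struct EdgeClassification {
      std::vector<int> boundary;     /* Exactly one adjacent face. */
      std::vector<int> non_manifold; /* More than two adjacent faces. */
    };

    /* The number of faces using an edge is the size of its group,
     * `offsets[edge + 1] - offsets[edge]`; no index lookup is needed. */
    static EdgeClassification classify_edges(const std::vector<int> &offsets)
    {
      EdgeClassification result;
      for (int edge = 0; edge + 1 < int(offsets.size()); edge++) {
        const int faces_num = offsets[edge + 1] - offsets[edge];
        if (faces_num == 1) {
          result.boundary.push_back(edge);
        }
        else if (faces_num > 2) {
          result.non_manifold.push_back(edge);
        }
      }
      return result;
    }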