/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include <array>
#include <optional>
#include <variant>
#include "BLI_bit_span.hh"
#include "BLI_function_ref.hh"
#include "BLI_linear_allocator.hh"
#include "BLI_offset_span.hh"
#include "BLI_task.hh"
#include "BLI_unique_sorted_indices.hh"
#include "BLI_vector.hh"
namespace blender {
template<typename T> class VArray;
}
namespace blender::index_mask {
/**
* Constants that define the maximum segment size. Segment sizes are limited so that the indices
* within each segment can be stored as #int16_t, which allows the mask to be stored much more
* compactly than if 32 or 64 bit ints were used.
* - Using 8 bit ints does not work well, because then the maximum segment size would be too small
* to eliminate the per-segment overhead in many cases and would also lead to many more segments.
* - The most significant bit is not used so that signed integers can be used, which avoids common
* issues when mixing signed and unsigned ints.
* - The second most-significant bit is not used for indices so that #max_segment_size itself can
* be stored in the #int16_t.
* - The maximum number of indices in a segment is 16384, which is generally enough to make the
* overhead per segment negligible when processing large index masks.
* - A power of two is used for #max_segment_size, because that allows for faster construction of
* index masks for index ranges.
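* For example, the global index 100000 can be stored in a segment with the offset
* 98304 (= 6 * 16384) and the 16 bit local index 1696 (= 100000 - 98304).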
*/
static constexpr int64_t max_segment_size_shift = 14;
static constexpr int64_t max_segment_size = (1 << max_segment_size_shift); /* 16384 */
static constexpr int64_t max_segment_size_mask_low = max_segment_size - 1;
static constexpr int64_t max_segment_size_mask_high = ~max_segment_size_mask_low;
/**
* Encodes a position in an #IndexMask. The term "raw" just means that this does not have the usual
* iterator methods like `operator++`. Supporting those would require storing more data. Generally,
* the fastest way to iterate over an #IndexMask is using a `foreach_*` method anyway.
*/
struct RawMaskIterator {
/** Index of the segment in the index mask. */
int64_t segment_i;
/** Element within the segment. */
int16_t index_in_segment;
};
/**
* Base type of #IndexMask. This only exists to make it more convenient to construct an index mask
* in a few functions with #IndexMask::data_for_inplace_construction.
*
* The names intentionally have a trailing underscore here even though they are public in
* #IndexMaskData because they are private in #IndexMask.
*/
struct IndexMaskData {
/**
* Size of the index mask, i.e. the number of indices.
*/
int64_t indices_num_;
/**
* Number of segments in the index mask. Each segment contains at least one of the indices.
*/
int64_t segments_num_;
/**
* Pointer to the index array for every segment. The size of each array can be computed from
* #cumulative_segment_sizes_.
*/
const int16_t **indices_by_segment_;
/**
* Offset that is applied to the indices in each segment.
*/
const int64_t *segment_offsets_;
/**
* Encodes the size of each segment. The size of a specific segment can be computed by
* subtracting consecutive values (also see #OffsetIndices). The size of this array is one
* larger than #segments_num_. Note that the first value is _not_ necessarily zero when an
* index mask is a slice of another mask.
*/
const int64_t *cumulative_segment_sizes_;
/**
* Index into the first segment where the #IndexMask starts. This exists to support slicing
* without having to modify and therefore allocate a new #indices_by_segment_ array.
*/
int64_t begin_index_in_segment_;
/**
* Index into the last segment where the #IndexMask ends. This exists to support slicing without
* having to modify and therefore allocate a new #cumulative_segment_sizes_ array.
*/
int64_t end_index_in_segment_;
};
/**
* #IndexMask does not own any memory itself. In many cases the memory referenced by a mask has
* static lifetime (e.g. when a mask is a range). To create more complex masks, additional memory
* is necessary. #IndexMaskMemory is a simple wrapper around a linear allocator that has to be
* passed to functions that might need to allocate extra memory.
*/
class IndexMaskMemory : public LinearAllocator<> {
private:
/** Inline buffer to avoid heap allocations when working with small index masks. */
AlignedBuffer<1024, 8> inline_buffer_;
public:
IndexMaskMemory()
{
this->provide_buffer(inline_buffer_);
}
};
using IndexMaskSegment = OffsetSpan<int64_t, int16_t>;
/**
* An #IndexMask is a sequence of unique and sorted indices (`BLI_unique_sorted_indices.hh`).
* It's commonly used when a subset of elements in an array has to be processed.
*
* #IndexMask is a non-owning container. The data it references is usually either statically
* allocated or is owned by an #IndexMaskMemory.
*
* Internally, an index mask is split into an arbitrary number of ordered segments. Each segment
* contains up to #max_segment_size (2^14 = 16384) indices. The indices in a segment are stored as
* `int16_t`, but each segment also has an `int64_t` offset.
*
* The data structure is designed to satisfy the following key requirements:
* - Construct index mask for an #IndexRange in O(1) time (after initial setup).
* - Support efficient slicing (O(log n) with a low constant factor).
* - Support multi-threaded construction without severe serial bottlenecks.
* - Support efficient iteration over indices that uses #IndexRange when possible.
*
* Construction:
* A new index mask is usually created by calling one of its constructors which are O(1), or for
* more complex masks, by calling various `IndexMask::from_*` functions that create masks from
* various sources. Those generally need additional memory which is provided by an
* #IndexMaskMemory.
*
* Some of the `IndexMask::from_*` functions have an `IndexMask universe` input. When
* provided, the function will only consider the indices in the "universe". The term comes from
* mathematics: https://en.wikipedia.org/wiki/Universe_(mathematics).
*
* Iteration:
* To iterate over the indices, one usually has to use one of the `foreach_*` functions which
* require a callback function. Due to the internal segmentation of the index mask, this is more
* efficient than using a normal C++ iterator and range-based for loops.
*
* There are multiple variants of the `foreach_*` functions which are useful in different
* scenarios. The callback can generally take one or two arguments. The first is the index
* stored in the mask and the second is the position of that index within the mask, i.e. the
* value that would have to be passed into `operator[]` to get the first argument.
*
* The `foreach_*` methods also accept an optional `GrainSize` argument. When that is provided,
* multi-threading is used when appropriate. Integrating multi-threading at this level works well
* because mask iteration and parallelism are often used at the same time.
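*
* For example (a sketch; `positions` and `r_values` are hypothetical arrays):
*
*   IndexMaskMemory memory;
*   const IndexMask mask = IndexMask::from_predicate(
*       IndexMask(positions.size()), GrainSize(1024), memory,
*       [&](const int64_t i) { return positions[i] > 0.0f; });
*   mask.foreach_index(GrainSize(2048), [&](const int64_t i, const int64_t pos) {
*     r_values[pos] = positions[i];
*   });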
*
* Extraction:
* An #IndexMask can be converted into various other forms using the `to_*` methods.
*
*/
class IndexMask : private IndexMaskData {
public:
/** Construct an empty mask. */
IndexMask();
/** Construct a mask that contains the indices from 0 to `size - 1`. This takes O(1) time. */
explicit IndexMask(int64_t size);
/** Construct a mask that contains the indices in the range. This takes O(1) time. */
IndexMask(IndexRange range);
/** Construct a mask from unique sorted indices. */
template<typename T> static IndexMask from_indices(Span<T> indices, IndexMaskMemory &memory);
/** Construct a mask from the indices of set bits. */
static IndexMask from_bits(BitSpan bits, IndexMaskMemory &memory);
/** Construct a mask from the indices of set bits, but limited to the indices in #universe. */
static IndexMask from_bits(const IndexMask &universe, BitSpan bits, IndexMaskMemory &memory);
/** Construct a mask from the true indices. */
static IndexMask from_bools(Span<bool> bools, IndexMaskMemory &memory);
static IndexMask from_bools(const VArray<bool> &bools, IndexMaskMemory &memory);
/** Construct a mask from the true indices, but limited by the indices in #universe. */
static IndexMask from_bools(const IndexMask &universe,
Span<bool> bools,
IndexMaskMemory &memory);
static IndexMask from_bools(const IndexMask &universe,
const VArray<bool> &bools,
IndexMaskMemory &memory);
/** Construct a mask from the union of two other masks. */
static IndexMask from_union(const IndexMask &mask_a,
const IndexMask &mask_b,
IndexMaskMemory &memory);
/** Construct a mask from all the indices for which the predicate is true. */
template<typename Fn>
static IndexMask from_predicate(const IndexMask &universe,
GrainSize grain_size,
IndexMaskMemory &memory,
Fn &&predicate);
/** Sorts all indices from #universe into the different output masks. */
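/**
 * Example (a sketch that partitions a universe by even/odd index; `universe` and
 * `memory` are assumed to exist):
 *   Vector<IndexMask> masks(2);
 *   IndexMask::from_groups<int>(
 *       universe, memory, [](const int64_t i) { return i % 2; }, masks);
 */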
template<typename T, typename Fn>
static void from_groups(const IndexMask &universe,
IndexMaskMemory &memory,
Fn &&get_group_index,
MutableSpan<IndexMask> r_masks);
int64_t size() const;
bool is_empty() const;
IndexRange index_range() const;
int64_t first() const;
int64_t last() const;
/**
* \return Minimum number of elements an array has to have so that it can be indexed by every
* index stored in the mask.
*/
int64_t min_array_size() const;
/**
* \return Position where the #query_index is stored, or none if the index is not in the mask.
*/
std::optional<RawMaskIterator> find(int64_t query_index) const;
/**
* \return True when the #query_index is stored in the mask.
*/
bool contains(int64_t query_index) const;
/** \return The iterator for the given index such that `mask[iterator] == mask[index]`. */
RawMaskIterator index_to_iterator(int64_t index) const;
/** \return The index for the given iterator such that `mask[iterator] == mask[index]`. */
int64_t iterator_to_index(const RawMaskIterator &it) const;
/**
* Get the index at the given position. Prefer `foreach_*` methods for better performance. This
* takes O(log n) time.
*/
int64_t operator[](int64_t i) const;
/**
* Same as above but takes O(1) time. It's still preferable to use `foreach_*` methods for
* iteration.
*/
int64_t operator[](const RawMaskIterator &it) const;
/**
* Get a new mask that contains a consecutive subset of this mask. This takes O(log n) time but
* can reuse the memory from the source mask.
*/
IndexMask slice(IndexRange range) const;
IndexMask slice(int64_t start, int64_t size) const;
/**
* Same as above but can also add an offset to every index in the mask.
* Takes O(log n + range.size()) time but with a very small constant factor.
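* For example, slicing the mask {1, 3, 5, 7} with the range [1, 3) and the offset -3
* results in the mask {0, 2}.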
*/
IndexMask slice_and_offset(IndexRange range, int64_t offset, IndexMaskMemory &memory) const;
IndexMask slice_and_offset(int64_t start,
int64_t size,
int64_t offset,
IndexMaskMemory &memory) const;
/**
* \return A new index mask that contains all the indices from the universe that are not in the
* current mask.
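* For example, the complement of {1, 2} within the universe [0, 4) is {0, 3}.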
*/
IndexMask complement(IndexRange universe, IndexMaskMemory &memory) const;
/**
* \return Number of segments in the mask.
*/
int64_t segments_num() const;
/**
* \return Indices stored in the n-th segment.
*/
IndexMaskSegment segment(int64_t segment_i) const;
/**
* Calls the function once for every index.
*
* Supported function signatures:
* - `(int64_t i)`
* - `(int64_t i, int64_t pos)`
*
* `i` is the index that should be processed and `pos` is the position of that index in the mask:
* `i == mask[pos]`
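*
* Example (a sketch; `src` and `dst` are hypothetical arrays):
*   mask.foreach_index([&](const int64_t i, const int64_t pos) { dst[pos] = src[i]; });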
*/
template<typename Fn> void foreach_index(Fn &&fn) const;
template<typename Fn> void foreach_index(GrainSize grain_size, Fn &&fn) const;
/**
* Same as #foreach_index, but generates more code, increasing compile time and binary size. This
* is because separate loops are generated for segments that are ranges and those that are not.
* Only use this when very little processing is done for each index.
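*
* Example (a sketch; `counts` is a hypothetical array):
*   mask.foreach_index_optimized<int>([&](const int i) { counts[i]++; });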
*/
template<typename IndexT, typename Fn> void foreach_index_optimized(Fn &&fn) const;
template<typename IndexT, typename Fn>
void foreach_index_optimized(GrainSize grain_size, Fn &&fn) const;
/**
* Calls the function once for every segment. This should be used instead of #foreach_index if
* the algorithm can be implemented more efficiently by processing multiple indices at once.
*
* Supported function signatures:
* - `(IndexMaskSegment segment)`
* - `(IndexMaskSegment segment, int64_t segment_pos)`
*
* The `segment_pos` is the position in the mask where the segment starts:
* `segment[0] == mask[segment_pos]`
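*
* Example (a sketch that sums up all indices in the mask):
*   int64_t sum = 0;
*   mask.foreach_segment([&](const IndexMaskSegment segment) {
*     for (const int64_t i : segment) {
*       sum += i;
*     }
*   });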
*/
template<typename Fn> void foreach_segment(Fn &&fn) const;
template<typename Fn> void foreach_segment(GrainSize grain_size, Fn &&fn) const;
/**
* This is similar to #foreach_segment but supports slightly different function signatures:
* - `(auto segment)`
* - `(auto segment, int64_t segment_pos)`
*
* The `segment` input is either of type `IndexMaskSegment` or `IndexRange`, so the function has
* to support both cases. This also means that more code is generated by the compiler because the
* function is instantiated twice. Only use this when very little processing happens per index.
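*
* Example (a sketch; the loop body is compiled once for #IndexRange and once for
* #IndexMaskSegment):
*   int64_t sum = 0;
*   mask.foreach_segment_optimized([&](const auto segment) {
*     for (const int64_t i : segment) {
*       sum += i;
*     }
*   });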
*/
template<typename Fn> void foreach_segment_optimized(Fn &&fn) const;
template<typename Fn> void foreach_segment_optimized(GrainSize grain_size, Fn &&fn) const;
/**
* Calls the function once for every range. Note that this might call the function for each index
* separately in the worst case if there are no consecutive indices.
*
* Supported function signatures:
* - `(IndexRange segment)`
* - `(IndexRange segment, int64_t segment_pos)`
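*
* Example (a sketch; `fill_range` is a hypothetical function that is more efficient for
* whole ranges than for individual indices):
*   mask.foreach_range([&](const IndexRange range) { fill_range(range); });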
*/
template<typename Fn> void foreach_range(Fn &&fn) const;
/**
* Fill the provided span with the indices in the mask. The span is expected to have the same
* size as the mask.
*/
template<typename T> void to_indices(MutableSpan<T> r_indices) const;
/**
* Set the bits at indices in the mask to 1 and all other bits to 0.
*/
void to_bits(MutableBitSpan r_bits) const;
/**
* Set the bools at indices in the mask to true and all others to false.
*/
void to_bools(MutableSpan<bool> r_bools) const;
/**
* Try to convert the entire index mask into a range. This only works if there are no gaps
* between any indices.
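* For example, the mask {3, 4, 5, 6} converts to `IndexRange(3, 4)`, while {3, 5, 6}
* does not convert to a range because of the gap.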
*/
std::optional<IndexRange> to_range() const;
/**
* \return All index ranges in the mask. In the worst case this is a separate range for every
* index.
*/
Vector<IndexRange> to_ranges() const;
/**
* \return All index ranges in the universe that are not in the mask. In the worst case this is a
* separate range for every index.
*/
Vector<IndexRange> to_ranges_invert(IndexRange universe) const;
/**
* \return All segments in a sorted vector. Segments that encode a range are already converted to
* an #IndexRange.
*/
template<int64_t N = 4>
Vector<std::variant<IndexRange, IndexMaskSegment>, N> to_spans_and_ranges() const;
/**
* Used by some functions to get low-level access to the mask in order to construct it.
*/
IndexMaskData &data_for_inplace_construction();
};
/**
* Utility that makes it efficient to build many small index masks from segments one after another.
* The class has to be constructed once. Afterwards, `update` has to be called to fill the mask
* with the provided segment.
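*
* Example (a sketch; `other_mask` and `process` are hypothetical):
*   IndexMaskFromSegment mask_from_segment;
*   for (const int64_t i : IndexRange(other_mask.segments_num())) {
*     const IndexMask &segment_mask = mask_from_segment.update(other_mask.segment(i));
*     process(segment_mask);
*   }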
*/
class IndexMaskFromSegment : NonCopyable, NonMovable {
private:
int64_t segment_offset_;
const int16_t *segment_indices_;
std::array<int64_t, 2> cumulative_segment_sizes_;
IndexMask mask_;
public:
IndexMaskFromSegment();
const IndexMask &update(IndexMaskSegment segment);
};
inline IndexMaskFromSegment::IndexMaskFromSegment()
{
IndexMaskData &data = mask_.data_for_inplace_construction();
cumulative_segment_sizes_[0] = 0;
data.segments_num_ = 1;
data.indices_by_segment_ = &segment_indices_;
data.segment_offsets_ = &segment_offset_;
data.cumulative_segment_sizes_ = cumulative_segment_sizes_.data();
data.begin_index_in_segment_ = 0;
}
inline const IndexMask &IndexMaskFromSegment::update(const IndexMaskSegment segment)
{
const Span<int16_t> indices = segment.base_span();
BLI_assert(!indices.is_empty());
BLI_assert(std::is_sorted(indices.begin(), indices.end()));
BLI_assert(indices[0] >= 0);
BLI_assert(indices.last() < max_segment_size);
const int64_t indices_num = indices.size();
IndexMaskData &data = mask_.data_for_inplace_construction();
segment_offset_ = segment.offset();
segment_indices_ = indices.data();
cumulative_segment_sizes_[1] = int16_t(indices_num);
data.indices_num_ = indices_num;
data.end_index_in_segment_ = indices_num;
return mask_;
}
std::array<int16_t, max_segment_size> build_static_indices_array();
const IndexMask &get_static_index_mask_for_min_size(const int64_t min_size);
std::ostream &operator<<(std::ostream &stream, const IndexMask &mask);
/* -------------------------------------------------------------------- */
/** \name Inline Utilities
* \{ */
inline const std::array<int16_t, max_segment_size> &get_static_indices_array()
{
alignas(64) static const std::array<int16_t, max_segment_size> data =
build_static_indices_array();
return data;
}
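/**
 * Set `data[i] = value` for every index `i` in the mask. `data` has to be large enough to be
 * indexed by every index in the mask (see #IndexMask::min_array_size).
 */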
template<typename T>
inline void masked_fill(MutableSpan<T> data, const T &value, const IndexMask &mask)
{
mask.foreach_index_optimized<int64_t>([&](const int64_t i) { data[i] = value; });
}
/* -------------------------------------------------------------------- */
/** \name #RawMaskIterator Inline Methods
* \{ */
inline bool operator!=(const RawMaskIterator &a, const RawMaskIterator &b)
{
return a.segment_i != b.segment_i || a.index_in_segment != b.index_in_segment;
}
inline bool operator==(const RawMaskIterator &a, const RawMaskIterator &b)
{
return !(a != b);
}
/* -------------------------------------------------------------------- */
/** \name #IndexMask Inline Methods
* \{ */
inline void init_empty_mask(IndexMaskData &data)
{
static constexpr int64_t cumulative_sizes_for_empty_mask[1] = {0};
data.indices_num_ = 0;
data.segments_num_ = 0;
data.cumulative_segment_sizes_ = cumulative_sizes_for_empty_mask;
/* Intentionally leave some pointers uninitialized; they must not be accessed on empty masks
* anyway. */
}
inline IndexMask::IndexMask()
{
init_empty_mask(*this);
}
inline IndexMask::IndexMask(const int64_t size)
{
if (size == 0) {
init_empty_mask(*this);
return;
}
*this = get_static_index_mask_for_min_size(size);
indices_num_ = size;
segments_num_ = ((size + max_segment_size - 1) >> max_segment_size_shift);
begin_index_in_segment_ = 0;
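/* Number of indices that are actually used in the last segment. */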
end_index_in_segment_ = size - ((size - 1) & max_segment_size_mask_high);
}
inline IndexMask::IndexMask(const IndexRange range)
{
if (range.is_empty()) {
init_empty_mask(*this);
return;
}
const int64_t one_after_last = range.one_after_last();
*this = get_static_index_mask_for_min_size(one_after_last);
const int64_t first_segment_i = range.first() >> max_segment_size_shift;
const int64_t last_segment_i = range.last() >> max_segment_size_shift;
indices_num_ = range.size();
segments_num_ = last_segment_i - first_segment_i + 1;
indices_by_segment_ += first_segment_i;
segment_offsets_ += first_segment_i;
cumulative_segment_sizes_ += first_segment_i;
begin_index_in_segment_ = range.first() & max_segment_size_mask_low;
end_index_in_segment_ = one_after_last - ((one_after_last - 1) & max_segment_size_mask_high);
}
inline int64_t IndexMask::size() const
{
return indices_num_;
}
inline bool IndexMask::is_empty() const
{
return indices_num_ == 0;
}
inline IndexRange IndexMask::index_range() const
{
return IndexRange(indices_num_);
}
inline int64_t IndexMask::first() const
{
BLI_assert(indices_num_ > 0);
return segment_offsets_[0] + indices_by_segment_[0][begin_index_in_segment_];
}
inline int64_t IndexMask::last() const
{
BLI_assert(indices_num_ > 0);
const int64_t last_segment_i = segments_num_ - 1;
return segment_offsets_[last_segment_i] +
indices_by_segment_[last_segment_i][end_index_in_segment_ - 1];
}
inline int64_t IndexMask::min_array_size() const
{
if (indices_num_ == 0) {
return 0;
}
return this->last() + 1;
}
inline RawMaskIterator IndexMask::index_to_iterator(const int64_t index) const
{
BLI_assert(index >= 0);
BLI_assert(index < indices_num_);
RawMaskIterator it;
const int64_t full_index = index + cumulative_segment_sizes_[0] + begin_index_in_segment_;
it.segment_i = -1 +
binary_search::find_predicate_begin(
cumulative_segment_sizes_,
cumulative_segment_sizes_ + segments_num_ + 1,
[&](const int64_t cumulative_size) { return cumulative_size > full_index; });
it.index_in_segment = full_index - cumulative_segment_sizes_[it.segment_i];
return it;
}
inline int64_t IndexMask::iterator_to_index(const RawMaskIterator &it) const
{
BLI_assert(it.segment_i >= 0);
BLI_assert(it.segment_i < segments_num_);
BLI_assert(it.index_in_segment >= 0);
BLI_assert(it.index_in_segment < cumulative_segment_sizes_[it.segment_i + 1] -
cumulative_segment_sizes_[it.segment_i]);
return it.index_in_segment + cumulative_segment_sizes_[it.segment_i] -
cumulative_segment_sizes_[0] - begin_index_in_segment_;
}
inline int64_t IndexMask::operator[](const int64_t i) const
{
const RawMaskIterator it = this->index_to_iterator(i);
return (*this)[it];
}
inline int64_t IndexMask::operator[](const RawMaskIterator &it) const
{
return segment_offsets_[it.segment_i] + indices_by_segment_[it.segment_i][it.index_in_segment];
}
inline int64_t IndexMask::segments_num() const
{
return segments_num_;
}
inline IndexMaskSegment IndexMask::segment(const int64_t segment_i) const
{
BLI_assert(segment_i >= 0);
BLI_assert(segment_i < segments_num_);
const int64_t full_segment_size = cumulative_segment_sizes_[segment_i + 1] -
cumulative_segment_sizes_[segment_i];
const int64_t begin_index = (segment_i == 0) ? begin_index_in_segment_ : 0;
const int64_t end_index = (segment_i == segments_num_ - 1) ? end_index_in_segment_ :
full_segment_size;
const int64_t segment_size = end_index - begin_index;
return IndexMaskSegment{segment_offsets_[segment_i],
{indices_by_segment_[segment_i] + begin_index, segment_size}};
}
inline IndexMask IndexMask::slice(const IndexRange range) const
{
return this->slice(range.start(), range.size());
}
inline IndexMaskData &IndexMask::data_for_inplace_construction()
{
return *this;
}
template<typename Fn>
constexpr bool has_segment_and_start_parameter =
std::is_invocable_r_v<void, Fn, IndexMaskSegment, int64_t> ||
std::is_invocable_r_v<void, Fn, IndexRange, int64_t>;
template<typename Fn> inline void IndexMask::foreach_index(Fn &&fn) const
{
this->foreach_segment(
[&](const IndexMaskSegment indices, [[maybe_unused]] const int64_t start_segment_pos) {
if constexpr (std::is_invocable_r_v<void, Fn, int64_t, int64_t>) {
for (const int64_t i : indices.index_range()) {
fn(indices[i], start_segment_pos + i);
}
}
else {
for (const int64_t index : indices) {
fn(index);
}
}
});
}
template<typename Fn>
inline void IndexMask::foreach_index(const GrainSize grain_size, Fn &&fn) const
{
threading::parallel_for(this->index_range(), grain_size.value, [&](const IndexRange range) {
const IndexMask sub_mask = this->slice(range);
sub_mask.foreach_index([&](const int64_t i, [[maybe_unused]] const int64_t index_pos) {
if constexpr (std::is_invocable_r_v<void, Fn, int64_t, int64_t>) {
fn(i, index_pos + range.start());
}
else {
fn(i);
}
});
});
}
template<typename T, typename Fn>
#if (defined(__GNUC__) && !defined(__clang__))
[[gnu::optimize("O3")]]
#endif
inline void
optimized_foreach_index(const IndexMaskSegment segment, const Fn fn)
{
BLI_assert(segment.last() < std::numeric_limits<T>::max());
if (unique_sorted_indices::non_empty_is_range(segment.base_span())) {
const T start = T(segment[0]);
const T last = T(segment.last());
for (T i = start; i <= last; i++) {
fn(i);
}
}
else {
for (const int64_t i : segment) {
fn(T(i));
}
}
}
template<typename T, typename Fn>
#if (defined(__GNUC__) && !defined(__clang__))
[[gnu::optimize("O3")]]
#endif
inline void
optimized_foreach_index_with_pos(const IndexMaskSegment segment,
const int64_t segment_pos,
const Fn fn)
{
BLI_assert(segment.last() < std::numeric_limits<T>::max());
BLI_assert(segment.size() + segment_pos < std::numeric_limits<T>::max());
if (unique_sorted_indices::non_empty_is_range(segment.base_span())) {
const T start = T(segment[0]);
const T last = T(segment.last());
for (T i = start, pos = T(segment_pos); i <= last; i++, pos++) {
fn(i, pos);
}
}
else {
T pos = T(segment_pos);
for (const int64_t i : segment.index_range()) {
const T index = T(segment[i]);
fn(index, pos);
pos++;
}
}
}
template<typename IndexT, typename Fn>
inline void IndexMask::foreach_index_optimized(Fn &&fn) const
{
this->foreach_segment(
[&](const IndexMaskSegment segment, [[maybe_unused]] const int64_t segment_pos) {
if constexpr (std::is_invocable_r_v<void, Fn, IndexT, IndexT>) {
optimized_foreach_index_with_pos<IndexT>(segment, segment_pos, fn);
}
else {
optimized_foreach_index<IndexT>(segment, fn);
}
});
}
template<typename IndexT, typename Fn>
inline void IndexMask::foreach_index_optimized(const GrainSize grain_size, Fn &&fn) const
{
threading::parallel_for(this->index_range(), grain_size.value, [&](const IndexRange range) {
const IndexMask sub_mask = this->slice(range);
sub_mask.foreach_segment(
[&](const IndexMaskSegment segment, [[maybe_unused]] const int64_t segment_pos) {
if constexpr (std::is_invocable_r_v<void, Fn, IndexT, IndexT>) {
optimized_foreach_index_with_pos<IndexT>(segment, segment_pos + range.start(), fn);
}
else {
optimized_foreach_index<IndexT>(segment, fn);
}
});
});
}
template<typename Fn> inline void IndexMask::foreach_segment_optimized(Fn &&fn) const
{
this->foreach_segment(
[&](const IndexMaskSegment segment, [[maybe_unused]] const int64_t start_segment_pos) {
if (unique_sorted_indices::non_empty_is_range(segment.base_span())) {
const IndexRange range(segment[0], segment.size());
if constexpr (has_segment_and_start_parameter<Fn>) {
fn(range, start_segment_pos);
}
else {
fn(range);
}
}
else {
if constexpr (has_segment_and_start_parameter<Fn>) {
fn(segment, start_segment_pos);
}
else {
fn(segment);
}
}
});
}
template<typename Fn>
inline void IndexMask::foreach_segment_optimized(const GrainSize grain_size, Fn &&fn) const
{
threading::parallel_for(this->index_range(), grain_size.value, [&](const IndexRange range) {
const IndexMask sub_mask = this->slice(range);
sub_mask.foreach_segment_optimized(
[&fn, range_start = range.start()](const auto segment,
[[maybe_unused]] const int64_t start_segment_pos) {
if constexpr (has_segment_and_start_parameter<Fn>) {
fn(segment, start_segment_pos + range_start);
}
else {
fn(segment);
}
});
});
}
template<typename Fn> inline void IndexMask::foreach_segment(Fn &&fn) const
{
[[maybe_unused]] int64_t segment_pos = 0;
for (const int64_t segment_i : IndexRange(segments_num_)) {
const IndexMaskSegment segment = this->segment(segment_i);
if constexpr (has_segment_and_start_parameter<Fn>) {
fn(segment, segment_pos);
segment_pos += segment.size();
}
else {
fn(segment);
}
}
}
template<typename Fn>
inline void IndexMask::foreach_segment(const GrainSize grain_size, Fn &&fn) const
{
threading::parallel_for(this->index_range(), grain_size.value, [&](const IndexRange range) {
const IndexMask sub_mask = this->slice(range);
sub_mask.foreach_segment(
[&fn, range_start = range.start()](const IndexMaskSegment mask_segment,
[[maybe_unused]] const int64_t segment_pos) {
if constexpr (has_segment_and_start_parameter<Fn>) {
fn(mask_segment, segment_pos + range_start);
}
else {
fn(mask_segment);
}
});
});
}
template<typename Fn> inline void IndexMask::foreach_range(Fn &&fn) const
{
this->foreach_segment([&](const IndexMaskSegment indices, [[maybe_unused]] int64_t segment_pos) {
Span<int16_t> base_indices = indices.base_span();
while (!base_indices.is_empty()) {
const int64_t next_range_size = unique_sorted_indices::find_size_of_next_range(base_indices);
const IndexRange range(int64_t(base_indices[0]) + indices.offset(), next_range_size);
if constexpr (has_segment_and_start_parameter<Fn>) {
fn(range, segment_pos);
}
else {
fn(range);
}
segment_pos += next_range_size;
base_indices = base_indices.drop_front(next_range_size);
}
});
}
namespace detail {
IndexMask from_predicate_impl(
const IndexMask &universe,
GrainSize grain_size,
IndexMaskMemory &memory,
FunctionRef<int64_t(IndexMaskSegment indices, int16_t *r_true_indices)> filter_indices);
}
template<typename Fn>
inline IndexMask IndexMask::from_predicate(const IndexMask &universe,
const GrainSize grain_size,
IndexMaskMemory &memory,
Fn &&predicate)
{
return detail::from_predicate_impl(
universe,
grain_size,
memory,
[&](const IndexMaskSegment indices, int16_t *__restrict r_true_indices) {
int16_t *r_current = r_true_indices;
const int16_t *in_end = indices.base_span().end();
const int64_t offset = indices.offset();
for (const int16_t *in_current = indices.base_span().data(); in_current < in_end;
in_current++) {
const int16_t local_index = *in_current;
const int64_t global_index = int64_t(local_index) + offset;
const bool condition = predicate(global_index);
*r_current = local_index;
/* Branchless conditional increment. */
r_current += condition;
}
const int16_t true_indices_num = int16_t(r_current - r_true_indices);
return true_indices_num;
});
}
template<typename T, typename Fn>
void IndexMask::from_groups(const IndexMask &universe,
IndexMaskMemory &memory,
Fn &&get_group_index,
MutableSpan<IndexMask> r_masks)
{
Vector<Vector<T>> indices_by_group(r_masks.size());
universe.foreach_index([&](const int64_t i) {
const int group_index = get_group_index(i);
indices_by_group[group_index].append(T(i));
});
for (const int64_t i : r_masks.index_range()) {
r_masks[i] = IndexMask::from_indices<T>(indices_by_group[i], memory);
}
}
inline std::optional<IndexRange> IndexMask::to_range() const
{
if (indices_num_ == 0) {
return IndexRange{};
}
const int64_t first_index = this->first();
const int64_t last_index = this->last();
if (last_index - first_index == indices_num_ - 1) {
return IndexRange(first_index, indices_num_);
}
return std::nullopt;
}
template<int64_t N>
inline Vector<std::variant<IndexRange, IndexMaskSegment>, N> IndexMask::to_spans_and_ranges() const
{
Vector<std::variant<IndexRange, IndexMaskSegment>, N> segments;
this->foreach_segment_optimized([&](const auto segment) { segments.append(segment); });
return segments;
}
} // namespace blender::index_mask
namespace blender {
using index_mask::IndexMask;
using index_mask::IndexMaskFromSegment;
using index_mask::IndexMaskMemory;
using index_mask::IndexMaskSegment;
} // namespace blender