tornavis/build_files/build_environment/patches/openvdb_metal.diff
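
Adds Metal Shading Language support to NanoVDB.h: under __KERNEL_METAL__ the
CUDA-style qualifiers __global__, __local__ and __constant__ map to Metal's
device, thread and constant address spaces, double is aliased to uint64_t, and
pointer- and reference-taking functions gain one overload per address space,
since Metal treats differently qualified pointers and references as distinct
types. A minimal sketch of that overload pattern (the helper name below is
hypothetical and not taken from the patch):

    // Metal keeps "device" and "thread" data in distinct address spaces, so a
    // pointer-taking helper needs a separate overload for each space. This is
    // why the hunks below duplicate functions such as PtrAdd, PtrDiff and
    // Round behind #if defined(__KERNEL_METAL__).
    template<typename T>
    inline T firstComponent(device const T* v) { return v[0]; } // data in a GPU buffer
    template<typename T>
    inline T firstComponent(thread const T* v) { return v[0]; } // data on the thread's stack

Member functions are qualified the same way (e.g. "const __global__") so they
can be invoked on objects that live in buffer memory.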


diff --git a/nanovdb/nanovdb/NanoVDB.h b/nanovdb/nanovdb/NanoVDB.h
index fde5c47..cff460a 100644
--- a/nanovdb/nanovdb/NanoVDB.h
+++ b/nanovdb/nanovdb/NanoVDB.h
@@ -140,7 +140,27 @@
#define NANOVDB_ALIGN(n) alignas(n)
#endif // !defined(NANOVDB_ALIGN)
-#ifdef __CUDACC_RTC__
+#ifdef __KERNEL_METAL__
+
+using namespace metal;
+#define std metal
+#define double uint64_t
+#define __global__ device
+#define __local__ thread
+#define __constant__ constant
+#define sqrtf sqrt
+#define rintf rint
+#define fminf fmin
+#define fmaxf fmax
+#define floorf floor
+#define ceilf ceil
+#define fabs abs
+#define fmaf fma
+#define tanf tan
+
+#define NANOVDB_ASSERT(x)
+
+#elif defined(__CUDACC_RTC__)
typedef signed char int8_t;
typedef short int16_t;
@@ -157,6 +177,10 @@ typedef unsigned long long uint64_t;
#else // !__CUDACC_RTC__
+#define __constant__ const
+#define __global__
+#define __local__
+
#include <stdlib.h> // for abs in clang7
#include <stdint.h> // for types like int32_t etc
#include <stddef.h> // for size_t type
@@ -262,7 +286,7 @@ enum class GridType : uint32_t { Unknown = 0,
Index = 19,// index into an external array of values
End = 20 };
-#ifndef __CUDACC_RTC__
+#if !defined(__CUDACC_RTC__) && !defined(__KERNEL_METAL__)
/// @brief Retuns a c-string used to describe a GridType
inline const char* toStr(GridType gridType)
{
@@ -289,7 +313,7 @@ enum class GridClass : uint32_t { Unknown = 0,
IndexGrid = 8,// grid whose values are offsets, e.g. into an external array
End = 9 };
-#ifndef __CUDACC_RTC__
+#if !defined(__CUDACC_RTC__) && !defined(__KERNEL_METAL__)
/// @brief Retuns a c-string used to describe a GridClass
inline const char* toStr(GridClass gridClass)
{
@@ -313,7 +337,7 @@ enum class GridFlags : uint32_t {
End = 1 << 6,
};
-#ifndef __CUDACC_RTC__
+#if !defined(__CUDACC_RTC__) && !defined(__KERNEL_METAL__)
/// @brief Retuns a c-string used to describe a GridFlags
inline const char* toStr(GridFlags gridFlags)
{
@@ -355,13 +379,13 @@ enum class GridBlindDataSemantic : uint32_t { Unknown = 0,
template<typename T1, typename T2>
struct is_same
{
- static constexpr bool value = false;
+ static __constant__ constexpr bool value = false;
};
template<typename T>
struct is_same<T, T>
{
- static constexpr bool value = true;
+ static __constant__ constexpr bool value = true;
};
// --------------------------> enable_if <------------------------------------
@@ -383,13 +407,13 @@ struct enable_if<true, T>
template<typename T>
struct is_const
{
- static constexpr bool value = false;
+ static __constant__ constexpr bool value = false;
};
template<typename T>
struct is_const<const T>
{
- static constexpr bool value = true;
+ static __constant__ constexpr bool value = true;
};
// --------------------------> remove_const <------------------------------------
@@ -412,7 +436,7 @@ struct remove_const<const T>
template<typename T>
struct is_floating_point
{
- static const bool value = is_same<T, float>::value || is_same<T, double>::value;
+ static __constant__ const bool value = is_same<T, float>::value || is_same<T, double>::value;
};
// --------------------------> is_specialization <------------------------------------
@@ -425,12 +449,12 @@ struct is_floating_point
template<typename AnyType, template<typename...> class TemplateType>
struct is_specialization
{
- static const bool value = false;
+ static __constant__ const bool value = false;
};
template<typename... Args, template<typename...> class TemplateType>
struct is_specialization<TemplateType<Args...>, TemplateType>
{
- static const bool value = true;
+ static __constant__ const bool value = true;
};
// --------------------------> Value Map <------------------------------------
@@ -495,19 +519,19 @@ struct BuildToValueMap<FpN>
// --------------------------> utility functions related to alignment <------------------------------------
/// @brief return true if the specified pointer is aligned
-__hostdev__ inline static bool isAligned(const void* p)
+__hostdev__ inline static bool isAligned(__global__ const void* p)
{
return uint64_t(p) % NANOVDB_DATA_ALIGNMENT == 0;
}
/// @brief return true if the specified pointer is aligned and not NULL
-__hostdev__ inline static bool isValid(const void* p)
+__hostdev__ inline static bool isValid(__global__ const void* p)
{
return p != nullptr && uint64_t(p) % NANOVDB_DATA_ALIGNMENT == 0;
}
/// @brief return the smallest number of bytes that when added to the specified pointer results in an aligned pointer
-__hostdev__ inline static uint64_t alignmentPadding(const void* p)
+__hostdev__ inline static uint64_t alignmentPadding(__global__ const void* p)
{
NANOVDB_ASSERT(p);
return (NANOVDB_DATA_ALIGNMENT - (uint64_t(p) % NANOVDB_DATA_ALIGNMENT)) % NANOVDB_DATA_ALIGNMENT;
@@ -515,43 +539,66 @@ __hostdev__ inline static uint64_t alignmentPadding(const void* p)
/// @brief offset the specified pointer so it is aligned.
template <typename T>
-__hostdev__ inline static T* alignPtr(T* p)
+__hostdev__ inline static __global__ T* alignPtr(__global__ T* p)
{
NANOVDB_ASSERT(p);
- return reinterpret_cast<T*>( (uint8_t*)p + alignmentPadding(p) );
+ return reinterpret_cast<__global__ T*>( (__global__ uint8_t*)p + alignmentPadding(p) );
}
/// @brief offset the specified pointer so it is aligned.
template <typename T>
-__hostdev__ inline static const T* alignPtr(const T* p)
+__hostdev__ inline static __global__ const T* alignPtr(__global__ const T* p)
{
NANOVDB_ASSERT(p);
- return reinterpret_cast<const T*>( (const uint8_t*)p + alignmentPadding(p) );
+ return reinterpret_cast<__global__ const T*>( (__global__ const uint8_t*)p + alignmentPadding(p) );
}
// --------------------------> PtrDiff PtrAdd <------------------------------------
template <typename T1, typename T2>
-__hostdev__ inline static int64_t PtrDiff(const T1* p, const T2* q)
+__hostdev__ inline static int64_t PtrDiff(__global__ const T1* p, __global__ const T2* q)
{
NANOVDB_ASSERT(p && q);
- return reinterpret_cast<const char*>(p) - reinterpret_cast<const char*>(q);
+ return reinterpret_cast<__global__ const char*>(p) - reinterpret_cast<__global__ const char*>(q);
}
+#if defined(__KERNEL_METAL__)
+template <typename T1, typename T2>
+__hostdev__ inline static int64_t PtrDiff(__local__ const T1* p, __local__ const T2* q)
+{
+ NANOVDB_ASSERT(p && q);
+ return reinterpret_cast<__local__ const char*>(p) - reinterpret_cast<__local__ const char*>(q);
+}
+#endif
template <typename DstT, typename SrcT>
-__hostdev__ inline static DstT* PtrAdd(SrcT *p, int64_t offset)
+__hostdev__ inline static __global__ DstT* PtrAdd(__global__ SrcT *p, int64_t offset)
{
NANOVDB_ASSERT(p);
- return reinterpret_cast<DstT*>(reinterpret_cast<char*>(p) + offset);
+ return reinterpret_cast<__global__ DstT*>(reinterpret_cast<__global__ char*>(p) + offset);
}
+#if defined(__KERNEL_METAL__)
+template <typename DstT, typename SrcT>
+__hostdev__ inline static __local__ DstT* PtrAdd(__local__ SrcT *p, int64_t offset)
+{
+ NANOVDB_ASSERT(p);
+ return reinterpret_cast<__local__ DstT*>(reinterpret_cast<__local__ char*>(p) + offset);
+}
+#endif
template <typename DstT, typename SrcT>
-__hostdev__ inline static const DstT* PtrAdd(const SrcT *p, int64_t offset)
+__hostdev__ inline static __global__ const DstT* PtrAdd(__global__ const SrcT *p, int64_t offset)
{
NANOVDB_ASSERT(p);
- return reinterpret_cast<const DstT*>(reinterpret_cast<const char*>(p) + offset);
+ return reinterpret_cast<__global__ const DstT*>(reinterpret_cast<__global__ const char*>(p) + offset);
}
-
+#if defined(__KERNEL_METAL__)
+template <typename DstT, typename SrcT>
+__hostdev__ inline static __local__ const DstT* PtrAdd(__local__ const SrcT *p, int64_t offset)
+{
+ NANOVDB_ASSERT(p);
+ return reinterpret_cast<__local__ const DstT*>(reinterpret_cast<__local__ const char*>(p) + offset);
+}
+#endif
// --------------------------> Rgba8 <------------------------------------
/// @brief 8-bit red, green, blue, alpha packed into 32 bit unsigned int
@@ -562,13 +609,13 @@ class Rgba8
uint32_t packed;// 32 bit packed representation
} mData;
public:
- static const int SIZE = 4;
+ static __constant__ const int SIZE = 4;
using ValueType = uint8_t;
- Rgba8(const Rgba8&) = default;
- Rgba8(Rgba8&&) = default;
- Rgba8& operator=(Rgba8&&) = default;
- Rgba8& operator=(const Rgba8&) = default;
+ Rgba8(__global__ const Rgba8&) = default;
+ Rgba8(__global__ Rgba8&&) = default;
+ __global__ Rgba8& operator=(__global__ Rgba8&&) __global__ = default;
+ __global__ Rgba8& operator=(__global__ const Rgba8&) __global__ = default;
__hostdev__ Rgba8() : mData{0,0,0,0} {static_assert(sizeof(uint32_t) == sizeof(Rgba8),"Unexpected sizeof");}
__hostdev__ Rgba8(uint8_t r, uint8_t g, uint8_t b, uint8_t a = 255u) : mData{r, g, b, a} {}
explicit __hostdev__ Rgba8(uint8_t v) : Rgba8(v,v,v,v) {}
@@ -579,8 +626,8 @@ public:
(uint8_t(0.5f + a * 255.0f))}// round to nearest
{
}
- __hostdev__ bool operator<(const Rgba8& rhs) const { return mData.packed < rhs.mData.packed; }
- __hostdev__ bool operator==(const Rgba8& rhs) const { return mData.packed == rhs.mData.packed; }
+ __hostdev__ bool operator<(__global__ const Rgba8& rhs) const { return mData.packed < rhs.mData.packed; }
+ __hostdev__ bool operator==(__global__ const Rgba8& rhs) const { return mData.packed == rhs.mData.packed; }
__hostdev__ float lengthSqr() const
{
return 0.0000153787005f*(float(mData.c[0])*mData.c[0] +
@@ -588,18 +635,18 @@ public:
float(mData.c[2])*mData.c[2]);//1/255^2
}
__hostdev__ float length() const { return sqrtf(this->lengthSqr() ); }
- __hostdev__ const uint8_t& operator[](int n) const { return mData.c[n]; }
- __hostdev__ uint8_t& operator[](int n) { return mData.c[n]; }
- __hostdev__ const uint32_t& packed() const { return mData.packed; }
- __hostdev__ uint32_t& packed() { return mData.packed; }
- __hostdev__ const uint8_t& r() const { return mData.c[0]; }
- __hostdev__ const uint8_t& g() const { return mData.c[1]; }
- __hostdev__ const uint8_t& b() const { return mData.c[2]; }
- __hostdev__ const uint8_t& a() const { return mData.c[3]; }
- __hostdev__ uint8_t& r() { return mData.c[0]; }
- __hostdev__ uint8_t& g() { return mData.c[1]; }
- __hostdev__ uint8_t& b() { return mData.c[2]; }
- __hostdev__ uint8_t& a() { return mData.c[3]; }
+ __hostdev__ __global__ const uint8_t& operator[](int n) const __global__ { return mData.c[n]; }
+ __hostdev__ __global__ uint8_t& operator[](int n) __global__ { return mData.c[n]; }
+ __hostdev__ __global__ const uint32_t& packed() const __global__ { return mData.packed; }
+ __hostdev__ __global__ uint32_t& packed() __global__ { return mData.packed; }
+ __hostdev__ __global__ const uint8_t& r() const __global__ { return mData.c[0]; }
+ __hostdev__ __global__ const uint8_t& g() const __global__ { return mData.c[1]; }
+ __hostdev__ __global__ const uint8_t& b() const __global__ { return mData.c[2]; }
+ __hostdev__ __global__ const uint8_t& a() const __global__ { return mData.c[3]; }
+ __hostdev__ __global__ uint8_t& r() __global__ { return mData.c[0]; }
+ __hostdev__ __global__ uint8_t& g() __global__ { return mData.c[1]; }
+ __hostdev__ __global__ uint8_t& b() __global__ { return mData.c[2]; }
+ __hostdev__ __global__ uint8_t& a() __global__ { return mData.c[3]; }
};// Rgba8
using PackedRGBA8 = Rgba8;// for backwards compatibility
@@ -660,17 +707,17 @@ public:
NANOVDB_ASSERT(minor < (1u << 11));// max value of minor is 2047
NANOVDB_ASSERT(patch < (1u << 10));// max value of patch is 1023
}
- __hostdev__ bool operator==(const Version &rhs) const {return mData == rhs.mData;}
- __hostdev__ bool operator< (const Version &rhs) const {return mData < rhs.mData;}
- __hostdev__ bool operator<=(const Version &rhs) const {return mData <= rhs.mData;}
- __hostdev__ bool operator> (const Version &rhs) const {return mData > rhs.mData;}
- __hostdev__ bool operator>=(const Version &rhs) const {return mData >= rhs.mData;}
+ __hostdev__ bool operator==(__global__ const Version &rhs) const {return mData == rhs.mData;}
+ __hostdev__ bool operator< (__global__ const Version &rhs) const {return mData < rhs.mData;}
+ __hostdev__ bool operator<=(__global__ const Version &rhs) const {return mData <= rhs.mData;}
+ __hostdev__ bool operator> (__global__ const Version &rhs) const {return mData > rhs.mData;}
+ __hostdev__ bool operator>=(__global__ const Version &rhs) const {return mData >= rhs.mData;}
__hostdev__ uint32_t id() const { return mData; }
__hostdev__ uint32_t getMajor() const { return (mData >> 21) & ((1u << 11) - 1);}
__hostdev__ uint32_t getMinor() const { return (mData >> 10) & ((1u << 11) - 1);}
__hostdev__ uint32_t getPatch() const { return mData & ((1u << 10) - 1);}
-#ifndef __CUDACC_RTC__
+#if !defined(__CUDACC_RTC__) && !defined(__KERNEL_METAL__)
const char* c_str() const
{
char *buffer = (char*)malloc(4 + 1 + 4 + 1 + 4 + 1);// xxxx.xxxx.xxxx\0
@@ -749,7 +796,7 @@ struct Maximum
//@}
template<typename Type>
-__hostdev__ inline bool isApproxZero(const Type& x)
+__hostdev__ inline bool isApproxZero(__global__ const Type& x)
{
return !(x > Tolerance<Type>::value()) && !(x < -Tolerance<Type>::value());
}
@@ -771,10 +818,12 @@ __hostdev__ inline float Min(float a, float b)
{
return fminf(a, b);
}
+#ifndef __KERNEL_METAL__
__hostdev__ inline double Min(double a, double b)
{
return fmin(a, b);
}
+#endif
template<typename Type>
__hostdev__ inline Type Max(Type a, Type b)
{
@@ -793,45 +842,55 @@ __hostdev__ inline float Max(float a, float b)
{
return fmaxf(a, b);
}
+#ifndef __KERNEL_METAL__
__hostdev__ inline double Max(double a, double b)
{
return fmax(a, b);
}
+#endif
__hostdev__ inline float Clamp(float x, float a, float b)
{
return Max(Min(x, b), a);
}
+#ifndef __KERNEL_METAL__
__hostdev__ inline double Clamp(double x, double a, double b)
{
return Max(Min(x, b), a);
}
+#endif
__hostdev__ inline float Fract(float x)
{
return x - floorf(x);
}
+#ifndef __KERNEL_METAL__
__hostdev__ inline double Fract(double x)
{
return x - floor(x);
}
+#endif
__hostdev__ inline int32_t Floor(float x)
{
return int32_t(floorf(x));
}
+#ifndef __KERNEL_METAL__
__hostdev__ inline int32_t Floor(double x)
{
return int32_t(floor(x));
}
+#endif
__hostdev__ inline int32_t Ceil(float x)
{
return int32_t(ceilf(x));
}
+#ifndef __KERNEL_METAL__
__hostdev__ inline int32_t Ceil(double x)
{
return int32_t(ceil(x));
}
+#endif
template<typename T>
__hostdev__ inline T Pow2(T x)
@@ -875,46 +934,78 @@ __hostdev__ inline int Abs(int x)
}
template<typename CoordT, typename RealT, template<typename> class Vec3T>
-__hostdev__ inline CoordT Round(const Vec3T<RealT>& xyz);
+__hostdev__ inline CoordT Round(__global__ const Vec3T<RealT>& xyz);
+#if defined(__KERNEL_METAL__)
+template<typename CoordT, typename RealT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Round(__local__ const Vec3T<RealT>& xyz);
+#endif
template<typename CoordT, template<typename> class Vec3T>
-__hostdev__ inline CoordT Round(const Vec3T<float>& xyz)
+__hostdev__ inline CoordT Round(__global__ const Vec3T<float>& xyz)
{
return CoordT(int32_t(rintf(xyz[0])), int32_t(rintf(xyz[1])), int32_t(rintf(xyz[2])));
//return CoordT(int32_t(roundf(xyz[0])), int32_t(roundf(xyz[1])), int32_t(roundf(xyz[2])) );
//return CoordT(int32_t(floorf(xyz[0] + 0.5f)), int32_t(floorf(xyz[1] + 0.5f)), int32_t(floorf(xyz[2] + 0.5f)));
}
+#if defined(__KERNEL_METAL__)
+template<typename CoordT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Round(__local__ const Vec3T<float>& xyz)
+{
+ return CoordT(int32_t(rintf(xyz[0])), int32_t(rintf(xyz[1])), int32_t(rintf(xyz[2])));
+ //return CoordT(int32_t(roundf(xyz[0])), int32_t(roundf(xyz[1])), int32_t(roundf(xyz[2])) );
+ //return CoordT(int32_t(floorf(xyz[0] + 0.5f)), int32_t(floorf(xyz[1] + 0.5f)), int32_t(floorf(xyz[2] + 0.5f)));
+}
+#endif
template<typename CoordT, template<typename> class Vec3T>
-__hostdev__ inline CoordT Round(const Vec3T<double>& xyz)
+__hostdev__ inline CoordT Round(__global__ const Vec3T<double>& xyz)
{
return CoordT(int32_t(floor(xyz[0] + 0.5)), int32_t(floor(xyz[1] + 0.5)), int32_t(floor(xyz[2] + 0.5)));
}
+#if defined(__KERNEL_METAL__)
+template<typename CoordT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Round(__local__ const Vec3T<double>& xyz)
+{
+ return CoordT(int32_t(floor(xyz[0] + 0.5)), int32_t(floor(xyz[1] + 0.5)), int32_t(floor(xyz[2] + 0.5)));
+}
+#endif
template<typename CoordT, typename RealT, template<typename> class Vec3T>
-__hostdev__ inline CoordT RoundDown(const Vec3T<RealT>& xyz)
+__hostdev__ inline CoordT RoundDown(__global__ const Vec3T<RealT>& xyz)
{
return CoordT(Floor(xyz[0]), Floor(xyz[1]), Floor(xyz[2]));
}
-
+#if defined(__KERNEL_METAL__)
+template<typename CoordT, typename RealT, template<typename> class Vec3T>
+__hostdev__ inline CoordT RoundDown(__local__ const Vec3T<RealT>& xyz)
+{
+ return CoordT(Floor(xyz[0]), Floor(xyz[1]), Floor(xyz[2]));
+}
+#endif
//@{
/// Return the square root of a floating-point value.
__hostdev__ inline float Sqrt(float x)
{
return sqrtf(x);
}
+#ifndef __KERNEL_METAL__
__hostdev__ inline double Sqrt(double x)
{
return sqrt(x);
}
+#endif
//@}
/// Return the sign of the given value as an integer (either -1, 0 or 1).
template <typename T>
-__hostdev__ inline T Sign(const T &x) { return ((T(0) < x)?T(1):T(0)) - ((x < T(0))?T(1):T(0)); }
+__hostdev__ inline T Sign(__global__ const T &x) { return ((T(0) < x)?T(1):T(0)) - ((x < T(0))?T(1):T(0)); }
+#if defined(__KERNEL_METAL__)
+template <typename T>
+__hostdev__ inline T Sign(__local__ const T &x) { return ((T(0) < x)?T(1):T(0)) - ((x < T(0))?T(1):T(0)); }
+#endif
template<typename Vec3T>
-__hostdev__ inline int MinIndex(const Vec3T& v)
+__hostdev__ inline int MinIndex(__global__ const Vec3T& v)
{
#if 0
static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values
@@ -930,8 +1021,27 @@ __hostdev__ inline int MinIndex(const Vec3T& v)
#endif
}
+#if defined(__KERNEL_METAL__)
template<typename Vec3T>
-__hostdev__ inline int MaxIndex(const Vec3T& v)
+__hostdev__ inline int MinIndex(__local__ const Vec3T& v)
+{
+#if 0
+ static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values
+ const int hashKey = ((v[0] < v[1]) << 2) + ((v[0] < v[2]) << 1) + (v[1] < v[2]); // ?*4+?*2+?*1
+ return hashTable[hashKey];
+#else
+ if (v[0] < v[1] && v[0] < v[2])
+ return 0;
+ if (v[1] < v[2])
+ return 1;
+ else
+ return 2;
+#endif
+}
+#endif
+
+template<typename Vec3T>
+__hostdev__ inline int MaxIndex(__global__ const Vec3T& v)
{
#if 0
static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values
@@ -947,6 +1057,25 @@ __hostdev__ inline int MaxIndex(const Vec3T& v)
#endif
}
+#if defined(__KERNEL_METAL__)
+template<typename Vec3T>
+__hostdev__ inline int MaxIndex(__local__ const Vec3T& v)
+{
+#if 0
+ static const int hashTable[8] = {2, 1, 9, 1, 2, 9, 0, 0}; //9 are dummy values
+ const int hashKey = ((v[0] > v[1]) << 2) + ((v[0] > v[2]) << 1) + (v[1] > v[2]); // ?*4+?*2+?*1
+ return hashTable[hashKey];
+#else
+ if (v[0] > v[1] && v[0] > v[2])
+ return 0;
+ if (v[1] > v[2])
+ return 1;
+ else
+ return 2;
+#endif
+}
+#endif
+
/// @brief round up byteSize to the nearest wordSize, e.g. to align to machine word: AlignUp<sizeof(size_t)(n)
///
/// @details both wordSize and byteSize are in byte units
@@ -988,7 +1117,7 @@ public:
{
}
- __hostdev__ Coord(ValueType *ptr)
+ __hostdev__ Coord(__global__ ValueType *ptr)
: mVec{ptr[0], ptr[1], ptr[2]}
{
}
@@ -997,9 +1126,9 @@ public:
__hostdev__ int32_t y() const { return mVec[1]; }
__hostdev__ int32_t z() const { return mVec[2]; }
- __hostdev__ int32_t& x() { return mVec[0]; }
- __hostdev__ int32_t& y() { return mVec[1]; }
- __hostdev__ int32_t& z() { return mVec[2]; }
+ __hostdev__ __global__ int32_t& x() __global__ { return mVec[0]; }
+ __hostdev__ __global__ int32_t& y() __global__ { return mVec[1]; }
+ __hostdev__ __global__ int32_t& z() __global__ { return mVec[2]; }
__hostdev__ static Coord max() { return Coord(int32_t((1u << 31) - 1)); }
@@ -1009,15 +1138,21 @@ public:
/// @brief Return a const reference to the given Coord component.
/// @warning The argument is assumed to be 0, 1, or 2.
- __hostdev__ const ValueType& operator[](IndexType i) const { return mVec[i]; }
+ __hostdev__ __global__ const ValueType& operator[](IndexType i) const __global__ { return mVec[i]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __local__ const ValueType& operator[](IndexType i) const __local__ { return mVec[i]; }
+#endif
/// @brief Return a non-const reference to the given Coord component.
/// @warning The argument is assumed to be 0, 1, or 2.
- __hostdev__ ValueType& operator[](IndexType i) { return mVec[i]; }
+ __hostdev__ __global__ ValueType& operator[](IndexType i) __global__ { return mVec[i]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __local__ ValueType& operator[](IndexType i) __local__ { return mVec[i]; }
+#endif
/// @brief Assignment operator that works with openvdb::Coord
template <typename CoordT>
- __hostdev__ Coord& operator=(const CoordT &other)
+ __hostdev__ __global__ Coord& operator=(__global__ const CoordT &other) __global__
{
static_assert(sizeof(Coord) == sizeof(CoordT), "Mis-matched sizeof");
mVec[0] = other[0];
@@ -1025,6 +1160,17 @@ public:
mVec[2] = other[2];
return *this;
}
+#if defined(__KERNEL_METAL__)
+ template <typename CoordT>
+ __hostdev__ __local__ Coord& operator=(__local__ const CoordT &other) __local__
+ {
+ static_assert(sizeof(Coord) == sizeof(CoordT), "Mis-matched sizeof");
+ mVec[0] = other[0];
+ mVec[1] = other[1];
+ mVec[2] = other[2];
+ return *this;
+ }
+#endif
/// @brief Return a new instance with coordinates masked by the given unsigned integer.
__hostdev__ Coord operator&(IndexType n) const { return Coord(mVec[0] & n, mVec[1] & n, mVec[2] & n); }
@@ -1036,52 +1182,52 @@ public:
__hostdev__ Coord operator>>(IndexType n) const { return Coord(mVec[0] >> n, mVec[1] >> n, mVec[2] >> n); }
/// @brief Return true if this Coord is lexicographically less than the given Coord.
- __hostdev__ bool operator<(const Coord& rhs) const
+ __hostdev__ bool operator<(__global__ const Coord& rhs) const
{
return mVec[0] < rhs[0] ? true : mVec[0] > rhs[0] ? false : mVec[1] < rhs[1] ? true : mVec[1] > rhs[1] ? false : mVec[2] < rhs[2] ? true : false;
}
// @brief Return true if the Coord components are identical.
- __hostdev__ bool operator==(const Coord& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; }
- __hostdev__ bool operator!=(const Coord& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; }
- __hostdev__ Coord& operator&=(int n)
+ __hostdev__ bool operator==(__global__ const Coord& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; }
+ __hostdev__ bool operator!=(__global__ const Coord& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; }
+ __hostdev__ __global__ Coord& operator&=(int n) __global__
{
mVec[0] &= n;
mVec[1] &= n;
mVec[2] &= n;
return *this;
}
- __hostdev__ Coord& operator<<=(uint32_t n)
+ __hostdev__ __global__ Coord& operator<<=(uint32_t n) __global__
{
mVec[0] <<= n;
mVec[1] <<= n;
mVec[2] <<= n;
return *this;
}
- __hostdev__ Coord& operator>>=(uint32_t n)
+ __hostdev__ __global__ Coord& operator>>=(uint32_t n) __global__
{
mVec[0] >>= n;
mVec[1] >>= n;
mVec[2] >>= n;
return *this;
}
- __hostdev__ Coord& operator+=(int n)
+ __hostdev__ __global__ Coord& operator+=(int n) __global__
{
mVec[0] += n;
mVec[1] += n;
mVec[2] += n;
return *this;
}
- __hostdev__ Coord operator+(const Coord& rhs) const { return Coord(mVec[0] + rhs[0], mVec[1] + rhs[1], mVec[2] + rhs[2]); }
- __hostdev__ Coord operator-(const Coord& rhs) const { return Coord(mVec[0] - rhs[0], mVec[1] - rhs[1], mVec[2] - rhs[2]); }
- __hostdev__ Coord& operator+=(const Coord& rhs)
+ __hostdev__ Coord operator+(__global__ const Coord& rhs) const { return Coord(mVec[0] + rhs[0], mVec[1] + rhs[1], mVec[2] + rhs[2]); }
+ __hostdev__ Coord operator-(__global__ const Coord& rhs) const { return Coord(mVec[0] - rhs[0], mVec[1] - rhs[1], mVec[2] - rhs[2]); }
+ __hostdev__ __global__ Coord& operator+=(__global__ const Coord& rhs) __global__
{
mVec[0] += rhs[0];
mVec[1] += rhs[1];
mVec[2] += rhs[2];
return *this;
}
- __hostdev__ Coord& operator-=(const Coord& rhs)
+ __hostdev__ __global__ Coord& operator-=(__global__ const Coord& rhs) __global__
{
mVec[0] -= rhs[0];
mVec[1] -= rhs[1];
@@ -1090,7 +1236,7 @@ public:
}
/// @brief Perform a component-wise minimum with the other Coord.
- __hostdev__ Coord& minComponent(const Coord& other)
+ __hostdev__ __global__ Coord& minComponent(__global__ const Coord& other) __global__
{
if (other[0] < mVec[0])
mVec[0] = other[0];
@@ -1102,7 +1248,7 @@ public:
}
/// @brief Perform a component-wise maximum with the other Coord.
- __hostdev__ Coord& maxComponent(const Coord& other)
+ __hostdev__ __global__ Coord& maxComponent(__global__ const Coord& other) __global__
{
if (other[0] > mVec[0])
mVec[0] = other[0];
@@ -1113,16 +1259,16 @@ public:
return *this;
}
- __hostdev__ Coord offsetBy(ValueType dx, ValueType dy, ValueType dz) const
+ __hostdev__ Coord offsetBy(ValueType dx, ValueType dy, ValueType dz) const __global__
{
return Coord(mVec[0] + dx, mVec[1] + dy, mVec[2] + dz);
}
- __hostdev__ Coord offsetBy(ValueType n) const { return this->offsetBy(n, n, n); }
+ __hostdev__ Coord offsetBy(ValueType n) const __global__ { return this->offsetBy(n, n, n); }
/// Return true if any of the components of @a a are smaller than the
/// corresponding components of @a b.
- __hostdev__ static inline bool lessThan(const Coord& a, const Coord& b)
+ __hostdev__ static inline bool lessThan(__global__ const Coord& a, __global__ const Coord& b)
{
return (a[0] < b[0] || a[1] < b[1] || a[2] < b[2]);
}
@@ -1130,7 +1276,13 @@ public:
/// @brief Return the largest integer coordinates that are not greater
/// than @a xyz (node centered conversion).
template<typename Vec3T>
- __hostdev__ static Coord Floor(const Vec3T& xyz) { return Coord(nanovdb::Floor(xyz[0]), nanovdb::Floor(xyz[1]), nanovdb::Floor(xyz[2])); }
+ __hostdev__ static Coord Floor(__global__ const Vec3T& xyz) { return Coord(nanovdb::Floor(xyz[0]), nanovdb::Floor(xyz[1]), nanovdb::Floor(xyz[2])); }
+#if defined __KERNEL_METAL__
+ /// @brief Return the largest integer coordinates that are not greater
+ /// than @a xyz (node centered conversion).
+ template<typename Vec3T>
+ __hostdev__ static Coord Floor(__local__ const Vec3T& xyz) { return Coord(nanovdb::Floor(xyz[0]), nanovdb::Floor(xyz[1]), nanovdb::Floor(xyz[2])); }
+#endif
/// @brief Return a hash key derived from the existing coordinates.
/// @details For details on this hash function please see the VDB paper.
@@ -1159,7 +1311,7 @@ class Vec3
T mVec[3];
public:
- static const int SIZE = 3;
+ static __constant__ const int SIZE = 3;
using ValueType = T;
Vec3() = default;
__hostdev__ explicit Vec3(T x)
@@ -1171,30 +1323,36 @@ public:
{
}
template<typename T2>
- __hostdev__ explicit Vec3(const Vec3<T2>& v)
+ __hostdev__ explicit Vec3(__global__ const Vec3<T2>& v)
: mVec{T(v[0]), T(v[1]), T(v[2])}
{
}
- __hostdev__ explicit Vec3(const Coord& ijk)
+ __hostdev__ explicit Vec3(__global__ const Coord& ijk)
: mVec{T(ijk[0]), T(ijk[1]), T(ijk[2])}
{
}
- __hostdev__ bool operator==(const Vec3& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; }
- __hostdev__ bool operator!=(const Vec3& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; }
+ __hostdev__ bool operator==(__global__ const Vec3& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2]; }
+ __hostdev__ bool operator!=(__global__ const Vec3& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2]; }
template<typename Vec3T>
- __hostdev__ Vec3& operator=(const Vec3T& rhs)
+ __hostdev__ __global__ Vec3& operator=(__global__ const Vec3T& rhs)
{
mVec[0] = rhs[0];
mVec[1] = rhs[1];
mVec[2] = rhs[2];
return *this;
}
- __hostdev__ const T& operator[](int i) const { return mVec[i]; }
- __hostdev__ T& operator[](int i) { return mVec[i]; }
+ __hostdev__ __global__ const T& operator[](int i) const __global__ { return mVec[i]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __local__ const T& operator[](int i) const __local__ { return mVec[i]; }
+#endif
+ __hostdev__ __global__ T& operator[](int i) __global__ { return mVec[i]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __local__ T& operator[](int i) __local__ { return mVec[i]; }
+#endif
template<typename Vec3T>
- __hostdev__ T dot(const Vec3T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2]; }
+ __hostdev__ T dot(__global__ const Vec3T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2]; }
template<typename Vec3T>
- __hostdev__ Vec3 cross(const Vec3T& v) const
+ __hostdev__ Vec3 cross(__global__ const Vec3T& v) const
{
return Vec3(mVec[1] * v[2] - mVec[2] * v[1],
mVec[2] * v[0] - mVec[0] * v[2],
@@ -1206,37 +1364,62 @@ public:
}
__hostdev__ T length() const { return Sqrt(this->lengthSqr()); }
__hostdev__ Vec3 operator-() const { return Vec3(-mVec[0], -mVec[1], -mVec[2]); }
- __hostdev__ Vec3 operator*(const Vec3& v) const { return Vec3(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2]); }
- __hostdev__ Vec3 operator/(const Vec3& v) const { return Vec3(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2]); }
- __hostdev__ Vec3 operator+(const Vec3& v) const { return Vec3(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2]); }
- __hostdev__ Vec3 operator-(const Vec3& v) const { return Vec3(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2]); }
- __hostdev__ Vec3 operator*(const T& s) const { return Vec3(s * mVec[0], s * mVec[1], s * mVec[2]); }
- __hostdev__ Vec3 operator/(const T& s) const { return (T(1) / s) * (*this); }
- __hostdev__ Vec3& operator+=(const Vec3& v)
+ __hostdev__ Vec3 operator*(__global__ const Vec3& v) const { return Vec3(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2]); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ Vec3 operator*(__local__ const Vec3& v) const { return Vec3(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2]); }
+#endif
+ __hostdev__ Vec3 operator/(__global__ const Vec3& v) const { return Vec3(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2]); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ Vec3 operator/(__local__ const Vec3& v) const { return Vec3(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2]); }
+#endif
+ __hostdev__ Vec3 operator+(__global__ const Vec3& v) const { return Vec3(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2]); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ Vec3 operator-(__local__ const Vec3& v) const { return Vec3(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2]); }
+ __hostdev__ Vec3 operator+(__local__ const Vec3& v) const { return Vec3(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2]); }
+#endif
+ __hostdev__ Vec3 operator-(__global__ const Vec3& v) const { return Vec3(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2]); }
+ __hostdev__ Vec3 operator*(__global__ const T& s) const { return Vec3(s * mVec[0], s * mVec[1], s * mVec[2]); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ Vec3 operator*(__local__ const T& s) const { return Vec3(s * mVec[0], s * mVec[1], s * mVec[2]); }
+#endif
+ __hostdev__ Vec3 operator/(__global__ const T& s) const { return (T(1) / s) * (*this); }
+ __hostdev__ __global__ Vec3& operator+=(__global__ const Vec3& v)
{
mVec[0] += v[0];
mVec[1] += v[1];
mVec[2] += v[2];
return *this;
}
- __hostdev__ Vec3& operator-=(const Vec3& v)
+ __hostdev__ __global__ Vec3& operator-=(__global__ const Vec3& v)
{
mVec[0] -= v[0];
mVec[1] -= v[1];
mVec[2] -= v[2];
return *this;
}
- __hostdev__ Vec3& operator*=(const T& s)
+ __hostdev__ __global__ Vec3& operator*=(__global__ const T& s)
{
mVec[0] *= s;
mVec[1] *= s;
mVec[2] *= s;
return *this;
}
- __hostdev__ Vec3& operator/=(const T& s) { return (*this) *= T(1) / s; }
- __hostdev__ Vec3& normalize() { return (*this) /= this->length(); }
+#if defined __KERNEL_METAL__
+ __hostdev__ __local__ Vec3& operator*=(__local__ const T& s)
+ {
+ mVec[0] *= s;
+ mVec[1] *= s;
+ mVec[2] *= s;
+ return *this;
+ }
+#endif
+ __hostdev__ __global__ Vec3& operator/=(__global__ const T& s) { return (*this) *= T(1) / s; }
+#if defined __KERNEL_METAL__
+ __hostdev__ __local__ Vec3& operator/=(__local__ const T& s) { return (*this) *= T(1) / s; }
+#endif
+ __hostdev__ __global__ Vec3& normalize() { return (*this) /= this->length(); }
/// @brief Perform a component-wise minimum with the other Coord.
- __hostdev__ Vec3& minComponent(const Vec3& other)
+ __hostdev__ __global__ Vec3& minComponent(__global__ const Vec3& other)
{
if (other[0] < mVec[0])
mVec[0] = other[0];
@@ -1248,7 +1431,7 @@ public:
}
/// @brief Perform a component-wise maximum with the other Coord.
- __hostdev__ Vec3& maxComponent(const Vec3& other)
+ __hostdev__ __global__ Vec3& maxComponent(__global__ const Vec3& other)
{
if (other[0] > mVec[0])
mVec[0] = other[0];
@@ -1274,15 +1457,29 @@ public:
}; // Vec3<T>
template<typename T1, typename T2>
-__hostdev__ inline Vec3<T2> operator*(T1 scalar, const Vec3<T2>& vec)
+__hostdev__ inline Vec3<T2> operator*(T1 scalar, __global__ const Vec3<T2>& vec)
{
return Vec3<T2>(scalar * vec[0], scalar * vec[1], scalar * vec[2]);
}
+#if defined(__KERNEL_METAL__)
template<typename T1, typename T2>
-__hostdev__ inline Vec3<T2> operator/(T1 scalar, const Vec3<T2>& vec)
+__hostdev__ inline Vec3<T2> operator*(T1 scalar, __local__ const Vec3<T2>& vec)
+{
+ return Vec3<T2>(scalar * vec[0], scalar * vec[1], scalar * vec[2]);
+}
+#endif
+template<typename T1, typename T2>
+__hostdev__ inline Vec3<T2> operator/(T1 scalar, __global__ const Vec3<T2>& vec)
{
return Vec3<T2>(scalar / vec[0], scalar / vec[1], scalar / vec[2]);
}
+#if defined(__KERNEL_METAL__)
+template<typename T1, typename T2>
+__hostdev__ inline Vec3<T2> operator/(T1 scalar, __local__ const Vec3<T2>& vec)
+{
+ return Vec3<T2>(scalar / vec[0], scalar / vec[1], scalar / vec[2]);
+}
+#endif
using Vec3R = Vec3<double>;
using Vec3d = Vec3<double>;
@@ -1304,7 +1501,7 @@ class Vec4
T mVec[4];
public:
- static const int SIZE = 4;
+ static __constant__ const int SIZE = 4;
using ValueType = T;
Vec4() = default;
__hostdev__ explicit Vec4(T x)
@@ -1316,14 +1513,14 @@ public:
{
}
template<typename T2>
- __hostdev__ explicit Vec4(const Vec4<T2>& v)
+ __hostdev__ explicit Vec4(__global__ const Vec4<T2>& v)
: mVec{T(v[0]), T(v[1]), T(v[2]), T(v[3])}
{
}
- __hostdev__ bool operator==(const Vec4& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2] && mVec[3] == rhs[3]; }
- __hostdev__ bool operator!=(const Vec4& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2] || mVec[3] != rhs[3]; }
+ __hostdev__ bool operator==(__global__ const Vec4& rhs) const { return mVec[0] == rhs[0] && mVec[1] == rhs[1] && mVec[2] == rhs[2] && mVec[3] == rhs[3]; }
+ __hostdev__ bool operator!=(__global__ const Vec4& rhs) const { return mVec[0] != rhs[0] || mVec[1] != rhs[1] || mVec[2] != rhs[2] || mVec[3] != rhs[3]; }
template<typename Vec4T>
- __hostdev__ Vec4& operator=(const Vec4T& rhs)
+ __hostdev__ __global__ Vec4& operator=(__global__ const Vec4T& rhs)
{
mVec[0] = rhs[0];
mVec[1] = rhs[1];
@@ -1331,23 +1528,23 @@ public:
mVec[3] = rhs[3];
return *this;
}
- __hostdev__ const T& operator[](int i) const { return mVec[i]; }
- __hostdev__ T& operator[](int i) { return mVec[i]; }
+ __hostdev__ __global__ const T& operator[](int i) const { return mVec[i]; }
+ __hostdev__ __global__ T& operator[](int i) { return mVec[i]; }
template<typename Vec4T>
- __hostdev__ T dot(const Vec4T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2] + mVec[3] * v[3]; }
+ __hostdev__ T dot(__global__ const Vec4T& v) const { return mVec[0] * v[0] + mVec[1] * v[1] + mVec[2] * v[2] + mVec[3] * v[3]; }
__hostdev__ T lengthSqr() const
{
return mVec[0] * mVec[0] + mVec[1] * mVec[1] + mVec[2] * mVec[2] + mVec[3] * mVec[3]; // 7 flops
}
__hostdev__ T length() const { return Sqrt(this->lengthSqr()); }
__hostdev__ Vec4 operator-() const { return Vec4(-mVec[0], -mVec[1], -mVec[2], -mVec[3]); }
- __hostdev__ Vec4 operator*(const Vec4& v) const { return Vec4(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2], mVec[3] * v[3]); }
- __hostdev__ Vec4 operator/(const Vec4& v) const { return Vec4(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2], mVec[3] / v[3]); }
- __hostdev__ Vec4 operator+(const Vec4& v) const { return Vec4(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2], mVec[3] + v[3]); }
- __hostdev__ Vec4 operator-(const Vec4& v) const { return Vec4(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2], mVec[3] - v[3]); }
- __hostdev__ Vec4 operator*(const T& s) const { return Vec4(s * mVec[0], s * mVec[1], s * mVec[2], s * mVec[3]); }
- __hostdev__ Vec4 operator/(const T& s) const { return (T(1) / s) * (*this); }
- __hostdev__ Vec4& operator+=(const Vec4& v)
+ __hostdev__ Vec4 operator*(__global__ const Vec4& v) const { return Vec4(mVec[0] * v[0], mVec[1] * v[1], mVec[2] * v[2], mVec[3] * v[3]); }
+ __hostdev__ Vec4 operator/(__global__ const Vec4& v) const { return Vec4(mVec[0] / v[0], mVec[1] / v[1], mVec[2] / v[2], mVec[3] / v[3]); }
+ __hostdev__ Vec4 operator+(__global__ const Vec4& v) const { return Vec4(mVec[0] + v[0], mVec[1] + v[1], mVec[2] + v[2], mVec[3] + v[3]); }
+ __hostdev__ Vec4 operator-(__global__ const Vec4& v) const { return Vec4(mVec[0] - v[0], mVec[1] - v[1], mVec[2] - v[2], mVec[3] - v[3]); }
+ __hostdev__ Vec4 operator*(__global__ const T& s) const { return Vec4(s * mVec[0], s * mVec[1], s * mVec[2], s * mVec[3]); }
+ __hostdev__ Vec4 operator/(__global__ const T& s) const { return (T(1) / s) * (*this); }
+ __hostdev__ __global__ Vec4& operator+=(__global__ const Vec4& v)
{
mVec[0] += v[0];
mVec[1] += v[1];
@@ -1355,7 +1552,7 @@ public:
mVec[3] += v[3];
return *this;
}
- __hostdev__ Vec4& operator-=(const Vec4& v)
+ __hostdev__ __global__ Vec4& operator-=(__global__ const Vec4& v)
{
mVec[0] -= v[0];
mVec[1] -= v[1];
@@ -1363,7 +1560,7 @@ public:
mVec[3] -= v[3];
return *this;
}
- __hostdev__ Vec4& operator*=(const T& s)
+ __hostdev__ __global__ Vec4& operator*=(__global__ const T& s)
{
mVec[0] *= s;
mVec[1] *= s;
@@ -1371,10 +1568,10 @@ public:
mVec[3] *= s;
return *this;
}
- __hostdev__ Vec4& operator/=(const T& s) { return (*this) *= T(1) / s; }
- __hostdev__ Vec4& normalize() { return (*this) /= this->length(); }
+ __hostdev__ __global__ Vec4& operator/=(__global__ const T& s) { return (*this) *= T(1) / s; }
+ __hostdev__ __global__ Vec4& normalize() { return (*this) /= this->length(); }
/// @brief Perform a component-wise minimum with the other Coord.
- __hostdev__ Vec4& minComponent(const Vec4& other)
+ __hostdev__ __global__ Vec4& minComponent(__global__ const Vec4& other)
{
if (other[0] < mVec[0])
mVec[0] = other[0];
@@ -1388,7 +1585,7 @@ public:
}
/// @brief Perform a component-wise maximum with the other Coord.
- __hostdev__ Vec4& maxComponent(const Vec4& other)
+ __hostdev__ __global__ Vec4& maxComponent(__global__ const Vec4& other)
{
if (other[0] > mVec[0])
mVec[0] = other[0];
@@ -1403,12 +1600,12 @@ public:
}; // Vec4<T>
template<typename T1, typename T2>
-__hostdev__ inline Vec4<T2> operator*(T1 scalar, const Vec4<T2>& vec)
+__hostdev__ inline Vec4<T2> operator*(T1 scalar, __global__ const Vec4<T2>& vec)
{
return Vec4<T2>(scalar * vec[0], scalar * vec[1], scalar * vec[2], scalar * vec[3]);
}
template<typename T1, typename T2>
-__hostdev__ inline Vec4<T2> operator/(T1 scalar, const Vec3<T2>& vec)
+__hostdev__ inline Vec4<T2> operator/(T1 scalar, __global__ const Vec3<T2>& vec)
{
return Vec4<T2>(scalar / vec[0], scalar / vec[1], scalar / vec[2], scalar / vec[3]);
}
@@ -1428,23 +1625,23 @@ struct TensorTraits;
template<typename T>
struct TensorTraits<T, 0>
{
- static const int Rank = 0; // i.e. scalar
- static const bool IsScalar = true;
- static const bool IsVector = false;
- static const int Size = 1;
+ static __constant__ const int Rank = 0; // i.e. scalar
+ static __constant__ const bool IsScalar = true;
+ static __constant__ const bool IsVector = false;
+ static __constant__ const int Size = 1;
using ElementType = T;
- static T scalar(const T& s) { return s; }
+ static T scalar(__global__ const T& s) { return s; }
};
template<typename T>
struct TensorTraits<T, 1>
{
- static const int Rank = 1; // i.e. vector
- static const bool IsScalar = false;
- static const bool IsVector = true;
- static const int Size = T::SIZE;
+ static __constant__ const int Rank = 1; // i.e. vector
+ static __constant__ const bool IsScalar = false;
+ static __constant__ const bool IsVector = true;
+ static __constant__ const int Size = T::SIZE;
using ElementType = typename T::ValueType;
- static ElementType scalar(const T& v) { return v.length(); }
+ static ElementType scalar(__global__ const T& v) { return v.length(); }
};
// ----------------------------> FloatTraits <--------------------------------------
@@ -1528,71 +1725,80 @@ __hostdev__ inline GridType mapToGridType()
// ----------------------------> matMult <--------------------------------------
template<typename Vec3T>
-__hostdev__ inline Vec3T matMult(const float* mat, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMult(__global__ const float* mat, __global__ const Vec3T& xyz)
{
return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[1], xyz[2] * mat[2])),
fmaf(xyz[0], mat[3], fmaf(xyz[1], mat[4], xyz[2] * mat[5])),
fmaf(xyz[0], mat[6], fmaf(xyz[1], mat[7], xyz[2] * mat[8]))); // 6 fmaf + 3 mult = 9 flops
}
-
+#if defined(__KERNEL_METAL__)
template<typename Vec3T>
-__hostdev__ inline Vec3T matMult(const double* mat, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMult(__global__ const float* mat, __local__ const Vec3T& xyz)
+{
+ return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[1], xyz[2] * mat[2])),
+ fmaf(xyz[0], mat[3], fmaf(xyz[1], mat[4], xyz[2] * mat[5])),
+ fmaf(xyz[0], mat[6], fmaf(xyz[1], mat[7], xyz[2] * mat[8]))); // 6 fmaf + 3 mult = 9 flops
+}
+#endif
+#ifndef __KERNEL_METAL__
+template<typename Vec3T>
+__hostdev__ inline Vec3T matMult(__global__ const double* mat, __global__ const Vec3T& xyz)
{
return Vec3T(fma(static_cast<double>(xyz[0]), mat[0], fma(static_cast<double>(xyz[1]), mat[1], static_cast<double>(xyz[2]) * mat[2])),
fma(static_cast<double>(xyz[0]), mat[3], fma(static_cast<double>(xyz[1]), mat[4], static_cast<double>(xyz[2]) * mat[5])),
fma(static_cast<double>(xyz[0]), mat[6], fma(static_cast<double>(xyz[1]), mat[7], static_cast<double>(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops
}
-
+#endif
template<typename Vec3T>
-__hostdev__ inline Vec3T matMult(const float* mat, const float* vec, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMult(__global__ const float* mat, __global__ const float* vec, __global__ const Vec3T& xyz)
{
return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[1], fmaf(xyz[2], mat[2], vec[0]))),
fmaf(xyz[0], mat[3], fmaf(xyz[1], mat[4], fmaf(xyz[2], mat[5], vec[1]))),
fmaf(xyz[0], mat[6], fmaf(xyz[1], mat[7], fmaf(xyz[2], mat[8], vec[2])))); // 9 fmaf = 9 flops
}
-
+#ifndef __KERNEL_METAL__
template<typename Vec3T>
-__hostdev__ inline Vec3T matMult(const double* mat, const double* vec, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMult(__global__ const double* mat, __global__ const double* vec, __global__ const Vec3T& xyz)
{
return Vec3T(fma(static_cast<double>(xyz[0]), mat[0], fma(static_cast<double>(xyz[1]), mat[1], fma(static_cast<double>(xyz[2]), mat[2], vec[0]))),
fma(static_cast<double>(xyz[0]), mat[3], fma(static_cast<double>(xyz[1]), mat[4], fma(static_cast<double>(xyz[2]), mat[5], vec[1]))),
fma(static_cast<double>(xyz[0]), mat[6], fma(static_cast<double>(xyz[1]), mat[7], fma(static_cast<double>(xyz[2]), mat[8], vec[2])))); // 9 fma = 9 flops
}
-
+#endif
// matMultT: Multiply with the transpose:
template<typename Vec3T>
-__hostdev__ inline Vec3T matMultT(const float* mat, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMultT(__global__ const float* mat, __global__ const Vec3T& xyz)
{
return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[3], xyz[2] * mat[6])),
fmaf(xyz[0], mat[1], fmaf(xyz[1], mat[4], xyz[2] * mat[7])),
fmaf(xyz[0], mat[2], fmaf(xyz[1], mat[5], xyz[2] * mat[8]))); // 6 fmaf + 3 mult = 9 flops
}
-
+#ifndef __KERNEL_METAL__
template<typename Vec3T>
-__hostdev__ inline Vec3T matMultT(const double* mat, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMultT(__global__ const double* mat, __global__ const Vec3T& xyz)
{
return Vec3T(fma(static_cast<double>(xyz[0]), mat[0], fma(static_cast<double>(xyz[1]), mat[3], static_cast<double>(xyz[2]) * mat[6])),
fma(static_cast<double>(xyz[0]), mat[1], fma(static_cast<double>(xyz[1]), mat[4], static_cast<double>(xyz[2]) * mat[7])),
fma(static_cast<double>(xyz[0]), mat[2], fma(static_cast<double>(xyz[1]), mat[5], static_cast<double>(xyz[2]) * mat[8]))); // 6 fmaf + 3 mult = 9 flops
}
-
+#endif
template<typename Vec3T>
-__hostdev__ inline Vec3T matMultT(const float* mat, const float* vec, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMultT(__global__ const float* mat, __global__ const float* vec, __global__ const Vec3T& xyz)
{
return Vec3T(fmaf(xyz[0], mat[0], fmaf(xyz[1], mat[3], fmaf(xyz[2], mat[6], vec[0]))),
fmaf(xyz[0], mat[1], fmaf(xyz[1], mat[4], fmaf(xyz[2], mat[7], vec[1]))),
fmaf(xyz[0], mat[2], fmaf(xyz[1], mat[5], fmaf(xyz[2], mat[8], vec[2])))); // 9 fmaf = 9 flops
}
-
+#ifndef __KERNEL_METAL__
template<typename Vec3T>
-__hostdev__ inline Vec3T matMultT(const double* mat, const double* vec, const Vec3T& xyz)
+__hostdev__ inline Vec3T matMultT(__global__ const double* mat, __global__ const double* vec, __global__ const Vec3T& xyz)
{
return Vec3T(fma(static_cast<double>(xyz[0]), mat[0], fma(static_cast<double>(xyz[1]), mat[3], fma(static_cast<double>(xyz[2]), mat[6], vec[0]))),
fma(static_cast<double>(xyz[0]), mat[1], fma(static_cast<double>(xyz[1]), mat[4], fma(static_cast<double>(xyz[2]), mat[7], vec[1]))),
fma(static_cast<double>(xyz[0]), mat[2], fma(static_cast<double>(xyz[1]), mat[5], fma(static_cast<double>(xyz[2]), mat[8], vec[2])))); // 9 fma = 9 flops
}
-
+#endif
// ----------------------------> BBox <-------------------------------------
// Base-class for static polymorphism (cannot be constructed directly)
@@ -1600,22 +1806,34 @@ template<typename Vec3T>
struct BaseBBox
{
Vec3T mCoord[2];
- __hostdev__ bool operator==(const BaseBBox& rhs) const { return mCoord[0] == rhs.mCoord[0] && mCoord[1] == rhs.mCoord[1]; };
- __hostdev__ bool operator!=(const BaseBBox& rhs) const { return mCoord[0] != rhs.mCoord[0] || mCoord[1] != rhs.mCoord[1]; };
- __hostdev__ const Vec3T& operator[](int i) const { return mCoord[i]; }
- __hostdev__ Vec3T& operator[](int i) { return mCoord[i]; }
- __hostdev__ Vec3T& min() { return mCoord[0]; }
- __hostdev__ Vec3T& max() { return mCoord[1]; }
- __hostdev__ const Vec3T& min() const { return mCoord[0]; }
- __hostdev__ const Vec3T& max() const { return mCoord[1]; }
- __hostdev__ Coord& translate(const Vec3T& xyz)
+ __hostdev__ bool operator==(__global__ const BaseBBox& rhs) const __global__ { return mCoord[0] == rhs.mCoord[0] && mCoord[1] == rhs.mCoord[1]; };
+ __hostdev__ bool operator!=(__global__ const BaseBBox& rhs) const __global__ { return mCoord[0] != rhs.mCoord[0] || mCoord[1] != rhs.mCoord[1]; };
+ __hostdev__ __global__ const Vec3T& operator[](int i) const __global__ { return mCoord[i]; }
+ __hostdev__ __global__ Vec3T& operator[](int i) __global__ { return mCoord[i]; }
+ __hostdev__ __global__ Vec3T& min() __global__ { return mCoord[0]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __global__ Vec3T& min() __local__ { return mCoord[0]; }
+#endif
+ __hostdev__ __global__ Vec3T& max() __global__ { return mCoord[1]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __global__ Vec3T& max() __local__ { return mCoord[1]; }
+#endif
+ __hostdev__ __global__ const Vec3T& min() const __global__ { return mCoord[0]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __local__ const Vec3T& min() const __local__ { return mCoord[0]; }
+#endif
+ __hostdev__ __global__ const Vec3T& max() const __global__ { return mCoord[1]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __local__ const Vec3T& max() const __local__ { return mCoord[1]; }
+#endif
+ __hostdev__ __global__ Coord& translate(__global__ const Vec3T& xyz) __global__
{
mCoord[0] += xyz;
mCoord[1] += xyz;
return *this;
}
// @brief Expand this bounding box to enclose point (i, j, k).
- __hostdev__ BaseBBox& expand(const Vec3T& xyz)
+ __hostdev__ __global__ BaseBBox& expand(__global__ const Vec3T& xyz) __global__
{
mCoord[0].minComponent(xyz);
mCoord[1].maxComponent(xyz);
@@ -1623,7 +1841,7 @@ struct BaseBBox
}
/// @brief Intersect this bounding box with the given bounding box.
- __hostdev__ BaseBBox& intersect(const BaseBBox& bbox)
+ __hostdev__ __global__ BaseBBox& intersect(__global__ const BaseBBox& bbox) __global__
{
mCoord[0].maxComponent(bbox.min());
mCoord[1].minComponent(bbox.max());
@@ -1634,7 +1852,7 @@ struct BaseBBox
//{
// return BaseBBox(mCoord[0].offsetBy(-padding),mCoord[1].offsetBy(padding));
//}
- __hostdev__ bool isInside(const Vec3T& xyz)
+ __hostdev__ bool isInside(__global__ const Vec3T& xyz)
{
if (xyz[0] < mCoord[0][0] || xyz[1] < mCoord[0][1] || xyz[2] < mCoord[0][2])
return false;
@@ -1642,10 +1860,20 @@ struct BaseBBox
return false;
return true;
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isInside(__local__ const Vec3T& xyz)
+ {
+ if (xyz[0] < mCoord[0][0] || xyz[1] < mCoord[0][1] || xyz[2] < mCoord[0][2])
+ return false;
+ if (xyz[0] > mCoord[1][0] || xyz[1] > mCoord[1][1] || xyz[2] > mCoord[1][2])
+ return false;
+ return true;
+ }
+#endif
protected:
__hostdev__ BaseBBox() {}
- __hostdev__ BaseBBox(const Vec3T& min, const Vec3T& max)
+ __hostdev__ BaseBBox(__global__ const Vec3T& min, __global__ const Vec3T& max)
: mCoord{min, max}
{
}
@@ -1659,38 +1887,45 @@ struct BBox;
/// @note Min is inclusive and max is exclusive. If min = max the dimension of
/// the bounding box is zero and therefore it is also empty.
template<typename Vec3T>
-struct BBox<Vec3T, true> : public BaseBBox<Vec3T>
+struct BBox<Vec3T, true>
+#if !defined(__KERNEL_METAL__)
+ : public BaseBBox<Vec3T>
+#endif
{
using Vec3Type = Vec3T;
using ValueType = typename Vec3T::ValueType;
static_assert(is_floating_point<ValueType>::value, "Expected a floating point coordinate type");
using BaseT = BaseBBox<Vec3T>;
+#if defined(__KERNEL_METAL__)
+ BaseBBox<Vec3T> mCoord;
+#else
using BaseT::mCoord;
+#endif
+
__hostdev__ BBox()
: BaseT(Vec3T( Maximum<typename Vec3T::ValueType>::value()),
Vec3T(-Maximum<typename Vec3T::ValueType>::value()))
{
}
- __hostdev__ BBox(const Vec3T& min, const Vec3T& max)
+ __hostdev__ BBox(__global__ const Vec3T& min, __global__ const Vec3T& max)
: BaseT(min, max)
{
}
- __hostdev__ BBox(const Coord& min, const Coord& max)
+ __hostdev__ BBox(__global__ const Coord& min, __global__ const Coord& max)
: BaseT(Vec3T(ValueType(min[0]), ValueType(min[1]), ValueType(min[2])),
Vec3T(ValueType(max[0] + 1), ValueType(max[1] + 1), ValueType(max[2] + 1)))
{
}
- __hostdev__ static BBox createCube(const Coord& min, typename Coord::ValueType dim)
+ __hostdev__ static BBox createCube(__global__ const Coord& min, typename Coord::ValueType dim)
{
return BBox(min, min.offsetBy(dim));
}
-
- __hostdev__ BBox(const BaseBBox<Coord>& bbox) : BBox(bbox[0], bbox[1]) {}
+ __hostdev__ BBox(__global__ const BaseBBox<Coord>& bbox) __global__ : BBox(bbox[0], bbox[1]) {}
__hostdev__ bool empty() const { return mCoord[0][0] >= mCoord[1][0] ||
mCoord[0][1] >= mCoord[1][1] ||
mCoord[0][2] >= mCoord[1][2]; }
__hostdev__ Vec3T dim() const { return this->empty() ? Vec3T(0) : this->max() - this->min(); }
- __hostdev__ bool isInside(const Vec3T& p) const
+ __hostdev__ bool isInside(__global__ const Vec3T& p) const
{
return p[0] > mCoord[0][0] && p[1] > mCoord[0][1] && p[2] > mCoord[0][2] &&
p[0] < mCoord[1][0] && p[1] < mCoord[1][1] && p[2] < mCoord[1][2];
@@ -1703,24 +1938,32 @@ struct BBox<Vec3T, true> : public BaseBBox<Vec3T>
/// @note Both min and max are INCLUDED in the bbox so dim = max - min + 1. So,
/// if min = max the bounding box contains exactly one point and dim = 1!
template<typename CoordT>
-struct BBox<CoordT, false> : public BaseBBox<CoordT>
+struct BBox<CoordT, false>
+#if !defined(__KERNEL_METAL__)
+ : public BaseBBox<CoordT>
+#endif
{
+
static_assert(is_same<int, typename CoordT::ValueType>::value, "Expected \"int\" coordinate type");
using BaseT = BaseBBox<CoordT>;
+#if defined(__KERNEL_METAL__)
+ BaseBBox<CoordT> mCoord;
+#else
using BaseT::mCoord;
+#endif
/// @brief Iterator over the domain covered by a BBox
/// @details z is the fastest-moving coordinate.
class Iterator
{
- const BBox& mBBox;
+ __global__ const BBox& mBBox;
CoordT mPos;
public:
- __hostdev__ Iterator(const BBox& b)
+ __hostdev__ Iterator(__global__ const BBox& b)
: mBBox(b)
, mPos(b.min())
{
}
- __hostdev__ Iterator& operator++()
+ __hostdev__ __global__ Iterator& operator++()
{
if (mPos[2] < mBBox[1][2]) {// this is the most common case
++mPos[2];
@@ -1734,7 +1977,7 @@ struct BBox<CoordT, false> : public BaseBBox<CoordT>
}
return *this;
}
- __hostdev__ Iterator operator++(int)
+ __hostdev__ Iterator operator++(int) __global__
{
auto tmp = *this;
++(*this);
@@ -1742,20 +1985,20 @@ struct BBox<CoordT, false> : public BaseBBox<CoordT>
}
/// @brief Return @c true if the iterator still points to a valid coordinate.
__hostdev__ operator bool() const { return mPos[0] <= mBBox[1][0]; }
- __hostdev__ const CoordT& operator*() const { return mPos; }
+ __hostdev__ __global__ const CoordT& operator*() const { return mPos; }
}; // Iterator
__hostdev__ Iterator begin() const { return Iterator{*this}; }
__hostdev__ BBox()
: BaseT(CoordT::max(), CoordT::min())
{
}
- __hostdev__ BBox(const CoordT& min, const CoordT& max)
+ __hostdev__ BBox(__global__ const CoordT& min, __global__ const CoordT& max)
: BaseT(min, max)
{
}
template<typename SplitT>
- __hostdev__ BBox(BBox& other, const SplitT&)
+ __hostdev__ BBox(__global__ BBox& other, __global__ const SplitT&)
: BaseT(other.mCoord[0], other.mCoord[1])
{
NANOVDB_ASSERT(this->is_divisible());
@@ -1764,7 +2007,7 @@ struct BBox<CoordT, false> : public BaseBBox<CoordT>
other.mCoord[0][n] = mCoord[1][n] + 1;
}
- __hostdev__ static BBox createCube(const CoordT& min, typename CoordT::ValueType dim)
+ __hostdev__ static BBox createCube(__global__ const CoordT& min, typename CoordT::ValueType dim)
{
return BBox(min, min.offsetBy(dim - 1));
}
@@ -1778,15 +2021,23 @@ struct BBox<CoordT, false> : public BaseBBox<CoordT>
mCoord[0][2] > mCoord[1][2]; }
__hostdev__ CoordT dim() const { return this->empty() ? Coord(0) : this->max() - this->min() + Coord(1); }
__hostdev__ uint64_t volume() const { auto d = this->dim(); return uint64_t(d[0])*uint64_t(d[1])*uint64_t(d[2]); }
- __hostdev__ bool isInside(const CoordT& p) const { return !(CoordT::lessThan(p, this->min()) || CoordT::lessThan(this->max(), p)); }
- /// @brief Return @c true if the given bounding box is inside this bounding box.
- __hostdev__ bool isInside(const BBox& b) const
+ __hostdev__ bool isInside(__global__ const CoordT& p) const { return !(CoordT::lessThan(p, this->min()) || CoordT::lessThan(this->max(), p)); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isInside(__local__ const CoordT& p) const { return !(CoordT::lessThan(p, this->min()) || CoordT::lessThan(this->max(), p)); }
+#endif
+ __hostdev__ bool isInside(__global__ const BBox& b) const
{
return !(CoordT::lessThan(b.min(), this->min()) || CoordT::lessThan(this->max(), b.max()));
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isInside(__local__ const BBox& b) const
+ {
+ return !(CoordT::lessThan(b.min(), this->min()) || CoordT::lessThan(this->max(), b.max()));
+ }
+#endif
/// @brief Return @c true if the given bounding box overlaps with this bounding box.
- __hostdev__ bool hasOverlap(const BBox& b) const
+ __hostdev__ bool hasOverlap(__global__ const BBox& b) const
{
return !(CoordT::lessThan(this->max(), b.min()) || CoordT::lessThan(b.max(), this->min()));
}
@@ -1826,6 +2077,8 @@ __hostdev__ static inline uint32_t FindLowestOn(uint32_t v)
return static_cast<uint32_t>(index);
#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS)
return static_cast<uint32_t>(__builtin_ctzl(v));
+#elif defined(__KERNEL_METAL__)
+ return ctz(v);
#else
//#warning Using software implementation for FindLowestOn(uint32_t)
static const unsigned char DeBruijn[32] = {
@@ -1856,6 +2109,8 @@ __hostdev__ static inline uint32_t FindHighestOn(uint32_t v)
return static_cast<uint32_t>(index);
#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS)
return sizeof(unsigned long) * 8 - 1 - __builtin_clzl(v);
+#elif defined(__KERNEL_METAL__)
+ return clz(v);
#else
//#warning Using software implementation for FindHighestOn(uint32_t)
static const unsigned char DeBruijn[32] = {
@@ -1884,6 +2139,8 @@ __hostdev__ static inline uint32_t FindLowestOn(uint64_t v)
return static_cast<uint32_t>(index);
#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS)
return static_cast<uint32_t>(__builtin_ctzll(v));
+#elif defined(__KERNEL_METAL__)
+ return ctz(v);
#else
//#warning Using software implementation for FindLowestOn(uint64_t)
static const unsigned char DeBruijn[64] = {
@@ -1918,6 +2175,8 @@ __hostdev__ static inline uint32_t FindHighestOn(uint64_t v)
return static_cast<uint32_t>(index);
#elif (defined(__GNUC__) || defined(__clang__)) && defined(NANOVDB_USE_INTRINSICS)
return sizeof(unsigned long) * 8 - 1 - __builtin_clzll(v);
+#elif defined(__KERNEL_METAL__)
+ return clz(v);
#else
const uint32_t* p = reinterpret_cast<const uint32_t*>(&v);
return p[1] ? 32u + FindHighestOn(p[1]) : FindHighestOn(p[0]);
@@ -1955,8 +2214,8 @@ __hostdev__ inline uint32_t CountOn(uint64_t v)
template<uint32_t LOG2DIM>
class Mask
{
- static constexpr uint32_t SIZE = 1U << (3 * LOG2DIM); // Number of bits in mask
- static constexpr uint32_t WORD_COUNT = SIZE >> 6; // Number of 64 bit words
+ static __constant__ constexpr uint32_t SIZE = 1U << (3 * LOG2DIM); // Number of bits in mask
+ static __constant__ constexpr uint32_t WORD_COUNT = SIZE >> 6; // Number of 64 bit words
uint64_t mWords[WORD_COUNT];
public:
@@ -1973,7 +2232,7 @@ public:
__hostdev__ uint32_t countOn() const
{
uint32_t sum = 0, n = WORD_COUNT;
- for (const uint64_t* w = mWords; n--; ++w)
+ for (__global__ const uint64_t* w = mWords; n--; ++w)
sum += CountOn(*w);
return sum;
}
@@ -1982,7 +2241,7 @@ public:
inline __hostdev__ uint32_t countOn(uint32_t i) const
{
uint32_t n = i >> 6, sum = CountOn( mWords[n] & ((uint64_t(1) << (i & 63u))-1u) );
- for (const uint64_t* w = mWords; n--; ++w) sum += CountOn(*w);
+ for (__global__ const uint64_t* w = mWords; n--; ++w) sum += CountOn(*w);
return sum;
}
@@ -1990,13 +2249,21 @@ public:
class Iterator
{
public:
- __hostdev__ Iterator() : mPos(Mask::SIZE), mParent(nullptr){}
- __hostdev__ Iterator(uint32_t pos, const Mask* parent) : mPos(pos), mParent(parent){}
- Iterator& operator=(const Iterator&) = default;
+ __hostdev__ Iterator()
+ : mPos(Mask::SIZE)
+ , mParent(nullptr)
+ {
+ }
+ __hostdev__ Iterator(uint32_t pos, __global__ const Mask* parent)
+ : mPos(pos)
+ , mParent(parent)
+ {
+ }
+ __global__ Iterator& operator=(__global__ const Iterator&) = default;
__hostdev__ uint32_t operator*() const { return mPos; }
__hostdev__ uint32_t pos() const { return mPos; }
__hostdev__ operator bool() const { return mPos != Mask::SIZE; }
- __hostdev__ Iterator& operator++()
+ __hostdev__ __global__ Iterator& operator++()
{
mPos = mParent->findNext<On>(mPos + 1);
return *this;
@@ -2010,7 +2277,7 @@ public:
private:
uint32_t mPos;
- const Mask* mParent;
+ __global__ const Mask* mParent;
}; // Member class Iterator
using OnIterator = Iterator<true>;
@@ -2034,7 +2301,7 @@ public:
}
/// @brief Copy constructor
- __hostdev__ Mask(const Mask& other)
+ __hostdev__ Mask(__global__ const Mask& other)
{
for (uint32_t i = 0; i < WORD_COUNT; ++i)
mWords[i] = other.mWords[i];
@@ -2042,36 +2309,36 @@ public:
/// @brief Return a const reference to the <i>n</i>th word of the bit mask, for a word of arbitrary size.
template<typename WordT>
- __hostdev__ const WordT& getWord(int n) const
+ __hostdev__ __global__ const WordT& getWord(int n) const
{
NANOVDB_ASSERT(n * 8 * sizeof(WordT) < SIZE);
- return reinterpret_cast<const WordT*>(mWords)[n];
+ return reinterpret_cast<__global__ const WordT*>(mWords)[n];
}
/// @brief Return a reference to the <i>n</i>th word of the bit mask, for a word of arbitrary size.
template<typename WordT>
- __hostdev__ WordT& getWord(int n)
+ __hostdev__ __global__ WordT& getWord(int n)
{
NANOVDB_ASSERT(n * 8 * sizeof(WordT) < SIZE);
- return reinterpret_cast<WordT*>(mWords)[n];
+ return reinterpret_cast<__global__ WordT*>(mWords)[n];
}
/// @brief Assignment operator that works with openvdb::util::NodeMask
template<typename MaskT>
- __hostdev__ Mask& operator=(const MaskT& other)
+ __hostdev__ __global__ Mask& operator=(__global__ const MaskT& other)
{
static_assert(sizeof(Mask) == sizeof(MaskT), "Mismatching sizeof");
static_assert(WORD_COUNT == MaskT::WORD_COUNT, "Mismatching word count");
static_assert(LOG2DIM == MaskT::LOG2DIM, "Mismatching LOG2DIM");
- auto *src = reinterpret_cast<const uint64_t*>(&other);
- uint64_t *dst = mWords;
+ __global__ auto *src = reinterpret_cast<__global__ const uint64_t*>(&other);
+ __global__ uint64_t *dst = mWords;
for (uint32_t i = 0; i < WORD_COUNT; ++i) {
*dst++ = *src++;
}
return *this;
}
- __hostdev__ bool operator==(const Mask& other) const
+ __hostdev__ bool operator==(__global__ const Mask& other) const
{
for (uint32_t i = 0; i < WORD_COUNT; ++i) {
if (mWords[i] != other.mWords[i]) return false;
@@ -2079,22 +2346,33 @@ public:
return true;
}
- __hostdev__ bool operator!=(const Mask& other) const { return !((*this) == other); }
+ __hostdev__ bool operator!=(__global__ const Mask& other) const { return !((*this) == other); }
/// @brief Return true if the given bit is set.
- __hostdev__ bool isOn(uint32_t n) const { return 0 != (mWords[n >> 6] & (uint64_t(1) << (n & 63))); }
-
+ __hostdev__ bool isOn(uint32_t n) const __global__ { return 0 != (mWords[n >> 6] & (uint64_t(1) << (n & 63))); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isOn(uint32_t n) const __local__ { return 0 != (mWords[n >> 6] & (uint64_t(1) << (n & 63))); }
+#endif
/// @brief Return true if the given bit is NOT set.
- __hostdev__ bool isOff(uint32_t n) const { return 0 == (mWords[n >> 6] & (uint64_t(1) << (n & 63))); }
+ __hostdev__ bool isOff(uint32_t n) const __global__ { return 0 == (mWords[n >> 6] & (uint64_t(1) << (n & 63))); }
/// @brief Return true if all the bits are set in this Mask.
- __hostdev__ bool isOn() const
+ __hostdev__ bool isOn() const __global__
{
for (uint32_t i = 0; i < WORD_COUNT; ++i)
if (mWords[i] != ~uint64_t(0))
return false;
return true;
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isOn() const __local__
+ {
+ for (uint32_t i = 0; i < WORD_COUNT; ++i)
+ if (mWords[i] != ~uint64_t(0))
+ return false;
+ return true;
+ }
+#endif
/// @brief Return true if none of the bits are set in this Mask.
__hostdev__ bool isOff() const
@@ -2115,7 +2393,7 @@ public:
__hostdev__ void set(uint32_t n, bool On)
{
#if 1 // switch between branchless
- auto &word = mWords[n >> 6];
+ __global__ auto &word = mWords[n >> 6];
n &= 63;
word &= ~(uint64_t(1) << n);
word |= uint64_t(On) << n;
@@ -2149,40 +2427,40 @@ public:
__hostdev__ void toggle()
{
uint32_t n = WORD_COUNT;
- for (auto* w = mWords; n--; ++w)
+ for (__global__ auto* w = mWords; n--; ++w)
*w = ~*w;
}
__hostdev__ void toggle(uint32_t n) { mWords[n >> 6] ^= uint64_t(1) << (n & 63); }
/// @brief Bitwise intersection
- __hostdev__ Mask& operator&=(const Mask& other)
+ __hostdev__ __global__ Mask& operator&=(__global__ const Mask& other)
{
- uint64_t *w1 = mWords;
- const uint64_t *w2 = other.mWords;
+ __global__ uint64_t *w1 = mWords;
+ __global__ const uint64_t *w2 = other.mWords;
for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 &= *w2;
return *this;
}
/// @brief Bitwise union
- __hostdev__ Mask& operator|=(const Mask& other)
+ __hostdev__ __global__ Mask& operator|=(__global__ const Mask& other)
{
- uint64_t *w1 = mWords;
- const uint64_t *w2 = other.mWords;
+ __global__ uint64_t *w1 = mWords;
+ __global__ const uint64_t *w2 = other.mWords;
for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 |= *w2;
return *this;
}
/// @brief Bitwise difference
- __hostdev__ Mask& operator-=(const Mask& other)
+ __hostdev__ __global__ Mask& operator-=(__global__ const Mask& other)
{
- uint64_t *w1 = mWords;
- const uint64_t *w2 = other.mWords;
+ __global__ uint64_t *w1 = mWords;
+ __global__ const uint64_t *w2 = other.mWords;
for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 &= ~*w2;
return *this;
}
/// @brief Bitwise XOR
- __hostdev__ Mask& operator^=(const Mask& other)
+ __hostdev__ __global__ Mask& operator^=(__global__ const Mask& other)
{
- uint64_t *w1 = mWords;
- const uint64_t *w2 = other.mWords;
+ __global__ uint64_t *w1 = mWords;
+ __global__ const uint64_t *w2 = other.mWords;
for (uint32_t n = WORD_COUNT; n--; ++w1, ++w2) *w1 ^= *w2;
return *this;
}
@@ -2194,7 +2472,7 @@ private:
__hostdev__ uint32_t findFirst() const
{
uint32_t n = 0;
- const uint64_t* w = mWords;
+ __global__ const uint64_t* w = mWords;
for (; n<WORD_COUNT && !(On ? *w : ~*w); ++w, ++n);
return n==WORD_COUNT ? SIZE : (n << 6) + FindLowestOn(On ? *w : ~*w);
}
@@ -2233,53 +2511,73 @@ struct Map
/// @brief Initialize the member data
template<typename Mat3T, typename Vec3T>
- __hostdev__ void set(const Mat3T& mat, const Mat3T& invMat, const Vec3T& translate, double taper);
+ __hostdev__ void set(__global__ const Mat3T& mat, __global__ const Mat3T& invMat, __global__ const Vec3T& translate, double taper) __global__;
/// @brief Initialize the member data
/// @note The last (4th) row of invMat is actually ignored.
template<typename Mat4T>
- __hostdev__ void set(const Mat4T& mat, const Mat4T& invMat, double taper) {this->set(mat, invMat, mat[3], taper);}
+ __hostdev__ void set(__global__ const Mat4T& mat, __global__ const Mat4T& invMat, double taper) __global__ {this->set(mat, invMat, mat[3], taper);}
template<typename Vec3T>
- __hostdev__ void set(double scale, const Vec3T &translation, double taper);
+ __hostdev__ void set(double scale, __global__ const Vec3T &translation, double taper) __global__;
template<typename Vec3T>
- __hostdev__ Vec3T applyMap(const Vec3T& xyz) const { return matMult(mMatD, mVecD, xyz); }
+ __hostdev__ Vec3T applyMap(__global__ const Vec3T& xyz) const { return matMult(mMatD, mVecD, xyz); }
template<typename Vec3T>
- __hostdev__ Vec3T applyMapF(const Vec3T& xyz) const { return matMult(mMatF, mVecF, xyz); }
+ __hostdev__ Vec3T applyMapF(__global__ const Vec3T& xyz) const { return matMult(mMatF, mVecF, xyz); }
template<typename Vec3T>
- __hostdev__ Vec3T applyJacobian(const Vec3T& xyz) const { return matMult(mMatD, xyz); }
+ __hostdev__ Vec3T applyJacobian(__global__ const Vec3T& xyz) const { return matMult(mMatD, xyz); }
template<typename Vec3T>
- __hostdev__ Vec3T applyJacobianF(const Vec3T& xyz) const { return matMult(mMatF, xyz); }
+ __hostdev__ Vec3T applyJacobianF(__global__ const Vec3T& xyz) const { return matMult(mMatF, xyz); }
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseMap(const Vec3T& xyz) const
+ __hostdev__ Vec3T applyInverseMap(__global__ const Vec3T& xyz) const __global__
{
return matMult(mInvMatD, Vec3T(xyz[0] - mVecD[0], xyz[1] - mVecD[1], xyz[2] - mVecD[2]));
}
+#if defined(__KERNEL_METAL__)
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseMapF(const Vec3T& xyz) const
+ __hostdev__ Vec3T applyInverseMap(__local__ const Vec3T& xyz) const __global__
+ {
+ return matMult(mInvMatD, Vec3T(xyz[0] - mVecD[0], xyz[1] - mVecD[1], xyz[2] - mVecD[2]));
+ }
+#endif
+ template<typename Vec3T>
+ __hostdev__ Vec3T applyInverseMapF(__global__ const Vec3T& xyz) const __global__
{
return matMult(mInvMatF, Vec3T(xyz[0] - mVecF[0], xyz[1] - mVecF[1], xyz[2] - mVecF[2]));
}
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ __hostdev__ Vec3T applyInverseMapF(__local__ const Vec3T& xyz) const __global__
+ {
+ return matMult(mInvMatF, Vec3T(xyz[0] - mVecF[0], xyz[1] - mVecF[1], xyz[2] - mVecF[2]));
+ }
+#endif
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseJacobian(const Vec3T& xyz) const { return matMult(mInvMatD, xyz); }
+ __hostdev__ Vec3T applyInverseJacobian(__global__ const Vec3T& xyz) const __global__ { return matMult(mInvMatD, xyz); }
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseJacobianF(const Vec3T& xyz) const { return matMult(mInvMatF, xyz); }
+ __hostdev__ Vec3T applyInverseJacobianF(__global__ const Vec3T& xyz) const __global__ { return matMult(mInvMatF, xyz); }
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ __hostdev__ Vec3T applyInverseJacobianF(__local__ const Vec3T& xyz) const __global__ { return matMult(mInvMatF, xyz); }
+#endif
template<typename Vec3T>
- __hostdev__ Vec3T applyIJT(const Vec3T& xyz) const { return matMultT(mInvMatD, xyz); }
+ __hostdev__ Vec3T applyIJT(__global__ const Vec3T& xyz) const { return matMultT(mInvMatD, xyz); }
template<typename Vec3T>
- __hostdev__ Vec3T applyIJTF(const Vec3T& xyz) const { return matMultT(mInvMatF, xyz); }
+ __hostdev__ Vec3T applyIJTF(__global__ const Vec3T& xyz) const { return matMultT(mInvMatF, xyz); }
}; // Map
template<typename Mat3T, typename Vec3T>
-__hostdev__ inline void Map::set(const Mat3T& mat, const Mat3T& invMat, const Vec3T& translate, double taper)
+__hostdev__ inline void Map::set(__global__ const Mat3T& mat, __global__ const Mat3T& invMat, __global__ const Vec3T& translate, double taper) __global__
{
- float *mf = mMatF, *vf = mVecF, *mif = mInvMatF;
- double *md = mMatD, *vd = mVecD, *mid = mInvMatD;
+ __global__ float * mf = mMatF, *vf = mVecF;
+ __global__ float* mif = mInvMatF;
+ __global__ double *md = mMatD, *vd = mVecD;
+ __global__ double* mid = mInvMatD;
mTaperF = static_cast<float>(taper);
mTaperD = taper;
for (int i = 0; i < 3; ++i) {
@@ -2295,8 +2593,19 @@ __hostdev__ inline void Map::set(const Mat3T& mat, const Mat3T& invMat, const Ve
}
template<typename Vec3T>
-__hostdev__ inline void Map::set(double dx, const Vec3T &trans, double taper)
+__hostdev__ inline void Map::set(double dx, __global__ const Vec3T &trans, double taper) __global__
{
+#if defined(__KERNEL_METAL__)
+ const float mat[3][3] = {
+ {(float)dx, 0.0, 0.0}, // row 0
+ {0.0, (float)dx, 0.0}, // row 1
+ {0.0, 0.0, (float)dx}, // row 2
+ }, idx = 1.0/(float)dx, invMat[3][3] = {
+ {idx, 0.0, 0.0}, // row 0
+ {0.0, idx, 0.0}, // row 1
+ {0.0, 0.0, idx}, // row 2
+ };
+#else
const double mat[3][3] = {
{dx, 0.0, 0.0}, // row 0
{0.0, dx, 0.0}, // row 1
@@ -2306,6 +2615,7 @@ __hostdev__ inline void Map::set(double dx, const Vec3T &trans, double taper)
{0.0, idx, 0.0}, // row 1
{0.0, 0.0, idx}, // row 2
};
+#endif
this->set(mat, invMat, trans, taper);
}
@@ -2313,7 +2623,7 @@ __hostdev__ inline void Map::set(double dx, const Vec3T &trans, double taper)
struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridBlindMetaData
{
- static const int MaxNameSize = 256;// due to NULL termination the maximum length is one less!
+ static __constant__ const int MaxNameSize = 256;// due to NULL termination the maximum length is one less!
int64_t mByteOffset; // byte offset to the blind data, relative to the GridData.
uint64_t mElementCount; // number of elements, e.g. point count
uint32_t mFlags; // flags
@@ -2328,10 +2638,10 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridBlindMetaData
return blindDataCount * sizeof(GridBlindMetaData);
}
- __hostdev__ void setBlindData(void *ptr) { mByteOffset = PtrDiff(ptr, this); }
+ __hostdev__ void setBlindData(__global__ void *ptr) __global__ { mByteOffset = PtrDiff(ptr, this); }
template <typename T>
- __hostdev__ const T* getBlindData() const { return PtrAdd<T>(this, mByteOffset); }
+ __hostdev__ __global__ const T* getBlindData() const { return PtrAdd<T>(this, mByteOffset); }
}; // GridBlindMetaData
@@ -2430,7 +2740,7 @@ struct NodeTrait<const GridOrTreeOrRootT, 3>
/// @note No client code should (or can) interface with this struct so it can safely be ignored!
struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData
{// sizeof(GridData) = 672B
- static const int MaxNameSize = 256;// due to NULL termination the maximum length is one less
+ static __constant__ const int MaxNameSize = 256;// due to NULL termination the maximum length is one less
uint64_t mMagic; // 8B (0) magic to validate it is valid grid data.
uint64_t mChecksum; // 8B (8). Checksum of grid buffer.
Version mVersion;// 4B (16) major, minor, and patch version numbers
@@ -2450,8 +2760,8 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData
uint64_t mData1, mData2;// 2x8B (656) padding to 32 B alignment. mData1 is use for the total number of values indexed by an IndexGrid
// Set and unset various bit flags
- __hostdev__ void setFlagsOff() { mFlags = uint32_t(0); }
- __hostdev__ void setMinMaxOn(bool on = true)
+ __hostdev__ void setFlagsOff() __global__ { mFlags = uint32_t(0); }
+ __hostdev__ void setMinMaxOn(bool on = true) __global__
{
if (on) {
mFlags |= static_cast<uint32_t>(GridFlags::HasMinMax);
@@ -2459,7 +2769,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData
mFlags &= ~static_cast<uint32_t>(GridFlags::HasMinMax);
}
}
- __hostdev__ void setBBoxOn(bool on = true)
+ __hostdev__ void setBBoxOn(bool on = true) __global__
{
if (on) {
mFlags |= static_cast<uint32_t>(GridFlags::HasBBox);
@@ -2467,7 +2777,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData
mFlags &= ~static_cast<uint32_t>(GridFlags::HasBBox);
}
}
- __hostdev__ void setLongGridNameOn(bool on = true)
+ __hostdev__ void setLongGridNameOn(bool on = true) __global__
{
if (on) {
mFlags |= static_cast<uint32_t>(GridFlags::HasLongGridName);
@@ -2475,7 +2785,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData
mFlags &= ~static_cast<uint32_t>(GridFlags::HasLongGridName);
}
}
- __hostdev__ void setAverageOn(bool on = true)
+ __hostdev__ void setAverageOn(bool on = true) __global__
{
if (on) {
mFlags |= static_cast<uint32_t>(GridFlags::HasAverage);
@@ -2483,7 +2793,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData
mFlags &= ~static_cast<uint32_t>(GridFlags::HasAverage);
}
}
- __hostdev__ void setStdDeviationOn(bool on = true)
+ __hostdev__ void setStdDeviationOn(bool on = true) __global__
{
if (on) {
mFlags |= static_cast<uint32_t>(GridFlags::HasStdDeviation);
@@ -2491,7 +2801,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData
mFlags &= ~static_cast<uint32_t>(GridFlags::HasStdDeviation);
}
}
- __hostdev__ void setBreadthFirstOn(bool on = true)
+ __hostdev__ void setBreadthFirstOn(bool on = true) __global__
{
if (on) {
mFlags |= static_cast<uint32_t>(GridFlags::IsBreadthFirst);
@@ -2502,37 +2812,49 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) GridData
// Affine transformations based on double precision
template<typename Vec3T>
- __hostdev__ Vec3T applyMap(const Vec3T& xyz) const { return mMap.applyMap(xyz); } // Pos: index -> world
+ __hostdev__ Vec3T applyMap(__global__ const Vec3T& xyz) const __global__ { return mMap.applyMap(xyz); } // Pos: index -> world
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseMap(const Vec3T& xyz) const { return mMap.applyInverseMap(xyz); } // Pos: world -> index
+ __hostdev__ Vec3T applyInverseMap(__global__ const Vec3T& xyz) const __global__ { return mMap.applyInverseMap(xyz); } // Pos: world -> index
+#if defined(__KERNEL_METAL__)
template<typename Vec3T>
- __hostdev__ Vec3T applyJacobian(const Vec3T& xyz) const { return mMap.applyJacobian(xyz); } // Dir: index -> world
+ __hostdev__ Vec3T applyInverseMap(__local__ const Vec3T& xyz) const __global__ { return mMap.applyInverseMap(xyz); } // Pos: world -> index
+#endif
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseJacobian(const Vec3T& xyz) const { return mMap.applyInverseJacobian(xyz); } // Dir: world -> index
+ __hostdev__ Vec3T applyJacobian(__global__ const Vec3T& xyz) const __global__ { return mMap.applyJacobian(xyz); } // Dir: index -> world
template<typename Vec3T>
- __hostdev__ Vec3T applyIJT(const Vec3T& xyz) const { return mMap.applyIJT(xyz); }
+ __hostdev__ Vec3T applyInverseJacobian(__global__ const Vec3T& xyz) const __global__ { return mMap.applyInverseJacobian(xyz); } // Dir: world -> index
+ template<typename Vec3T>
+ __hostdev__ Vec3T applyIJT(__global__ const Vec3T& xyz) const __global__ { return mMap.applyIJT(xyz); }
// Affine transformations based on single precision
template<typename Vec3T>
- __hostdev__ Vec3T applyMapF(const Vec3T& xyz) const { return mMap.applyMapF(xyz); } // Pos: index -> world
+ __hostdev__ Vec3T applyMapF(__global__ const Vec3T& xyz) const __global__ { return mMap.applyMapF(xyz); } // Pos: index -> world
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseMapF(const Vec3T& xyz) const { return mMap.applyInverseMapF(xyz); } // Pos: world -> index
+ __hostdev__ Vec3T applyInverseMapF(__global__ const Vec3T& xyz) const __global__ { return mMap.applyInverseMapF(xyz); } // Pos: world -> index
+#if defined(__KERNEL_METAL__)
template<typename Vec3T>
- __hostdev__ Vec3T applyJacobianF(const Vec3T& xyz) const { return mMap.applyJacobianF(xyz); } // Dir: index -> world
+ __hostdev__ Vec3T applyInverseMapF(__local__ const Vec3T& xyz) const __global__ { return mMap.applyInverseMapF(xyz); } // Pos: world -> index
+#endif
template<typename Vec3T>
- __hostdev__ Vec3T applyInverseJacobianF(const Vec3T& xyz) const { return mMap.applyInverseJacobianF(xyz); } // Dir: world -> index
+ __hostdev__ Vec3T applyJacobianF(__global__ const Vec3T& xyz) const __global__ { return mMap.applyJacobianF(xyz); } // Dir: index -> world
template<typename Vec3T>
- __hostdev__ Vec3T applyIJTF(const Vec3T& xyz) const { return mMap.applyIJTF(xyz); }
+ __hostdev__ Vec3T applyInverseJacobianF(__global__ const Vec3T& xyz) const __global__ { return mMap.applyInverseJacobianF(xyz); } // Dir: world -> index
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ __hostdev__ Vec3T applyInverseJacobianF(__local__ const Vec3T& xyz) const __global__ { return mMap.applyInverseJacobianF(xyz); } // Dir: world -> index
+#endif
+ template<typename Vec3T>
+ __hostdev__ Vec3T applyIJTF(__global__ const Vec3T& xyz) const __global__ { return mMap.applyIJTF(xyz); }
// @brief Return a non-const void pointer to the tree
- __hostdev__ void* treePtr() { return this + 1; }
+ __hostdev__ __global__ void* treePtr() __global__ { return this + 1; }
// @brief Return a const void pointer to the tree
- __hostdev__ const void* treePtr() const { return this + 1; }
+ __hostdev__ __global__ const void* treePtr() const __global__ { return this + 1; }
/// @brief Returns a const reference to the blindMetaData at the specified linear offset.
///
/// @warning The linear offset is assumed to be in the valid range
- __hostdev__ const GridBlindMetaData* blindMetaData(uint32_t n) const
+ __hostdev__ __global__ const GridBlindMetaData* blindMetaData(uint32_t n) const __global__
{
NANOVDB_ASSERT(n < mBlindMetadataCount);
return PtrAdd<GridBlindMetaData>(this, mBlindMetadataOffset) + n;
@@ -2552,8 +2874,17 @@ using DefaultReadAccessor = ReadAccessor<BuildT, 0, 1, 2>;
///
/// @note This the API of this class to interface with client code
template<typename TreeT>
-class Grid : private GridData
+class Grid
+#if !defined(__KERNEL_METAL__)
+ : private GridData
+#endif
{
+#if defined(__KERNEL_METAL__)
+ GridData _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) DataType::v
+#endif
public:
using TreeType = TreeT;
using RootType = typename TreeT::RootType;
@@ -2566,183 +2897,195 @@ public:
/// @brief Disallow constructions, copy and assignment
///
/// @note Only a Serializer, defined elsewhere, can instantiate this class
- Grid(const Grid&) = delete;
- Grid& operator=(const Grid&) = delete;
+ Grid(__global__ const Grid&) __global__ = delete;
+ __global__ Grid& operator=(__global__ const Grid&) __global__ = delete;
~Grid() = delete;
- __hostdev__ Version version() const { return DataType::mVersion; }
+ __hostdev__ Version version() const __global__ { return BASE(mVersion); }
- __hostdev__ DataType* data() { return reinterpret_cast<DataType*>(this); }
+ __hostdev__ __global__ DataType* data() __global__ { return reinterpret_cast<__global__ DataType*>(this); }
- __hostdev__ const DataType* data() const { return reinterpret_cast<const DataType*>(this); }
+ __hostdev__ __global__ const DataType* data() const __global__ { return reinterpret_cast<__global__ const DataType*>(this); }
/// @brief Return memory usage in bytes for this class only.
__hostdev__ static uint64_t memUsage() { return sizeof(GridData); }
/// @brief Return the memory footprint of the entire grid, i.e. including all nodes and blind data
- __hostdev__ uint64_t gridSize() const { return DataType::mGridSize; }
+ __hostdev__ uint64_t gridSize() const __global__ { return BASE(mGridSize); }
/// @brief Return index of this grid in the buffer
- __hostdev__ uint32_t gridIndex() const { return DataType::mGridIndex; }
+ __hostdev__ uint32_t gridIndex() const __global__ { return BASE(mGridIndex); }
/// @brief Return total number of grids in the buffer
- __hostdev__ uint32_t gridCount() const { return DataType::mGridCount; }
+ __hostdev__ uint32_t gridCount() const __global__ { return BASE(mGridCount); }
/// @brief @brief Return the total number of values indexed by this IndexGrid
///
/// @note This method is only defined for IndexGrid = NanoGrid<ValueIndex>
template <typename T = BuildType>
- __hostdev__ typename enable_if<is_same<T, ValueIndex>::value, const uint64_t&>::type valueCount() const {return DataType::mData1;}
+ __hostdev__ typename enable_if<is_same<T, ValueIndex>::value, uint64_t>::type valueCount() const {return BASE(mData1);}
/// @brief Return a const reference to the tree
- __hostdev__ const TreeT& tree() const { return *reinterpret_cast<const TreeT*>(this->treePtr()); }
+ __hostdev__ __global__ const TreeT& tree() const __global__ { return *reinterpret_cast<__global__ const TreeT*>(BASE(treePtr)()); }
/// @brief Return a non-const reference to the tree
- __hostdev__ TreeT& tree() { return *reinterpret_cast<TreeT*>(this->treePtr()); }
+ __hostdev__ __global__ TreeT& tree() __global__ { return *reinterpret_cast<__global__ TreeT*>(BASE(treePtr)()); }
/// @brief Return a new instance of a ReadAccessor used to access values in this grid
- __hostdev__ AccessorType getAccessor() const { return AccessorType(this->tree().root()); }
+ __hostdev__ AccessorType getAccessor() const __global__ { return AccessorType(this->tree().root()); }
/// @brief Return a const reference to the size of a voxel in world units
- __hostdev__ const Vec3R& voxelSize() const { return DataType::mVoxelSize; }
+ __hostdev__ const __global__ Vec3R& voxelSize() const __global__ { return BASE(mVoxelSize); }
/// @brief Return a const reference to the Map for this grid
- __hostdev__ const Map& map() const { return DataType::mMap; }
+ __hostdev__ const __global__ Map& map() const __global__ { return BASE(mMap); }
/// @brief world to index space transformation
template<typename Vec3T>
- __hostdev__ Vec3T worldToIndex(const Vec3T& xyz) const { return this->applyInverseMap(xyz); }
+ __hostdev__ Vec3T worldToIndex(__global__ const Vec3T& xyz) const __global__ { return BASE(applyInverseMap)(xyz); }
+
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ __hostdev__ Vec3T worldToIndex(__local__ const Vec3T& xyz) const __global__ { return BASE(applyInverseMap)(xyz); }
+#endif
/// @brief index to world space transformation
template<typename Vec3T>
- __hostdev__ Vec3T indexToWorld(const Vec3T& xyz) const { return this->applyMap(xyz); }
+ __hostdev__ Vec3T indexToWorld(__global__ const Vec3T& xyz) const __global__ { return this->applyMap(xyz); }
/// @brief transformation from index space direction to world space direction
/// @warning assumes dir to be normalized
template<typename Vec3T>
- __hostdev__ Vec3T indexToWorldDir(const Vec3T& dir) const { return this->applyJacobian(dir); }
+ __hostdev__ Vec3T indexToWorldDir(__global__ const Vec3T& dir) const __global__ { return this->applyJacobian(dir); }
/// @brief transformation from world space direction to index space direction
/// @warning assumes dir to be normalized
template<typename Vec3T>
- __hostdev__ Vec3T worldToIndexDir(const Vec3T& dir) const { return this->applyInverseJacobian(dir); }
+ __hostdev__ Vec3T worldToIndexDir(__global__ const Vec3T& dir) const __global__ { return this->applyInverseJacobian(dir); }
/// @brief transform the gradient from index space to world space.
/// @details Applies the inverse jacobian transform map.
template<typename Vec3T>
- __hostdev__ Vec3T indexToWorldGrad(const Vec3T& grad) const { return this->applyIJT(grad); }
+ __hostdev__ Vec3T indexToWorldGrad(__global__ const Vec3T& grad) const __global__ { return this->applyIJT(grad); }
/// @brief world to index space transformation
template<typename Vec3T>
- __hostdev__ Vec3T worldToIndexF(const Vec3T& xyz) const { return this->applyInverseMapF(xyz); }
+ __hostdev__ Vec3T worldToIndexF(__global__ const Vec3T& xyz) const __global__ { return BASE(applyInverseMapF)(xyz); }
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ __hostdev__ Vec3T worldToIndexF(__local__ const Vec3T& xyz) const __global__ { return BASE(applyInverseMapF)(xyz); }
+#endif
/// @brief index to world space transformation
template<typename Vec3T>
- __hostdev__ Vec3T indexToWorldF(const Vec3T& xyz) const { return this->applyMapF(xyz); }
+ __hostdev__ Vec3T indexToWorldF(__global__ const Vec3T& xyz) const __global__ { return this->applyMapF(xyz); }
/// @brief transformation from index space direction to world space direction
/// @warning assumes dir to be normalized
template<typename Vec3T>
- __hostdev__ Vec3T indexToWorldDirF(const Vec3T& dir) const { return this->applyJacobianF(dir); }
+ __hostdev__ Vec3T indexToWorldDirF(__global__ const Vec3T& dir) const __global__ { return this->applyJacobianF(dir); }
/// @brief transformation from world space direction to index space direction
/// @warning assumes dir to be normalized
template<typename Vec3T>
- __hostdev__ Vec3T worldToIndexDirF(const Vec3T& dir) const { return this->applyInverseJacobianF(dir); }
+ __hostdev__ Vec3T worldToIndexDirF(__global__ const Vec3T& dir) const __global__ { return BASE(applyInverseJacobianF)(dir); }
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ __hostdev__ Vec3T worldToIndexDirF(__local__ const Vec3T& dir) const __global__ { return BASE(applyInverseJacobianF)(dir); }
+#endif
/// @brief Transforms the gradient from index space to world space.
/// @details Applies the inverse jacobian transform map.
template<typename Vec3T>
- __hostdev__ Vec3T indexToWorldGradF(const Vec3T& grad) const { return DataType::applyIJTF(grad); }
+ __hostdev__ Vec3T indexToWorldGradF(__global__ const Vec3T& grad) const __global__ { return BASE(applyIJTF)(grad); }
/// @brief Computes a AABB of active values in world space
- __hostdev__ const BBox<Vec3R>& worldBBox() const { return DataType::mWorldBBox; }
+ __hostdev__ __global__ const BBox<Vec3R>& worldBBox() const __global__ { return BASE(mWorldBBox); }
/// @brief Computes a AABB of active values in index space
///
/// @note This method is returning a floating point bounding box and not a CoordBBox. This makes
/// it more useful for clipping rays.
- __hostdev__ const BBox<CoordType>& indexBBox() const { return this->tree().bbox(); }
+ __hostdev__ __global__ const BBox<CoordType>& indexBBox() const __global__ { return this->tree().bbox(); }
/// @brief Return the total number of active voxels in this tree.
- __hostdev__ uint64_t activeVoxelCount() const { return this->tree().activeVoxelCount(); }
+ __hostdev__ uint64_t activeVoxelCount() const __global__ { return this->tree().activeVoxelCount(); }
/// @brief Methods related to the classification of this grid
- __hostdev__ bool isValid() const { return DataType::mMagic == NANOVDB_MAGIC_NUMBER; }
- __hostdev__ const GridType& gridType() const { return DataType::mGridType; }
- __hostdev__ const GridClass& gridClass() const { return DataType::mGridClass; }
- __hostdev__ bool isLevelSet() const { return DataType::mGridClass == GridClass::LevelSet; }
- __hostdev__ bool isFogVolume() const { return DataType::mGridClass == GridClass::FogVolume; }
- __hostdev__ bool isStaggered() const { return DataType::mGridClass == GridClass::Staggered; }
- __hostdev__ bool isPointIndex() const { return DataType::mGridClass == GridClass::PointIndex; }
- __hostdev__ bool isGridIndex() const { return DataType::mGridClass == GridClass::IndexGrid; }
- __hostdev__ bool isPointData() const { return DataType::mGridClass == GridClass::PointData; }
- __hostdev__ bool isMask() const { return DataType::mGridClass == GridClass::Topology; }
- __hostdev__ bool isUnknown() const { return DataType::mGridClass == GridClass::Unknown; }
- __hostdev__ bool hasMinMax() const { return DataType::mFlags & static_cast<uint32_t>(GridFlags::HasMinMax); }
- __hostdev__ bool hasBBox() const { return DataType::mFlags & static_cast<uint32_t>(GridFlags::HasBBox); }
- __hostdev__ bool hasLongGridName() const { return DataType::mFlags & static_cast<uint32_t>(GridFlags::HasLongGridName); }
- __hostdev__ bool hasAverage() const { return DataType::mFlags & static_cast<uint32_t>(GridFlags::HasAverage); }
- __hostdev__ bool hasStdDeviation() const { return DataType::mFlags & static_cast<uint32_t>(GridFlags::HasStdDeviation); }
- __hostdev__ bool isBreadthFirst() const { return DataType::mFlags & static_cast<uint32_t>(GridFlags::IsBreadthFirst); }
+ __hostdev__ bool isValid() const __global__ { return BASE(mMagic) == NANOVDB_MAGIC_NUMBER; }
+ __hostdev__ const __global__ GridType& gridType() const __global__ { return BASE(mGridType); }
+ __hostdev__ const __global__ GridClass& gridClass() const __global__ { return BASE(mGridClass); }
+ __hostdev__ bool isLevelSet() const __global__ { return BASE(mGridClass) == GridClass::LevelSet; }
+ __hostdev__ bool isFogVolume() const __global__ { return BASE(mGridClass) == GridClass::FogVolume; }
+ __hostdev__ bool isStaggered() const __global__ { return BASE(mGridClass) == GridClass::Staggered; }
+ __hostdev__ bool isPointIndex() const __global__ { return BASE(mGridClass) == GridClass::PointIndex; }
+ __hostdev__ bool isGridIndex() const __global__ { return BASE(mGridClass) == GridClass::IndexGrid; }
+ __hostdev__ bool isPointData() const __global__ { return BASE(mGridClass) == GridClass::PointData; }
+ __hostdev__ bool isMask() const __global__ { return BASE(mGridClass) == GridClass::Topology; }
+ __hostdev__ bool isUnknown() const __global__ { return BASE(mGridClass) == GridClass::Unknown; }
+ __hostdev__ bool hasMinMax() const __global__ { return BASE(mFlags) & static_cast<uint32_t>(GridFlags::HasMinMax); }
+ __hostdev__ bool hasBBox() const __global__ { return BASE(mFlags) & static_cast<uint32_t>(GridFlags::HasBBox); }
+ __hostdev__ bool hasLongGridName() const __global__ { return BASE(mFlags) & static_cast<uint32_t>(GridFlags::HasLongGridName); }
+ __hostdev__ bool hasAverage() const __global__ { return BASE(mFlags) & static_cast<uint32_t>(GridFlags::HasAverage); }
+ __hostdev__ bool hasStdDeviation() const __global__ { return BASE(mFlags) & static_cast<uint32_t>(GridFlags::HasStdDeviation); }
+ __hostdev__ bool isBreadthFirst() const __global__ { return BASE(mFlags) & static_cast<uint32_t>(GridFlags::IsBreadthFirst); }
/// @brief return true if the specified node type is layed out breadth-first in memory and has a fixed size.
/// This allows for sequential access to the nodes.
template <typename NodeT>
- __hostdev__ bool isSequential() const { return NodeT::FIXED_SIZE && this->isBreadthFirst(); }
+ __hostdev__ bool isSequential() const __global__ { return NodeT::FIXED_SIZE && this->isBreadthFirst(); }
/// @brief return true if the specified node level is layed out breadth-first in memory and has a fixed size.
/// This allows for sequential access to the nodes.
template <int LEVEL>
- __hostdev__ bool isSequential() const { return NodeTrait<TreeT,LEVEL>::type::FIXED_SIZE && this->isBreadthFirst(); }
+ __hostdev__ bool isSequential() const __global__ { return NodeTrait<TreeT,LEVEL>::type::FIXED_SIZE && this->isBreadthFirst(); }
/// @brief Return a c-string with the name of this grid
- __hostdev__ const char* gridName() const
+ __hostdev__ __global__ const char* gridName() const __global__
{
if (this->hasLongGridName()) {
NANOVDB_ASSERT(DataType::mBlindMetadataCount>0);
- const auto &metaData = this->blindMetaData(DataType::mBlindMetadataCount-1);// always the last
+ __global__ const auto &metaData = this->blindMetaData(BASE(mBlindMetadataCount)-1);// always the last
NANOVDB_ASSERT(metaData.mDataClass == GridBlindDataClass::GridName);
return metaData.template getBlindData<const char>();
}
- return DataType::mGridName;
+ return BASE(mGridName);
}
/// @brief Return a c-string with the name of this grid, truncated to 255 characters
- __hostdev__ const char* shortGridName() const { return DataType::mGridName; }
-
+ __hostdev__ __global__ const char* shortGridName() const __global__ { return BASE(mGridName); }
/// @brief Return checksum of the grid buffer.
- __hostdev__ uint64_t checksum() const { return DataType::mChecksum; }
+ __hostdev__ uint64_t checksum() const __global__ { return BASE(mChecksum); }
/// @brief Return true if this grid is empty, i.e. contains no values or nodes.
- __hostdev__ bool isEmpty() const { return this->tree().isEmpty(); }
+ __hostdev__ bool isEmpty() const __global__ { return this->tree().isEmpty(); }
/// @brief Return the count of blind-data encoded in this grid
- __hostdev__ uint32_t blindDataCount() const { return DataType::mBlindMetadataCount; }
+ __hostdev__ uint32_t blindDataCount() const __global__ { return BASE(mBlindMetadataCount); }
/// @brief Return the index of the blind data with specified semantic if found, otherwise -1.
- __hostdev__ int findBlindDataForSemantic(GridBlindDataSemantic semantic) const;
+ __hostdev__ int findBlindDataForSemantic(GridBlindDataSemantic semantic) const __global__;
/// @brief Returns a const pointer to the blindData at the specified linear offset.
///
/// @warning Point might be NULL and the linear offset is assumed to be in the valid range
- __hostdev__ const void* blindData(uint32_t n) const
+ __hostdev__ __global__ const void* blindData(uint32_t n) const __global__
{
- if (DataType::mBlindMetadataCount == 0u) {
+ if (BASE(mBlindMetadataCount) == 0u) {
return nullptr;
}
NANOVDB_ASSERT(n < DataType::mBlindMetadataCount);
return this->blindMetaData(n).template getBlindData<void>();
}
-
- __hostdev__ const GridBlindMetaData& blindMetaData(uint32_t n) const { return *DataType::blindMetaData(n); }
+
+ __hostdev__ __global__ const GridBlindMetaData& blindMetaData(uint32_t n) const __global__ { return *BASE(blindMetaData)(n); }
private:
static_assert(sizeof(GridData) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(GridData) is misaligned");
}; // Class Grid
template<typename TreeT>
-__hostdev__ int Grid<TreeT>::findBlindDataForSemantic(GridBlindDataSemantic semantic) const
+__hostdev__ int Grid<TreeT>::findBlindDataForSemantic(GridBlindDataSemantic semantic) const __global__
{
for (uint32_t i = 0, n = this->blindDataCount(); i < n; ++i)
if (this->blindMetaData(i).mSemantic == semantic)
@@ -2762,14 +3105,14 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) TreeData
uint64_t mVoxelCount;// 8B, total number of active voxels in the root and all its child nodes.
// No padding since it's always 32B aligned
template <typename RootT>
- __hostdev__ void setRoot(const RootT* root) { mNodeOffset[3] = PtrDiff(root, this); }
+ __hostdev__ void setRoot(__global__ const RootT* root) __global__ { mNodeOffset[3] = PtrDiff(root, this); }
template <typename RootT>
- __hostdev__ RootT* getRoot() { return PtrAdd<RootT>(this, mNodeOffset[3]); }
+ __hostdev__ __global__ RootT* getRoot() __global__ { return PtrAdd<RootT>(this, mNodeOffset[3]); }
template <typename RootT>
- __hostdev__ const RootT* getRoot() const { return PtrAdd<RootT>(this, mNodeOffset[3]); }
+ __hostdev__ __global__ const RootT* getRoot() const __global__ { return PtrAdd<RootT>(this, mNodeOffset[3]); }
template <typename NodeT>
- __hostdev__ void setFirstNode(const NodeT* node)
+ __hostdev__ void setFirstNode(__global__ const NodeT* node) __global__
{
mNodeOffset[NodeT::LEVEL] = node ? PtrDiff(node, this) : 0;
}
@@ -2795,8 +3138,17 @@ struct GridTree<const GridT>
/// @brief VDB Tree, which is a thin wrapper around a RootNode.
template<typename RootT>
-class Tree : private TreeData<RootT::LEVEL>
+class Tree
+#if !defined(__KERNEL_METAL__)
+ : private TreeData<RootT::LEVEL>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ TreeData<RootT::LEVEL> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) DataType::v
+#endif
static_assert(RootT::LEVEL == 3, "Tree depth is not supported");
static_assert(RootT::ChildNodeType::LOG2DIM == 5, "Tree configuration is not supported");
static_assert(RootT::ChildNodeType::ChildNodeType::LOG2DIM == 4, "Tree configuration is not supported");
@@ -2817,79 +3169,86 @@ public:
using Node0 = LeafNodeType;
/// @brief This class cannot be constructed or deleted
- Tree() = delete;
- Tree(const Tree&) = delete;
- Tree& operator=(const Tree&) = delete;
- ~Tree() = delete;
+ Tree() __global__ = delete;
+ Tree(__global__ const Tree&) __global__ = delete;
+ __global__ Tree& operator=(__global__ const Tree&) __global__ = delete;
+ ~Tree() __global__ = delete;
- __hostdev__ DataType* data() { return reinterpret_cast<DataType*>(this); }
+ __hostdev__ __global__ DataType* data() __global__ { return reinterpret_cast<__global__ DataType*>(this); }
- __hostdev__ const DataType* data() const { return reinterpret_cast<const DataType*>(this); }
+ __hostdev__ __global__ const DataType* data() const __global__ { return reinterpret_cast<__global__ const DataType*>(this); }
/// @brief return memory usage in bytes for the class
__hostdev__ static uint64_t memUsage() { return sizeof(DataType); }
- __hostdev__ RootT& root() { return *DataType::template getRoot<RootT>(); }
+ __hostdev__ __global__ RootT& root() __global__ { return *BASE(template) getRoot<RootT>(); }
- __hostdev__ const RootT& root() const { return *DataType::template getRoot<RootT>(); }
+ __hostdev__ __global__ const RootT& root() const __global__ { return *BASE(template) getRoot<RootT>(); }
- __hostdev__ AccessorType getAccessor() const { return AccessorType(this->root()); }
+ __hostdev__ AccessorType getAccessor() const __global__ { return AccessorType(this->root()); }
/// @brief Return the value of the given voxel (regardless of state or location in the tree.)
- __hostdev__ ValueType getValue(const CoordType& ijk) const { return this->root().getValue(ijk); }
+ __hostdev__ ValueType getValue(__global__ const CoordType& ijk) const __global__ { return this->root().getValue(ijk); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __global__ { return this->root().getValue(ijk); }
+#endif
/// @brief Return the active state of the given voxel (regardless of state or location in the tree.)
- __hostdev__ bool isActive(const CoordType& ijk) const { return this->root().isActive(ijk); }
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __global__ { return this->root().isActive(ijk); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __global__ { return this->root().isActive(ijk); }
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __local__ { return this->root().isActive(ijk); }
+#endif
/// @brief Return true if this tree is empty, i.e. contains no values or nodes
- __hostdev__ bool isEmpty() const { return this->root().isEmpty(); }
+ __hostdev__ bool isEmpty() const __global__ { return this->root().isEmpty(); }
/// @brief Combines the previous two methods in a single call
- __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const { return this->root().probeValue(ijk, v); }
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ ValueType& v) const { return this->root().probeValue(ijk, v); }
/// @brief Return a const reference to the background value.
- __hostdev__ const ValueType& background() const { return this->root().background(); }
+ __hostdev__ __global__ const ValueType& background() const __global__ { return this->root().background(); }
/// @brief Sets the extrema values of all the active values in this tree, i.e. in all nodes of the tree
- __hostdev__ void extrema(ValueType& min, ValueType& max) const;
+ __hostdev__ void extrema(__global__ ValueType& min, __global__ ValueType& max) const __global__;
/// @brief Return a const reference to the index bounding box of all the active values in this tree, i.e. in all nodes of the tree
- __hostdev__ const BBox<CoordType>& bbox() const { return this->root().bbox(); }
+ __hostdev__ __global__ const BBox<CoordType>& bbox() const __global__ { return this->root().bbox(); }
/// @brief Return the total number of active voxels in this tree.
- __hostdev__ uint64_t activeVoxelCount() const { return DataType::mVoxelCount; }
+ __hostdev__ uint64_t activeVoxelCount() const __global__ { return BASE(mVoxelCount); }
/// @brief Return the total number of active tiles at the specified level of the tree.
///
/// @details level = 1,2,3 corresponds to active tile count in lower internal nodes, upper
/// internal nodes, and the root level. Note active values at the leaf level are
/// referred to as active voxels (see activeVoxelCount defined above).
- __hostdev__ const uint32_t& activeTileCount(uint32_t level) const
+ __hostdev__ __global__ const uint32_t& activeTileCount(uint32_t level) const __global__
{
NANOVDB_ASSERT(level > 0 && level <= 3);// 1, 2, or 3
- return DataType::mTileCount[level - 1];
+ return BASE(mTileCount)[level - 1];
}
template<typename NodeT>
- __hostdev__ uint32_t nodeCount() const
+ __hostdev__ uint32_t nodeCount() const __global__
{
static_assert(NodeT::LEVEL < 3, "Invalid NodeT");
- return DataType::mNodeCount[NodeT::LEVEL];
+ return BASE(mNodeCount)[NodeT::LEVEL];
}
- __hostdev__ uint32_t nodeCount(int level) const
+ __hostdev__ uint32_t nodeCount(int level) const __global__
{
NANOVDB_ASSERT(level < 3);
- return DataType::mNodeCount[level];
+ return BASE(mNodeCount)[level];
}
/// @brief return a pointer to the first node of the specified type
///
/// @warning Note it may return NULL if no nodes exist
template <typename NodeT>
- __hostdev__ NodeT* getFirstNode()
+ __hostdev__ __global__ NodeT* getFirstNode() __global__
{
- const uint64_t offset = DataType::mNodeOffset[NodeT::LEVEL];
+ const uint64_t offset = BASE(mNodeOffset)[NodeT::LEVEL];
return offset>0 ? PtrAdd<NodeT>(this, offset) : nullptr;
}
@@ -2897,9 +3256,9 @@ public:
///
/// @warning Note it may return NULL if no nodes exist
template <typename NodeT>
- __hostdev__ const NodeT* getFirstNode() const
+ __hostdev__ __global__ const NodeT* getFirstNode() const __global__
{
- const uint64_t offset = DataType::mNodeOffset[NodeT::LEVEL];
+ const uint64_t offset = BASE(mNodeOffset)[NodeT::LEVEL];
return offset>0 ? PtrAdd<NodeT>(this, offset) : nullptr;
}
@@ -2907,8 +3266,8 @@ public:
///
/// @warning Note it may return NULL if no nodes exist
template <int LEVEL>
- __hostdev__ typename NodeTrait<RootT, LEVEL>::type*
- getFirstNode()
+ __hostdev__ __global__ typename NodeTrait<RootT, LEVEL>::type*
+ getFirstNode() __global__
{
return this->template getFirstNode<typename NodeTrait<RootT,LEVEL>::type>();
}
@@ -2917,27 +3276,28 @@ public:
///
/// @warning Note it may return NULL if no nodes exist
template <int LEVEL>
- __hostdev__ const typename NodeTrait<RootT, LEVEL>::type*
- getFirstNode() const
+ __hostdev__ __global__ const typename NodeTrait<RootT, LEVEL>::type*
+ getFirstNode() const __global__
{
return this->template getFirstNode<typename NodeTrait<RootT,LEVEL>::type>();
}
/// @brief Template specializations of getFirstNode
- __hostdev__ LeafNodeType* getFirstLeaf() {return this->getFirstNode<LeafNodeType>();}
- __hostdev__ const LeafNodeType* getFirstLeaf() const {return this->getFirstNode<LeafNodeType>();}
- __hostdev__ typename NodeTrait<RootT, 1>::type* getFirstLower() {return this->getFirstNode<1>();}
- __hostdev__ const typename NodeTrait<RootT, 1>::type* getFirstLower() const {return this->getFirstNode<1>();}
- __hostdev__ typename NodeTrait<RootT, 2>::type* getFirstUpper() {return this->getFirstNode<2>();}
- __hostdev__ const typename NodeTrait<RootT, 2>::type* getFirstUpper() const {return this->getFirstNode<2>();}
+ __hostdev__ __global__ LeafNodeType* getFirstLeaf() {return this->getFirstNode<LeafNodeType>();}
+ __hostdev__ __global__ const LeafNodeType* getFirstLeaf() const {return this->getFirstNode<LeafNodeType>();}
+ __hostdev__ __global__ typename NodeTrait<RootT, 1>::type* getFirstLower() {return this->getFirstNode<1>();}
+ __hostdev__ __global__ const typename NodeTrait<RootT, 1>::type* getFirstLower() const {return this->getFirstNode<1>();}
+ __hostdev__ __global__ typename NodeTrait<RootT, 2>::type* getFirstUpper() {return this->getFirstNode<2>();}
+ __hostdev__ __global__ const typename NodeTrait<RootT, 2>::type* getFirstUpper() const {return this->getFirstNode<2>();}
private:
static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(TreeData) is misaligned");
+#undef BASE
}; // Tree class
template<typename RootT>
-__hostdev__ void Tree<RootT>::extrema(ValueType& min, ValueType& max) const
+__hostdev__ void Tree<RootT>::extrema(__global__ ValueType& min, __global__ ValueType& max) const __global__
{
min = this->root().minimum();
max = this->root().maximum();
@@ -2955,13 +3315,13 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData
using BuildT = typename ChildT::BuildType;// in rare cases BuildType != ValueType, e.g. then BuildType = ValueMask and ValueType = bool
using CoordT = typename ChildT::CoordType;
using StatsT = typename ChildT::FloatType;
- static constexpr bool FIXED_SIZE = false;
+ static __constant__ constexpr bool FIXED_SIZE = false;
/// @brief Return a key based on the coordinates of a voxel
#ifdef USE_SINGLE_ROOT_KEY
using KeyT = uint64_t;
template <typename CoordType>
- __hostdev__ static KeyT CoordToKey(const CoordType& ijk)
+ __hostdev__ static KeyT CoordToKey(__global__ const CoordType& ijk)
{
static_assert(sizeof(CoordT) == sizeof(CoordType), "Mismatching sizeof");
static_assert(32 - ChildT::TOTAL <= 21, "Cannot use 64 bit root keys");
@@ -2969,17 +3329,28 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData
(KeyT(uint32_t(ijk[1]) >> ChildT::TOTAL) << 21) | // y is the middle 21 bits
(KeyT(uint32_t(ijk[0]) >> ChildT::TOTAL) << 42); // x is the upper 21 bits
}
- __hostdev__ static CoordT KeyToCoord(const KeyT& key)
+#if defined(__KERNEL_METAL__)
+ template <typename CoordType>
+ __hostdev__ static KeyT CoordToKey(__local__ const CoordType& ijk)
+ {
+ static_assert(sizeof(CoordT) == sizeof(CoordType), "Mismatching sizeof");
+ static_assert(32 - ChildT::TOTAL <= 21, "Cannot use 64 bit root keys");
+ return (KeyT(uint32_t(ijk[2]) >> ChildT::TOTAL)) | // z is the lower 21 bits
+ (KeyT(uint32_t(ijk[1]) >> ChildT::TOTAL) << 21) | // y is the middle 21 bits
+ (KeyT(uint32_t(ijk[0]) >> ChildT::TOTAL) << 42); // x is the upper 21 bits
+ }
+#endif
+ static __constant__ constexpr uint64_t MASK = (1u << 21) - 1;
+ __hostdev__ static CoordT KeyToCoord(__global__ const KeyT& key)
{
- static constexpr uint64_t MASK = (1u << 21) - 1;
return CoordT(((key >> 42) & MASK) << ChildT::TOTAL,
((key >> 21) & MASK) << ChildT::TOTAL,
(key & MASK) << ChildT::TOTAL);
}
#else
using KeyT = CoordT;
- __hostdev__ static KeyT CoordToKey(const CoordT& ijk) { return ijk & ~ChildT::MASK; }
- __hostdev__ static CoordT KeyToCoord(const KeyT& key) { return key; }
+ __hostdev__ static KeyT CoordToKey(__global__ const CoordT& ijk) { return ijk & ~ChildT::MASK; }
+ __hostdev__ static CoordT KeyToCoord(__global__ const KeyT& key) { return key; }
#endif
BBox<CoordT> mBBox; // 24B. AABB of active values in index space.
uint32_t mTableSize; // 4B. number of tiles and child pointers in the root node
@@ -3000,23 +3371,23 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData
struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) Tile
{
template <typename CoordType>
- __hostdev__ void setChild(const CoordType& k, const ChildT *ptr, const RootData *data)
+ __hostdev__ void setChild(__global__ const CoordType& k, __global__ const ChildT *ptr, __global__ const RootData *data)
{
key = CoordToKey(k);
child = PtrDiff(ptr, data);
}
template <typename CoordType, typename ValueType>
- __hostdev__ void setValue(const CoordType& k, bool s, const ValueType &v)
+ __hostdev__ void setValue(__global__ const CoordType& k, bool s, __global__ const ValueType &v)
{
key = CoordToKey(k);
state = s;
value = v;
child = 0;
}
- __hostdev__ bool isChild() const { return child!=0; }
- __hostdev__ bool isValue() const { return child==0; }
- __hostdev__ bool isActive() const { return child==0 && state; }
- __hostdev__ CoordT origin() const { return KeyToCoord(key); }
+ __hostdev__ bool isChild() const __global__ { return child!=0; }
+ __hostdev__ bool isValue() const __global__ { return child==0; }
+ __hostdev__ bool isActive() const __global__ { return child==0 && state; }
+ __hostdev__ CoordT origin() const __global__ { return KeyToCoord(key); }
KeyT key; // USE_SINGLE_ROOT_KEY ? 8B : 12B
int64_t child; // 8B. signed byte offset from this node to the child node. 0 means it is a constant tile, so use value.
uint32_t state; // 4B. state of tile value
@@ -3026,53 +3397,64 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) RootData
/// @brief Returns a non-const reference to the tile at the specified linear offset.
///
/// @warning The linear offset is assumed to be in the valid range
- __hostdev__ const Tile* tile(uint32_t n) const
+ __hostdev__ __global__ const Tile* tile(uint32_t n) const
{
NANOVDB_ASSERT(n < mTableSize);
- return reinterpret_cast<const Tile*>(this + 1) + n;
+ return reinterpret_cast<__global__ const Tile*>(this + 1) + n;
}
- __hostdev__ Tile* tile(uint32_t n)
+ __hostdev__ __global__ Tile* tile(uint32_t n)
{
NANOVDB_ASSERT(n < mTableSize);
- return reinterpret_cast<Tile*>(this + 1) + n;
+ return reinterpret_cast<__global__ Tile*>(this + 1) + n;
}
/// @brief Returns a const reference to the child node in the specified tile.
///
/// @warning A child node is assumed to exist in the specified tile
- __hostdev__ ChildT* getChild(const Tile* tile)
+ __hostdev__ __global__ ChildT* getChild(__global__ const Tile* tile) __global__
{
NANOVDB_ASSERT(tile->child);
return PtrAdd<ChildT>(this, tile->child);
}
- __hostdev__ const ChildT* getChild(const Tile* tile) const
+ __hostdev__ __global__ const ChildT* getChild(__global__ const Tile* tile) const __global__
{
NANOVDB_ASSERT(tile->child);
return PtrAdd<ChildT>(this, tile->child);
}
- __hostdev__ const ValueT& getMin() const { return mMinimum; }
- __hostdev__ const ValueT& getMax() const { return mMaximum; }
- __hostdev__ const StatsT& average() const { return mAverage; }
- __hostdev__ const StatsT& stdDeviation() const { return mStdDevi; }
+ __hostdev__ __global__ const ValueT& getMin() const { return mMinimum; }
+ __hostdev__ __global__ const ValueT& getMax() const { return mMaximum; }
+ __hostdev__ __global__ const StatsT& average() const { return mAverage; }
+ __hostdev__ __global__ const StatsT& stdDeviation() const { return mStdDevi; }
- __hostdev__ void setMin(const ValueT& v) { mMinimum = v; }
- __hostdev__ void setMax(const ValueT& v) { mMaximum = v; }
- __hostdev__ void setAvg(const StatsT& v) { mAverage = v; }
- __hostdev__ void setDev(const StatsT& v) { mStdDevi = v; }
+ __hostdev__ void setMin(__global__ const ValueT& v) { mMinimum = v; }
+ __hostdev__ void setMax(__global__ const ValueT& v) { mMaximum = v; }
+ __hostdev__ void setAvg(__global__ const StatsT& v) { mAverage = v; }
+ __hostdev__ void setDev(__global__ const StatsT& v) { mStdDevi = v; }
/// @brief This class cannot be constructed or deleted
RootData() = delete;
- RootData(const RootData&) = delete;
- RootData& operator=(const RootData&) = delete;
+ RootData(__global__ const RootData&) = delete;
+ __global__ RootData& operator=(__global__ const RootData&) = delete;
~RootData() = delete;
}; // RootData
/// @brief Top-most node of the VDB tree structure.
template<typename ChildT>
-class RootNode : private RootData<ChildT>
+class RootNode
+#if !defined(__KERNEL_METAL__)
+ : private RootData<ChildT>
+#endif
{
public:
+#if defined(__KERNEL_METAL__)
+
+ RootData<ChildT> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) DataType::v
+#endif
+
using DataType = RootData<ChildT>;
using LeafNodeType = typename ChildT::LeafNodeType;
using ChildNodeType = ChildT;
@@ -3086,27 +3468,27 @@ public:
using BBoxType = BBox<CoordType>;
using AccessorType = DefaultReadAccessor<BuildType>;
using Tile = typename DataType::Tile;
- static constexpr bool FIXED_SIZE = DataType::FIXED_SIZE;
+ static __constant__ constexpr bool FIXED_SIZE = DataType::FIXED_SIZE;
- static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf
+ static __constant__ constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf
class ChildIterator
{
- const DataType *mParent;
- uint32_t mPos, mSize;
+ __global__ const DataType *mParent;
+ uint32_t mPos, mSize;
public:
__hostdev__ ChildIterator() : mParent(nullptr), mPos(0), mSize(0) {}
- __hostdev__ ChildIterator(const RootNode *parent) : mParent(parent->data()), mPos(0), mSize(parent->tileCount()) {
+ __hostdev__ ChildIterator(__global__ const RootNode *parent) : mParent(parent->data()), mPos(0), mSize(parent->tileCount()) {
NANOVDB_ASSERT(mParent);
while (mPos<mSize && !mParent->tile(mPos)->isChild()) ++mPos;
}
- ChildIterator& operator=(const ChildIterator&) = default;
- __hostdev__ const ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mParent->getChild(mParent->tile(mPos));}
- __hostdev__ const ChildT* operator->() const {NANOVDB_ASSERT(*this); return mParent->getChild(mParent->tile(mPos));}
+ __global__ ChildIterator& operator=(__global__ const ChildIterator&) = default;
+ __hostdev__ __global__ const ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mParent->getChild(mParent->tile(mPos));}
+ __hostdev__ __global__ const ChildT* operator->() const {NANOVDB_ASSERT(*this); return mParent->getChild(mParent->tile(mPos));}
__hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); mParent->tile(mPos)->origin();}
__hostdev__ operator bool() const {return mPos < mSize;}
__hostdev__ uint32_t pos() const {return mPos;}
- __hostdev__ ChildIterator& operator++() {
+ __hostdev__ __global__ ChildIterator& operator++() {
NANOVDB_ASSERT(mParent);
++mPos;
while (mPos < mSize && mParent->tile(mPos)->isValue()) ++mPos;
@@ -3123,21 +3505,21 @@ public:
class ValueIterator
{
- const DataType *mParent;
- uint32_t mPos, mSize;
+ __global__ const DataType *mParent;
+ uint32_t mPos, mSize;
public:
__hostdev__ ValueIterator() : mParent(nullptr), mPos(0), mSize(0) {}
- __hostdev__ ValueIterator(const RootNode *parent) : mParent(parent->data()), mPos(0), mSize(parent->tileCount()){
+ __hostdev__ ValueIterator(__global__ const RootNode *parent) : mParent(parent->data()), mPos(0), mSize(parent->tileCount()){
NANOVDB_ASSERT(mParent);
while (mPos < mSize && mParent->tile(mPos)->isChild()) ++mPos;
}
- ValueIterator& operator=(const ValueIterator&) = default;
+ __global__ ValueIterator& operator=(__global__ const ValueIterator&) = default;
__hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->tile(mPos)->value;}
__hostdev__ bool isActive() const {NANOVDB_ASSERT(*this); return mParent->tile(mPos)->state;}
__hostdev__ operator bool() const {return mPos < mSize;}
__hostdev__ uint32_t pos() const {return mPos;}
__hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); mParent->tile(mPos)->origin();}
- __hostdev__ ValueIterator& operator++() {
+ __hostdev__ __global__ ValueIterator& operator++() {
NANOVDB_ASSERT(mParent);
++mPos;
while (mPos < mSize && mParent->tile(mPos)->isChild()) ++mPos;
@@ -3154,20 +3536,20 @@ public:
class ValueOnIterator
{
- const DataType *mParent;
+ __global__ const DataType *mParent;
uint32_t mPos, mSize;
public:
__hostdev__ ValueOnIterator() : mParent(nullptr), mPos(0), mSize(0) {}
- __hostdev__ ValueOnIterator(const RootNode *parent) : mParent(parent->data()), mPos(0), mSize(parent->tileCount()){
+ __hostdev__ ValueOnIterator(__global__ const RootNode *parent) : mParent(parent->data()), mPos(0), mSize(parent->tileCount()){
NANOVDB_ASSERT(mParent);
while (mPos < mSize && !mParent->tile(mPos)->isActive()) ++mPos;
}
- ValueOnIterator& operator=(const ValueOnIterator&) = default;
+ __global__ ValueOnIterator& operator=(__global__ const ValueOnIterator&) = default;
__hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->tile(mPos)->value;}
__hostdev__ operator bool() const {return mPos < mSize;}
__hostdev__ uint32_t pos() const {return mPos;}
__hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); mParent->tile(mPos)->origin();}
- __hostdev__ ValueOnIterator& operator++() {
+ __hostdev__ __global__ ValueOnIterator& operator++() {
NANOVDB_ASSERT(mParent);
++mPos;
while (mPos < mSize && !mParent->tile(mPos)->isActive()) ++mPos;
@@ -3183,75 +3565,107 @@ public:
ValueOnIterator beginValueOn() const {return ValueOnIterator(this);}
/// @brief This class cannot be constructed or deleted
- RootNode() = delete;
- RootNode(const RootNode&) = delete;
- RootNode& operator=(const RootNode&) = delete;
- ~RootNode() = delete;
+ RootNode() __global__ = delete;
+ RootNode(__global__ const RootNode&) __global__ = delete;
+ __global__ RootNode& operator=(__global__ const RootNode&) __global__ = delete;
+ ~RootNode() __global__ = delete;
- __hostdev__ AccessorType getAccessor() const { return AccessorType(*this); }
+ __hostdev__ AccessorType getAccessor() const __global__ { return AccessorType(*this); }
- __hostdev__ DataType* data() { return reinterpret_cast<DataType*>(this); }
+ __hostdev__ __global__ DataType* data() __global__ { return reinterpret_cast<__global__ DataType*>(this); }
- __hostdev__ const DataType* data() const { return reinterpret_cast<const DataType*>(this); }
+ __hostdev__ __global__ const DataType* data() const __global__ { return reinterpret_cast<__global__ const DataType*>(this); }
/// @brief Return a const reference to the index bounding box of all the active values in this tree, i.e. in all nodes of the tree
- __hostdev__ const BBoxType& bbox() const { return DataType::mBBox; }
+ __hostdev__ __global__ const BBoxType& bbox() const __global__ { return BASE(mBBox); }
/// @brief Return the total number of active voxels in the root and all its child nodes.
/// @brief Return a const reference to the background value, i.e. the value associated with
/// any coordinate location that has not been set explicitly.
- __hostdev__ const ValueType& background() const { return DataType::mBackground; }
+ __hostdev__ __global__ const ValueType& background() const __global__ { return DataType::mBackground; }
/// @brief Return the number of tiles encoded in this root node
- __hostdev__ const uint32_t& tileCount() const { return DataType::mTableSize; }
+ __hostdev__ __global__ const uint32_t& tileCount() const __global__ { return DataType::mTableSize; }
/// @brief Return a const reference to the minimum active value encoded in this root node and any of its child nodes
- __hostdev__ const ValueType& minimum() const { return this->getMin(); }
+ __hostdev__ __global__ const ValueType& minimum() const __global__ { return this->getMin(); }
/// @brief Return a const reference to the maximum active value encoded in this root node and any of its child nodes
- __hostdev__ const ValueType& maximum() const { return this->getMax(); }
+ __hostdev__ __global__ const ValueType& maximum() const __global__ { return this->getMax(); }
/// @brief Return a const reference to the average of all the active values encoded in this root node and any of its child nodes
- __hostdev__ const FloatType& average() const { return DataType::mAverage; }
+ __hostdev__ __global__ const FloatType& average() const __global__ { return DataType::mAverage; }
/// @brief Return the variance of all the active values encoded in this root node and any of its child nodes
- __hostdev__ FloatType variance() const { return DataType::mStdDevi * DataType::mStdDevi; }
+ __hostdev__ FloatType variance() const __global__ { return DataType::mStdDevi * DataType::mStdDevi; }
/// @brief Return a const reference to the standard deviation of all the active values encoded in this root node and any of its child nodes
- __hostdev__ const FloatType& stdDeviation() const { return DataType::mStdDevi; }
+ __hostdev__ __global__ const FloatType& stdDeviation() const __global__ { return DataType::mStdDevi; }
/// @brief Return the expected memory footprint in bytes with the specified number of tiles
__hostdev__ static uint64_t memUsage(uint32_t tableSize) { return sizeof(RootNode) + tableSize * sizeof(Tile); }
/// @brief Return the actual memory footprint of this root node
- __hostdev__ uint64_t memUsage() const { return sizeof(RootNode) + DataType::mTableSize * sizeof(Tile); }
+ __hostdev__ uint64_t memUsage() const __global__ { return sizeof(RootNode) + DataType::mTableSize * sizeof(Tile); }
/// @brief Return the value of the given voxel
- __hostdev__ ValueType getValue(const CoordType& ijk) const
+ __hostdev__ ValueType getValue(__global__ const CoordType& ijk) const __global__
{
- if (const Tile* tile = this->probeTile(ijk)) {
+ if (__global__ const Tile* tile = this->probeTile(ijk)) {
return tile->isChild() ? this->getChild(tile)->getValue(ijk) : tile->value;
}
return DataType::mBackground;
}
-
- __hostdev__ bool isActive(const CoordType& ijk) const
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __global__
{
- if (const Tile* tile = this->probeTile(ijk)) {
- return tile->isChild() ? this->getChild(tile)->isActive(ijk) : tile->state;
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ return tile->isChild() ? BASE(getChild)(tile)->getValue(ijk) : tile->value;
+ }
+ return BASE(mBackground);
+ }
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __local__
+ {
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ return tile->isChild() ? BASE(getChild)(tile)->getValue(ijk) : tile->value;
+ }
+ return BASE(mBackground);
+ }
+#endif
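+ // Illustrative note: Metal references carry an address space, so a coordinate held in
+ // thread-local storage cannot bind to a `__global__ const CoordType&` parameter, and a
+ // method qualified `const __global__` cannot be called on a thread-local object. The
+ // Metal-only overloads above therefore duplicate each accessor for the address-space
+ // combinations the kernels are expected to use, e.g. (sketch):
+ //
+ //   ValueType getValue(__global__ const CoordType&) const __global__; // both in device memory
+ //   ValueType getValue(__local__  const CoordType&) const __global__; // stack coord, device grid
+ //   ValueType getValue(__local__  const CoordType&) const __local__;  // both on the stack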
+
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __global__
+ {
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ return tile->isChild() ? BASE(getChild)(tile)->isActive(ijk) : tile->state;
}
return false;
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __global__
+ {
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ return tile->isChild() ? BASE(getChild)(tile)->isActive(ijk) : tile->state;
+ }
+ return false;
+ }
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __local__
+ {
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ return tile->isChild() ? BASE(getChild)(tile)->isActive(ijk) : tile->state;
+ }
+ return false;
+ }
+#endif
/// @brief Return true if this RootNode is empty, i.e. contains no values or nodes
- __hostdev__ bool isEmpty() const { return DataType::mTableSize == uint32_t(0); }
+ __hostdev__ bool isEmpty() const __global__ { return BASE(mTableSize) == uint32_t(0); }
- __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ ValueType& v) const __global__
{
- if (const Tile* tile = this->probeTile(ijk)) {
+ if (__global__ const Tile* tile = this->probeTile(ijk)) {
if (tile->isChild()) {
- const auto *child = this->getChild(tile);
+ __global__ const auto *child = this->getChild(tile);
return child->probeValue(ijk, v);
}
v = tile->value;
@@ -3260,33 +3674,49 @@ public:
v = DataType::mBackground;
return false;
}
-
- __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool probeValue(__local__ const CoordType& ijk, __local__ ValueType& v) const __global__
{
- const Tile* tile = this->probeTile(ijk);
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ if (tile->isChild()) {
+ __global__ const auto *child = BASE(getChild)(tile);
+ return child->probeValue(ijk, v);
+ }
+ v = tile->value;
+ return tile->state;
+ }
+ v = BASE(mBackground);
+ return false;
+ }
+#endif
+
+ __hostdev__ __global__ const LeafNodeType* probeLeaf(__global__ const CoordType& ijk) const
+ {
+ __global__ const Tile* tile = this->probeTile(ijk);
if (tile && tile->isChild()) {
- const auto *child = this->getChild(tile);
+ const __global__ auto *child = this->getChild(tile);
return child->probeLeaf(ijk);
}
return nullptr;
}
- __hostdev__ const ChildNodeType* probeChild(const CoordType& ijk) const
+ __hostdev__ __global__ const ChildNodeType* probeChild(__global__ const CoordType& ijk) const
{
- const Tile* tile = this->probeTile(ijk);
+ __global__ const Tile* tile = this->probeTile(ijk);
if (tile && tile->isChild()) {
return this->getChild(tile);
}
return nullptr;
}
+
/// @brief Find and return a Tile of this root node
- __hostdev__ const Tile* probeTile(const CoordType& ijk) const
+ __hostdev__ __global__ const Tile* probeTile(__global__ const CoordType& ijk) const __global__
{
- const Tile* tiles = reinterpret_cast<const Tile*>(this + 1);
- const auto key = DataType::CoordToKey(ijk);
+ __global__ const Tile* tiles = reinterpret_cast<__global__ const Tile*>(this + 1);
+ const auto key = BASE(CoordToKey)(ijk);
#if 1 // switch between linear and binary seach
- for (uint32_t i = 0; i < DataType::mTableSize; ++i) {
+ for (uint32_t i = 0; i < BASE(mTableSize); ++i) {
if (tiles[i].key == key) return &tiles[i];
}
#else// do not enable binary search if tiles are not guaranteed to be sorted!!!!!!
@@ -3306,6 +3736,33 @@ public:
#endif
return nullptr;
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __global__ const Tile* findTile(__local__ const CoordType& ijk) const __global__
+ {
+ __global__ const Tile* tiles = reinterpret_cast<__global__ const Tile*>(this + 1);
+ const auto key = BASE(CoordToKey)(ijk);
+#if 1 // switch between linear and binary search
+ for (uint32_t i = 0; i < BASE(mTableSize); ++i) {
+ if (tiles[i].key == key) return &tiles[i];
+ }
+#else// do not enable binary search if tiles are not guaranteed to be sorted!!!!!!
+ // binary-search of pre-sorted elements
+ int32_t low = 0, high = DataType::mTableSize; // low is inclusive and high is exclusive
+ while (low != high) {
+ int mid = low + ((high - low) >> 1);
+ const Tile* tile = &tiles[mid];
+ if (tile->key == key) {
+ return tile;
+ } else if (tile->key < key) {
+ low = mid + 1;
+ } else {
+ high = mid;
+ }
+ }
+#endif
+ return nullptr;
+ }
+#endif
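+ // Illustrative note: root tiles are stored contiguously right after this node
+ // (`this + 1`). A lookup reduces the query coordinate to a 64-bit key via CoordToKey()
+ // and scans the table linearly; the binary-search branch stays disabled because the
+ // tiles are not guaranteed to be sorted. findTile() is a Metal-only twin of probeTile()
+ // whose only difference is that it accepts a thread-local coordinate. Sketch:
+ //
+ //   const auto key = BASE(CoordToKey)(ijk);           // coordinate -> 64-bit key
+ //   for (uint32_t i = 0; i < BASE(mTableSize); ++i)   // O(tileCount) linear probe
+ //       if (tiles[i].key == key) return &tiles[i];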
private:
static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(RootData) is misaligned");
@@ -3319,12 +3776,12 @@ private:
/// @brief Private method to return node information and update a ReadAccessor
template<typename AccT>
- __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const
{
using NodeInfoT = typename AccT::NodeInfo;
- if (const Tile* tile = this->probeTile(ijk)) {
+ if (__global__ const Tile* tile = this->probeTile(ijk)) {
if (tile->isChild()) {
- const auto *child = this->getChild(tile);
+ __global__ const auto *child = this->getChild(tile);
acc.insert(ijk, child);
return child->getNodeInfoAndCache(ijk, acc);
}
@@ -3337,11 +3794,11 @@ private:
/// @brief Private method to return a voxel value and update a ReadAccessor
template<typename AccT>
- __hostdev__ ValueType getValueAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ ValueType getValueAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const __global__
{
- if (const Tile* tile = this->probeTile(ijk)) {
+ if (__global__ const Tile* tile = this->probeTile(ijk)) {
if (tile->isChild()) {
- const auto *child = this->getChild(tile);
+ __global__ const auto *child = this->getChild(tile);
acc.insert(ijk, child);
return child->getValueAndCache(ijk, acc);
}
@@ -3349,25 +3806,66 @@ private:
}
return DataType::mBackground;
}
+#if defined(__KERNEL_METAL__)
+ template<typename AccT>
+ __hostdev__ ValueType getValueAndCache(__local__ const CoordType& ijk, __local__ const AccT& acc) const __global__
+ {
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ if (tile->isChild()) {
+ __global__ const auto *child = BASE(getChild)(tile);
+ acc.insert(ijk, child);
+ return child->getValueAndCache(ijk, acc);
+ }
+ return tile->value;
+ }
+ return BASE(mBackground);
+ }
+ template<typename AccT>
+ __hostdev__ ValueType getValueAndCache(__local__ const CoordType& ijk, __local__ const AccT& acc) const __local__
+ {
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ if (tile->isChild()) {
+ __global__ const auto *child = BASE(getChild)(tile);
+ acc.insert(ijk, child);
+ return child->getValueAndCache(ijk, acc);
+ }
+ return tile->value;
+ }
+ return BASE(mBackground);
+ }
+#endif
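+ // Illustrative note: the *AndCache methods implement the ReadAccessor fast path. When a
+ // lookup descends into a child node, the accessor records that node, e.g. (sketch):
+ //
+ //   acc.insert(ijk, child);                    // remember the node covering ijk
+ //   return child->getValueAndCache(ijk, acc);  // continue the descent from there
+ //
+ // so a later query with a nearby coordinate can start from the cached internal or leaf
+ // node instead of re-walking the tree from the root.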
template<typename AccT>
- __hostdev__ bool isActiveAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ bool isActiveAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const
{
- const Tile* tile = this->probeTile(ijk);
+ __global__ const Tile* tile = this->probeTile(ijk);
if (tile && tile->isChild()) {
- const auto *child = this->getChild(tile);
+ __global__ const auto *child = BASE(getChild)(tile);
acc.insert(ijk, child);
return child->isActiveAndCache(ijk, acc);
}
return false;
}
+#if defined(__KERNEL_METAL__)
+ template<typename AccT>
+ __hostdev__ bool isActiveAndCache(__local__ const CoordType& ijk, __local__ const AccT& acc) const __global__
+ {
+ __global__ const Tile* tile = this->findTile(ijk);
+ if (tile && tile->isChild()) {
+ __global__ const auto *child = BASE(getChild)(tile);
+ acc.insert(ijk, child);
+ return child->isActiveAndCache(ijk, acc);
+ }
+ return false;
+ }
+#endif
template<typename AccT>
- __hostdev__ bool probeValueAndCache(const CoordType& ijk, ValueType& v, const AccT& acc) const
+ __hostdev__ bool probeValueAndCache(__global__ const CoordType& ijk, __global__ ValueType& v, __global__ const AccT& acc) const
{
- if (const Tile* tile = this->probeTile(ijk)) {
+ if (__global__ const Tile* tile = this->probeTile(ijk)) {
if (tile->isChild()) {
- const auto *child = this->getChild(tile);
+ __global__ const auto *child = BASE(getChild)(tile);
acc.insert(ijk, child);
return child->probeValueAndCache(ijk, v, acc);
}
@@ -3379,11 +3877,11 @@ private:
}
template<typename AccT>
- __hostdev__ const LeafNodeType* probeLeafAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ __global__ const LeafNodeType* probeLeafAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const
{
- const Tile* tile = this->probeTile(ijk);
+ __global__ const Tile* tile = this->probeTile(ijk);
if (tile && tile->isChild()) {
- const auto *child = this->getChild(tile);
+ __global__ const auto *child = BASE(getChild)(tile);
acc.insert(ijk, child);
return child->probeLeafAndCache(ijk, acc);
}
@@ -3391,11 +3889,11 @@ private:
}
template<typename RayT, typename AccT>
- __hostdev__ uint32_t getDimAndCache(const CoordType& ijk, const RayT& ray, const AccT& acc) const
+ __hostdev__ uint32_t getDimAndCache(__global__ const CoordType& ijk, __global__ const RayT& ray, __global__ const AccT& acc) const __global__
{
- if (const Tile* tile = this->probeTile(ijk)) {
+ if (__global__ const Tile* tile = this->probeTile(ijk)) {
if (tile->isChild()) {
- const auto *child = this->getChild(tile);
+ __global__ const auto *child = BASE(getChild)(tile);
acc.insert(ijk, child);
return child->getDimAndCache(ijk, ray, acc);
}
@@ -3403,7 +3901,23 @@ private:
}
return ChildNodeType::dim(); // background
}
+#if defined(__KERNEL_METAL__)
+ template<typename RayT, typename AccT>
+ __hostdev__ uint32_t getDimAndCache(__local__ const CoordType& ijk, __local__ const RayT& ray, __local__ const AccT& acc) const __global__
+ {
+ if (__global__ const Tile* tile = this->findTile(ijk)) {
+ if (tile->isChild()) {
+ __global__ const auto *child = BASE(getChild)(tile);
+ acc.insert(ijk, child);
+ return child->getDimAndCache(ijk, ray, acc);
+ }
+ return 1 << ChildT::TOTAL; //tile value
+ }
+ return ChildNodeType::dim(); // background
+ }
+#endif
+#undef BASE
}; // RootNode class
// After the RootNode the memory layout is assumed to be the sorted Tiles
@@ -3421,7 +3935,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData
using StatsT = typename ChildT::FloatType;
using CoordT = typename ChildT::CoordType;
using MaskT = typename ChildT::template MaskType<LOG2DIM>;
- static constexpr bool FIXED_SIZE = true;
+ static __constant__ constexpr bool FIXED_SIZE = true;
union Tile
{
@@ -3429,8 +3943,8 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData
int64_t child;//signed 64 bit byte offset relative to the InternalData!!
/// @brief This class cannot be constructed or deleted
Tile() = delete;
- Tile(const Tile&) = delete;
- Tile& operator=(const Tile&) = delete;
+ Tile(__global__ const Tile&) = delete;
+ __global__ Tile& operator=(__global__ const Tile&) = delete;
~Tile() = delete;
};
@@ -3456,32 +3970,32 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData
__hostdev__ static uint64_t memUsage() { return sizeof(InternalData); }
- __hostdev__ void setChild(uint32_t n, const void *ptr)
+ __hostdev__ void setChild(uint32_t n, __global__ const void *ptr)
{
NANOVDB_ASSERT(mChildMask.isOn(n));
mTable[n].child = PtrDiff(ptr, this);
}
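+ // Illustrative note: child links are not stored as raw pointers but as signed byte
+ // offsets relative to this node (PtrDiff/PtrAdd), which keeps the whole grid
+ // position-independent so the same buffer can be copied or mapped into GPU memory
+ // unchanged. Conceptually:
+ //
+ //   mTable[n].child = (char*)childPtr - (char*)this;            // encode on write
+ //   child           = (ChildT*)((char*)this + mTable[n].child); // decode on read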
template <typename ValueT>
- __hostdev__ void setValue(uint32_t n, const ValueT &v)
+ __hostdev__ void setValue(uint32_t n, __global__ const ValueT &v)
{
NANOVDB_ASSERT(!mChildMask.isOn(n));
mTable[n].value = v;
}
/// @brief Returns a pointer to the child node at the specifed linear offset.
- __hostdev__ ChildT* getChild(uint32_t n)
+ __hostdev__ __global__ ChildT* getChild(uint32_t n) __global__
{
NANOVDB_ASSERT(mChildMask.isOn(n));
return PtrAdd<ChildT>(this, mTable[n].child);
}
- __hostdev__ const ChildT* getChild(uint32_t n) const
+ __hostdev__ __global__ const ChildT* getChild(uint32_t n) const __global__
{
NANOVDB_ASSERT(mChildMask.isOn(n));
return PtrAdd<ChildT>(this, mTable[n].child);
}
- __hostdev__ ValueT getValue(uint32_t n) const
+ __hostdev__ ValueT getValue(uint32_t n) const __global__
{
NANOVDB_ASSERT(!mChildMask.isOn(n));
return mTable[n].value;
@@ -3496,29 +4010,38 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData
__hostdev__ bool isChild(uint32_t n) const {return mChildMask.isOn(n);}
template <typename T>
- __hostdev__ void setOrigin(const T& ijk) { mBBox[0] = ijk; }
+ __hostdev__ void setOrigin(__global__ const T& ijk) { mBBox[0] = ijk; }
- __hostdev__ const ValueT& getMin() const { return mMinimum; }
- __hostdev__ const ValueT& getMax() const { return mMaximum; }
- __hostdev__ const StatsT& average() const { return mAverage; }
- __hostdev__ const StatsT& stdDeviation() const { return mStdDevi; }
+ __hostdev__ __global__ const ValueT& getMin() const { return mMinimum; }
+ __hostdev__ __global__ const ValueT& getMax() const { return mMaximum; }
+ __hostdev__ __global__ const StatsT& average() const { return mAverage; }
+ __hostdev__ __global__ const StatsT& stdDeviation() const { return mStdDevi; }
- __hostdev__ void setMin(const ValueT& v) { mMinimum = v; }
- __hostdev__ void setMax(const ValueT& v) { mMaximum = v; }
- __hostdev__ void setAvg(const StatsT& v) { mAverage = v; }
- __hostdev__ void setDev(const StatsT& v) { mStdDevi = v; }
+ __hostdev__ void setMin(__global__ const ValueT& v) { mMinimum = v; }
+ __hostdev__ void setMax(__global__ const ValueT& v) { mMaximum = v; }
+ __hostdev__ void setAvg(__global__ const StatsT& v) { mAverage = v; }
+ __hostdev__ void setDev(__global__ const StatsT& v) { mStdDevi = v; }
/// @brief This class cannot be constructed or deleted
InternalData() = delete;
- InternalData(const InternalData&) = delete;
- InternalData& operator=(const InternalData&) = delete;
+ InternalData(__global__ const InternalData&) = delete;
+ __global__ InternalData& operator=(__global__ const InternalData&) = delete;
~InternalData() = delete;
}; // InternalData
/// @brief Internal nodes of a VDB treedim(),
template<typename ChildT, uint32_t Log2Dim = ChildT::LOG2DIM + 1>
-class InternalNode : private InternalData<ChildT, Log2Dim>
+class InternalNode
+#if !defined(__KERNEL_METAL__)
+ : private InternalData<ChildT, Log2Dim>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ InternalData<ChildT, Log2Dim> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) DataType::v
+#endif
public:
using DataType = InternalData<ChildT, Log2Dim>;
using ValueType = typename DataType::ValueT;
@@ -3527,76 +4050,109 @@ public:
using LeafNodeType = typename ChildT::LeafNodeType;
using ChildNodeType = ChildT;
using CoordType = typename ChildT::CoordType;
- static constexpr bool FIXED_SIZE = DataType::FIXED_SIZE;
+ static __constant__ constexpr bool FIXED_SIZE = DataType::FIXED_SIZE;
template<uint32_t LOG2>
using MaskType = typename ChildT::template MaskType<LOG2>;
template<bool On>
using MaskIterT = typename Mask<Log2Dim>::template Iterator<On>;
- static constexpr uint32_t LOG2DIM = Log2Dim;
- static constexpr uint32_t TOTAL = LOG2DIM + ChildT::TOTAL; // dimension in index space
- static constexpr uint32_t DIM = 1u << TOTAL; // number of voxels along each axis of this node
- static constexpr uint32_t SIZE = 1u << (3 * LOG2DIM); // number of tile values (or child pointers)
- static constexpr uint32_t MASK = (1u << TOTAL) - 1u;
- static constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf
- static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node
+ static __constant__ constexpr uint32_t LOG2DIM = Log2Dim;
+ static __constant__ constexpr uint32_t TOTAL = LOG2DIM + ChildT::TOTAL; // dimension in index space
+ static __constant__ constexpr uint32_t DIM = 1u << TOTAL; // number of voxels along each axis of this node
+ static __constant__ constexpr uint32_t SIZE = 1u << (3 * LOG2DIM); // number of tile values (or child pointers)
+ static __constant__ constexpr uint32_t MASK = (1u << TOTAL) - 1u;
+ static __constant__ constexpr uint32_t LEVEL = 1 + ChildT::LEVEL; // level 0 = leaf
+ static __constant__ constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node
/// @brief Visits child nodes of this node only
- class ChildIterator : public MaskIterT<true>
+ class ChildIterator
+#if !defined (__KERNEL_METAL__)
+ : public MaskIterT<true>
+#endif
{
+#if defined (__KERNEL_METAL__)
+ MaskIterT<true> BaseT;
+#define BASE(v) BaseT.v
+#else
using BaseT = MaskIterT<true>;
- const DataType *mParent;
+#define BASE(v) BaseT::v
+#endif
+ __global__ const DataType *mParent;
public:
__hostdev__ ChildIterator() : BaseT(), mParent(nullptr) {}
- __hostdev__ ChildIterator(const InternalNode* parent) : BaseT(parent->data()->mChildMask.beginOn()), mParent(parent->data()) {}
- ChildIterator& operator=(const ChildIterator&) = default;
- __hostdev__ const ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mParent->getChild(BaseT::pos());}
- __hostdev__ const ChildT* operator->() const {NANOVDB_ASSERT(*this); return mParent->getChild(BaseT::pos());}
+ __hostdev__ ChildIterator(__global__ const InternalNode* parent) : BaseT(parent->data()->mChildMask.beginOn()), mParent(parent->data()) {}
+ __global__ ChildIterator& operator=(__global__ const ChildIterator&) = default;
+ __hostdev__ __global__ const ChildT& operator*() const {NANOVDB_ASSERT(*this); return *mParent->getChild(BASE(pos)());}
+ __hostdev__ __global__ const ChildT* operator->() const {NANOVDB_ASSERT(*this); return mParent->getChild(BASE(pos)());}
__hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); return (*this)->origin();}
}; // Member class ChildIterator
ChildIterator beginChild() const {return ChildIterator(this);}
/// @brief Visits all tile values in this node, i.e. both inactive and active tiles
- class ValueIterator : public MaskIterT<false>
+ class ValueIterator
+#if !defined (__KERNEL_METAL__)
+ : public MaskIterT<false>
+#endif
{
+#if defined (__KERNEL_METAL__)
+ MaskIterT<false> BaseT;
+#define BASE(v) BaseT.v
+#else
using BaseT = MaskIterT<false>;
- const InternalNode *mParent;
+#define BASE(v) BaseT::v
+#endif
+ __global__ const InternalNode *mParent;
public:
__hostdev__ ValueIterator() : BaseT(), mParent(nullptr) {}
- __hostdev__ ValueIterator(const InternalNode* parent) : BaseT(parent->data()->mChildMask.beginOff()), mParent(parent) {}
- ValueIterator& operator=(const ValueIterator&) = default;
- __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->data()->getValue(BaseT::pos());}
- __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); return mParent->localToGlobalCoord(BaseT::pos());}
- __hostdev__ bool isActive() const { NANOVDB_ASSERT(*this); return mParent->data()->isActive(BaseT::mPos);}
+ __hostdev__ ValueIterator(__global__ const InternalNode* parent) : BaseT(parent->data()->mChildMask.beginOff()), mParent(parent) {}
+ __global__ ValueIterator& operator=(__global__ const ValueIterator&) = default;
+ __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->data()->getValue(BASE(pos)());}
+ __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); return mParent->localToGlobalCoord(BASE(pos)());}
+ __hostdev__ bool isActive() const { NANOVDB_ASSERT(*this); return mParent->data()->isActive(BASE(mPos));}
}; // Member class ValueIterator
ValueIterator beginValue() const {return ValueIterator(this);}
/// @brief Visits active tile values of this node only
- class ValueOnIterator : public MaskIterT<true>
+ class ValueOnIterator
+#if !defined (__KERNEL_METAL__)
+ : public MaskIterT<true>
+#endif
{
+#if defined (__KERNEL_METAL__)
+ MaskIterT<true> BaseT;
+#define BASE(v) BaseT.v
+#else
using BaseT = MaskIterT<true>;
- const InternalNode *mParent;
+#define BASE(v) BaseT::v
+#endif
+ __global__ const InternalNode *mParent;
public:
__hostdev__ ValueOnIterator() : BaseT(), mParent(nullptr) {}
- __hostdev__ ValueOnIterator(const InternalNode* parent) : BaseT(parent->data()->mValueMask.beginOn()), mParent(parent) {}
- ValueOnIterator& operator=(const ValueOnIterator&) = default;
- __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->data()->getValue(BaseT::pos());}
- __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); return mParent->localToGlobalCoord(BaseT::pos());}
+ __hostdev__ ValueOnIterator(__global__ const InternalNode* parent) : BaseT(parent->data()->mValueMask.beginOn()), mParent(parent) {}
+ __global__ ValueOnIterator& operator=(__global__ const ValueOnIterator&) = default;
+ __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->data()->getValue(BASE(pos)());}
+ __hostdev__ CoordType getOrigin() const { NANOVDB_ASSERT(*this); return mParent->localToGlobalCoord(BASE(pos)());}
}; // Member class ValueOnIterator
ValueOnIterator beginValueOn() const {return ValueOnIterator(this);}
+#if defined(__KERNEL_METAL__)
+#define BASE(v) _base.v
+#else
+#define BASE(v) DataType::v
+#endif
+
/// @brief This class cannot be constructed or deleted
- InternalNode() = delete;
- InternalNode(const InternalNode&) = delete;
- InternalNode& operator=(const InternalNode&) = delete;
+ InternalNode() __global__ = delete;
+ InternalNode(__global__ const InternalNode&) __global__ = delete;
+ __global__ InternalNode& operator=(__global__ const InternalNode&) __global__ = delete;
~InternalNode() = delete;
- __hostdev__ DataType* data() { return reinterpret_cast<DataType*>(this); }
+ __hostdev__ __global__ DataType* data() __global__ { return reinterpret_cast<__global__ DataType*>(this); }
- __hostdev__ const DataType* data() const { return reinterpret_cast<const DataType*>(this); }
+ __hostdev__ __global__ const DataType* data() const __global__ { return reinterpret_cast<__global__ const DataType*>(this); }
/// @brief Return the dimension, in voxel units, of this internal node (typically 8*16 or 8*16*32)
__hostdev__ static uint32_t dim() { return 1u << TOTAL; }
@@ -3605,47 +4161,66 @@ public:
__hostdev__ static size_t memUsage() { return DataType::memUsage(); }
/// @brief Return a const reference to the bit mask of active voxels in this internal node
- __hostdev__ const MaskType<LOG2DIM>& valueMask() const { return DataType::mValueMask; }
+ __hostdev__ __global__ const MaskType<LOG2DIM>& valueMask() const __global__ { return BASE(mValueMask); }
/// @brief Return a const reference to the bit mask of child nodes in this internal node
- __hostdev__ const MaskType<LOG2DIM>& childMask() const { return DataType::mChildMask; }
+ __hostdev__ __global__ const MaskType<LOG2DIM>& childMask() const __global__ { return DataType::mChildMask; }
/// @brief Return the origin in index space of this leaf node
- __hostdev__ CoordType origin() const { return DataType::mBBox.min() & ~MASK; }
+ __hostdev__ CoordType origin() const __global__ { return DataType::mBBox.min() & ~MASK; }
/// @brief Return a const reference to the minimum active value encoded in this internal node and any of its child nodes
- __hostdev__ const ValueType& minimum() const { return this->getMin(); }
+ __hostdev__ __global__ const ValueType& minimum() const __global__ { return this->getMin(); }
/// @brief Return a const reference to the maximum active value encoded in this internal node and any of its child nodes
- __hostdev__ const ValueType& maximum() const { return this->getMax(); }
+ __hostdev__ __global__ const ValueType& maximum() const __global__ { return this->getMax(); }
/// @brief Return a const reference to the average of all the active values encoded in this internal node and any of its child nodes
- __hostdev__ const FloatType& average() const { return DataType::mAverage; }
+ __hostdev__ __global__ const FloatType& average() const __global__ { return DataType::mAverage; }
/// @brief Return the variance of all the active values encoded in this internal node and any of its child nodes
- __hostdev__ FloatType variance() const { return DataType::mStdDevi*DataType::mStdDevi; }
+ __hostdev__ FloatType variance() const __global__ { return DataType::mStdDevi*DataType::mStdDevi; }
/// @brief Return a const reference to the standard deviation of all the active values encoded in this internal node and any of its child nodes
- __hostdev__ const FloatType& stdDeviation() const { return DataType::mStdDevi; }
+ __hostdev__ __global__ const FloatType& stdDeviation() const __global__ { return DataType::mStdDevi; }
/// @brief Return a const reference to the bounding box in index space of active values in this internal node and any of its child nodes
- __hostdev__ const BBox<CoordType>& bbox() const { return DataType::mBBox; }
+ __hostdev__ __global__ const BBox<CoordType>& bbox() const __global__ { return DataType::mBBox; }
/// @brief Return the value of the given voxel
- __hostdev__ ValueType getValue(const CoordType& ijk) const
+ __hostdev__ ValueType getValue(__global__ const CoordType& ijk) const __global__
{
const uint32_t n = CoordToOffset(ijk);
return DataType::mChildMask.isOn(n) ? this->getChild(n)->getValue(ijk) : DataType::getValue(n);
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __global__
+ {
+ const uint32_t n = CoordToOffset(ijk);
+ return BASE(mChildMask).isOn(n) ? BASE(getChild)(n)->getValue(ijk) : BASE(getValue)(n);
+ }
+#endif
- __hostdev__ bool isActive(const CoordType& ijk) const
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __global__
{
const uint32_t n = CoordToOffset(ijk);
return DataType::mChildMask.isOn(n) ? this->getChild(n)->isActive(ijk) : DataType::isActive(n);
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __global__
+ {
+ const uint32_t n = CoordToOffset(ijk);
+ return BASE(mChildMask).isOn(n) ? BASE(getChild)(n)->isActive(ijk) : BASE(isActive)(n);
+ }
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __local__
+ {
+ const uint32_t n = CoordToOffset(ijk);
+ return BASE(mChildMask).isOn(n) ? BASE(getChild)(n)->isActive(ijk) : BASE(isActive)(n);
+ }
+#endif
/// @brief return the state and updates the value of the specified voxel
- __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ ValueType& v) const __global__
{
const uint32_t n = CoordToOffset(ijk);
if (DataType::mChildMask.isOn(n))
@@ -3653,8 +4228,18 @@ public:
v = DataType::getValue(n);
return DataType::isActive(n);
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool probeValue(__local__ const CoordType& ijk, __local__ ValueType& v) const __global__
+ {
+ const uint32_t n = CoordToOffset(ijk);
+ if (BASE(mChildMask).isOn(n))
+ return BASE(getChild)(n)->probeValue(ijk, v);
+ v = BASE(getValue)(n);
+ return BASE(isActive)(n);
+ }
+#endif
- __hostdev__ const LeafNodeType* probeLeaf(const CoordType& ijk) const
+ __hostdev__ __global__ const LeafNodeType* probeLeaf(__global__ const CoordType& ijk) const __global__
{
const uint32_t n = CoordToOffset(ijk);
if (DataType::mChildMask.isOn(n))
@@ -3662,14 +4247,14 @@ public:
return nullptr;
}
- __hostdev__ const ChildNodeType* probeChild(const CoordType& ijk) const
+ __hostdev__ __global__ const ChildNodeType* probeChild(__global__ const CoordType& ijk) const __global__
{
const uint32_t n = CoordToOffset(ijk);
return DataType::mChildMask.isOn(n) ? this->getChild(n) : nullptr;
}
/// @brief Return the linear offset corresponding to the given coordinate
- __hostdev__ static uint32_t CoordToOffset(const CoordType& ijk)
+ __hostdev__ static uint32_t CoordToOffset(__global__ const CoordType& ijk)
{
#if 0
return (((ijk[0] & MASK) >> ChildT::TOTAL) << (2 * LOG2DIM)) +
@@ -3681,6 +4266,20 @@ public:
((ijk[2] & MASK) >> ChildT::TOTAL);
#endif
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ static uint32_t CoordToOffset(__local__ const CoordType& ijk)
+ {
+#if 0
+ return (((ijk[0] & MASK) >> ChildT::TOTAL) << (2 * LOG2DIM)) +
+ (((ijk[1] & MASK) >> ChildT::TOTAL) << (LOG2DIM)) +
+ ((ijk[2] & MASK) >> ChildT::TOTAL);
+#else
+ return (((ijk[0] & MASK) >> ChildT::TOTAL) << (2 * LOG2DIM)) |
+ (((ijk[1] & MASK) >> ChildT::TOTAL) << (LOG2DIM)) |
+ ((ijk[2] & MASK) >> ChildT::TOTAL);
+#endif
+ }
+#endif
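+ // Illustrative example: CoordToOffset packs one LOG2DIM-bit index per axis. Each
+ // coordinate is masked to this node's extent and divided by the child dimension
+ // (>> ChildT::TOTAL), and the three partial indices are OR'ed into a single
+ // 3*LOG2DIM-bit table offset. Assuming the default upper internal node
+ // (LOG2DIM = 5, ChildT::TOTAL = 7, MASK = 4095), ijk = (4100, 7, 9000) gives
+ //
+ //   i: (4100 & 4095) >> 7 = 0,  j: (7 & 4095) >> 7 = 0,  k: (9000 & 4095) >> 7 = 6
+ //   offset = (0 << 10) | (0 << 5) | 6 = 6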
/// @return the local coordinate of the n'th tile or child node
__hostdev__ static Coord OffsetToLocalCoord(uint32_t n)
@@ -3691,13 +4290,13 @@ public:
}
/// @brief modifies local coordinates to global coordinates of a tile or child node
- __hostdev__ void localToGlobalCoord(Coord& ijk) const
+ __hostdev__ void localToGlobalCoord(__global__ Coord& ijk) const __global__
{
ijk <<= ChildT::TOTAL;
ijk += this->origin();
}
- __hostdev__ Coord offsetToGlobalCoord(uint32_t n) const
+ __hostdev__ Coord offsetToGlobalCoord(uint32_t n) const __global__
{
Coord ijk = InternalNode::OffsetToLocalCoord(n);
this->localToGlobalCoord(ijk);
@@ -3705,13 +4304,24 @@ public:
}
/// @brief Return true if this node or any of its child nodes contain active values
- __hostdev__ bool isActive() const
+ __hostdev__ bool isActive() const __global__
{
return DataType::mFlags & uint32_t(2);
}
+#if defined(__KERNEL_METAL__)
+ /// @brief Return true if this node or any of its child nodes contain active values
+ __hostdev__ bool isActive() const __local__
+ {
+ return BASE(mFlags) & uint32_t(2);
+ }
+#endif
private:
+#if !defined(__KERNEL_METAL__)
static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(InternalData) is misaligned");
+#else
+ static_assert(sizeof(_base) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(InternalData) is misaligned");
+#endif
//static_assert(offsetof(DataType, mTable) % 32 == 0, "InternalData::mTable is misaligned");
template<typename, int, int, int>
@@ -3724,18 +4334,30 @@ private:
/// @brief Private read access method used by the ReadAccessor
template<typename AccT>
- __hostdev__ ValueType getValueAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ ValueType getValueAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const __global__
{
const uint32_t n = CoordToOffset(ijk);
if (!DataType::mChildMask.isOn(n))
- return DataType::getValue(n);
- const ChildT* child = this->getChild(n);
+ return BASE(getValue)(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
acc.insert(ijk, child);
return child->getValueAndCache(ijk, acc);
}
+#if defined(__KERNEL_METAL__)
+ template<typename AccT>
+ __hostdev__ ValueType getValueAndCache(__local__ const CoordType& ijk, __local__ const AccT& acc) const __global__
+ {
+ const uint32_t n = CoordToOffset(ijk);
+ if (!BASE(mChildMask).isOn(n))
+ return BASE(getValue)(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
+ acc.insert(ijk, child);
+ return child->getValueAndCache(ijk, acc);
+ }
+#endif
template<typename AccT>
- __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const __global__
{
using NodeInfoT = typename AccT::NodeInfo;
const uint32_t n = CoordToOffset(ijk);
@@ -3743,61 +4365,91 @@ private:
return NodeInfoT{LEVEL, this->dim(), this->minimum(), this->maximum(), this->average(),
this->stdDeviation(), this->bbox()[0], this->bbox()[1]};
}
- const ChildT* child = this->getChild(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
acc.insert(ijk, child);
return child->getNodeInfoAndCache(ijk, acc);
}
template<typename AccT>
- __hostdev__ bool isActiveAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ bool isActiveAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const __global__
{
const uint32_t n = CoordToOffset(ijk);
if (!DataType::mChildMask.isOn(n))
return DataType::isActive(n);
- const ChildT* child = this->getChild(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
acc.insert(ijk, child);
return child->isActiveAndCache(ijk, acc);
}
+#if defined(__KERNEL_METAL__)
+ template<typename AccT>
+ __hostdev__ bool isActiveAndCache(__local__ const CoordType& ijk, __local__ const AccT& acc) const __global__
+ {
+ const uint32_t n = CoordToOffset(ijk);
+ if (!BASE(mChildMask).isOn(n))
+ return BASE(mValueMask).isOn(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
+ acc.insert(ijk, child);
+ return child->isActiveAndCache(ijk, acc);
+ }
+#endif
template<typename AccT>
- __hostdev__ bool probeValueAndCache(const CoordType& ijk, ValueType& v, const AccT& acc) const
+ __hostdev__ bool probeValueAndCache(__global__ const CoordType& ijk, __global__ ValueType& v, __global__ const AccT& acc) const __global__
{
const uint32_t n = CoordToOffset(ijk);
if (!DataType::mChildMask.isOn(n)) {
v = DataType::getValue(n);
return DataType::isActive(n);
}
- const ChildT* child = this->getChild(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
acc.insert(ijk, child);
return child->probeValueAndCache(ijk, v, acc);
}
template<typename AccT>
- __hostdev__ const LeafNodeType* probeLeafAndCache(const CoordType& ijk, const AccT& acc) const
+ __hostdev__ __global__ const LeafNodeType* probeLeafAndCache(__global__ const CoordType& ijk, __global__ const AccT& acc) const __global__
{
const uint32_t n = CoordToOffset(ijk);
if (!DataType::mChildMask.isOn(n))
return nullptr;
- const ChildT* child = this->getChild(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
acc.insert(ijk, child);
return child->probeLeafAndCache(ijk, acc);
}
template<typename RayT, typename AccT>
- __hostdev__ uint32_t getDimAndCache(const CoordType& ijk, const RayT& ray, const AccT& acc) const
+ __hostdev__ uint32_t getDimAndCache(__global__ const CoordType& ijk, __global__ const RayT& ray, __global__ const AccT& acc) const __global__
{
if (DataType::mFlags & uint32_t(1u)) return this->dim(); // skip this node if the 1st bit is set
//if (!ray.intersects( this->bbox() )) return 1<<TOTAL;
const uint32_t n = CoordToOffset(ijk);
if (DataType::mChildMask.isOn(n)) {
- const ChildT* child = this->getChild(n);
+ __global__ const ChildT* child = BASE(getChild)(n);
acc.insert(ijk, child);
return child->getDimAndCache(ijk, ray, acc);
}
return ChildNodeType::dim(); // tile value
}
+#if defined(__KERNEL_METAL__)
+ template<typename RayT, typename AccT>
+ __hostdev__ uint32_t getDimAndCache(__local__ const CoordType& ijk, __local__ const RayT& ray, __local__ const AccT& acc) const __global__
+ {
+ if (BASE(mFlags) & uint32_t(1))
+ return this->dim(); // skip this node if the first bit is set
+ //if (!ray.intersects( this->bbox() )) return 1<<TOTAL;
+ const uint32_t n = CoordToOffset(ijk);
+ if (BASE(mChildMask).isOn(n)) {
+ __global__ const ChildT* child = BASE(getChild)(n);
+ acc.insert(ijk, child);
+ return child->getDimAndCache(ijk, ray, acc);
+ }
+ return ChildNodeType::dim(); // tile value
+ }
+#endif
+
+#undef BASE
}; // InternalNode class
// --------------------------> LeafNode <------------------------------------
@@ -3814,7 +4466,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData
using BuildType = ValueT;
using FloatType = typename FloatTraits<ValueT>::FloatType;
using ArrayType = ValueT;// type used for the internal mValue array
- static constexpr bool FIXED_SIZE = true;
+ static __constant__ constexpr bool FIXED_SIZE = true;
CoordT mBBoxMin; // 12B.
uint8_t mBBoxDif[3]; // 3B.
@@ -3826,7 +4478,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData
FloatType mAverage; // typically 4B, average of all the active values in this node and its child nodes
FloatType mStdDevi; // typically 4B, standard deviation of all the active values in this node and its child nodes
alignas(32) ValueType mValues[1u << 3 * LOG2DIM];
-
+
/// @brief Return padding of this class in bytes, due to aliasing and 32B alignment
///
/// @note The extra bytes are not necessarily at the end, but can come from aliasing of individual data members.
@@ -3838,32 +4490,35 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData
__hostdev__ static uint64_t memUsage() { return sizeof(LeafData); }
//__hostdev__ const ValueType* values() const { return mValues; }
- __hostdev__ ValueType getValue(uint32_t i) const { return mValues[i]; }
- __hostdev__ void setValueOnly(uint32_t offset, const ValueType& value) { mValues[offset] = value; }
- __hostdev__ void setValue(uint32_t offset, const ValueType& value)
+ __hostdev__ ValueType getValue(uint32_t i) const __global__ { return mValues[i]; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(uint32_t i) const __local__ { return mValues[i]; }
+#endif
+ __hostdev__ void setValueOnly(uint32_t offset, __global__ const ValueType& value) __global__ { mValues[offset] = value; }
+ __hostdev__ void setValue(uint32_t offset, __global__ const ValueType& value) __global__
{
mValueMask.setOn(offset);
mValues[offset] = value;
}
- __hostdev__ ValueType getMin() const { return mMinimum; }
- __hostdev__ ValueType getMax() const { return mMaximum; }
- __hostdev__ FloatType getAvg() const { return mAverage; }
- __hostdev__ FloatType getDev() const { return mStdDevi; }
+ __hostdev__ ValueType getMin() const __global__ { return mMinimum; }
+ __hostdev__ ValueType getMax() const __global__ { return mMaximum; }
+ __hostdev__ FloatType getAvg() const __global__ { return mAverage; }
+ __hostdev__ FloatType getDev() const __global__ { return mStdDevi; }
- __hostdev__ void setMin(const ValueType& v) { mMinimum = v; }
- __hostdev__ void setMax(const ValueType& v) { mMaximum = v; }
- __hostdev__ void setAvg(const FloatType& v) { mAverage = v; }
- __hostdev__ void setDev(const FloatType& v) { mStdDevi = v; }
+ __hostdev__ void setMin(__global__ const ValueType& v) __global__ { mMinimum = v; }
+ __hostdev__ void setMax(__global__ const ValueType& v) __global__ { mMaximum = v; }
+ __hostdev__ void setAvg(__global__ const FloatType& v) __global__ { mAverage = v; }
+ __hostdev__ void setDev(__global__ const FloatType& v) __global__ { mStdDevi = v; }
template <typename T>
- __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; }
+ __hostdev__ void setOrigin(__global__ const T& ijk) __global__ { mBBoxMin = ijk; }
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
}; // LeafData<ValueT>
/// @brief Base-class for quantized float leaf nodes
@@ -3892,39 +4547,39 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafFnBase
__hostdev__ static constexpr uint32_t padding() {
return sizeof(LeafFnBase) - (12 + 3 + 1 + sizeof(MaskT<LOG2DIM>) + 2*4 + 4*2);
}
- __hostdev__ void init(float min, float max, uint8_t bitWidth)
+ __hostdev__ void init(float min, float max, uint8_t bitWidth) __global__
{
mMinimum = min;
mQuantum = (max - min)/float((1 << bitWidth)-1);
}
/// @brief return the quantized minimum of the active values in this node
- __hostdev__ float getMin() const { return mMin*mQuantum + mMinimum; }
+ __hostdev__ float getMin() const __global__ { return mMin*mQuantum + mMinimum; }
/// @brief return the quantized maximum of the active values in this node
- __hostdev__ float getMax() const { return mMax*mQuantum + mMinimum; }
+ __hostdev__ float getMax() const __global__ { return mMax*mQuantum + mMinimum; }
/// @brief return the quantized average of the active values in this node
- __hostdev__ float getAvg() const { return mAvg*mQuantum + mMinimum; }
+ __hostdev__ float getAvg() const __global__ { return mAvg*mQuantum + mMinimum; }
/// @brief return the quantized standard deviation of the active values in this node
/// @note 0 <= StdDev <= max-min or 0 <= StdDev/(max-min) <= 1
- __hostdev__ float getDev() const { return mDev*mQuantum; }
+ __hostdev__ float getDev() const __global__ { return mDev*mQuantum; }
/// @note min <= X <= max or 0 <= (X-min)/(min-max) <= 1
- __hostdev__ void setMin(float min) { mMin = uint16_t((min - mMinimum)/mQuantum + 0.5f); }
+ __hostdev__ void setMin(float min) __global__ { mMin = uint16_t((min - mMinimum)/mQuantum + 0.5f); }
/// @note min <= X <= max or 0 <= (X-min)/(min-max) <= 1
- __hostdev__ void setMax(float max) { mMax = uint16_t((max - mMinimum)/mQuantum + 0.5f); }
+ __hostdev__ void setMax(float max) __global__ { mMax = uint16_t((max - mMinimum)/mQuantum + 0.5f); }
/// @note min <= avg <= max or 0 <= (avg-min)/(min-max) <= 1
- __hostdev__ void setAvg(float avg) { mAvg = uint16_t((avg - mMinimum)/mQuantum + 0.5f); }
+ __hostdev__ void setAvg(float avg) __global__ { mAvg = uint16_t((avg - mMinimum)/mQuantum + 0.5f); }
/// @note 0 <= StdDev <= max-min or 0 <= StdDev/(max-min) <= 1
- __hostdev__ void setDev(float dev) { mDev = uint16_t(dev/mQuantum + 0.5f); }
+ __hostdev__ void setDev(float dev) __global__ { mDev = uint16_t(dev/mQuantum + 0.5f); }
template <typename T>
- __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; }
+ __hostdev__ void setOrigin(__global__ const T& ijk) __global__ { mBBoxMin = ijk; }
};// LeafFnBase
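+ // Illustrative example: LeafFnBase stores quantized values as fixed-point codes.
+ // init(min, max, bitWidth) keeps mMinimum = min and mQuantum = (max-min)/(2^bitWidth-1);
+ // decoding is code*mQuantum + mMinimum and encoding rounds (v - mMinimum)/mQuantum + 0.5.
+ // E.g. with min = 0, max = 1 and a 4-bit code, mQuantum = 1/15, so v = 0.4 encodes to
+ // round(0.4*15) = 6 and decodes back to 6/15 = 0.4.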
/// @brief Stuct with all the member data of the LeafNode (useful during serialization of an openvdb LeafNode)
@@ -3932,12 +4587,24 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafFnBase
/// @note No client code should (or can) interface with this struct so it can safely be ignored!
template<typename CoordT, template<uint32_t> class MaskT, uint32_t LOG2DIM>
struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<Fp4, CoordT, MaskT, LOG2DIM>
+#if !defined(__KERNEL_METAL__)
: public LeafFnBase<CoordT, MaskT, LOG2DIM>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ LeafFnBase<CoordT, MaskT, LOG2DIM> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
using BaseT = LeafFnBase<CoordT, MaskT, LOG2DIM>;
using BuildType = Fp4;
using ArrayType = uint8_t;// type used for the internal mValue array
- static constexpr bool FIXED_SIZE = true;
+#if defined(__KERNEL_METAL__)
+ using ValueType = typename BaseT::ValueType;
+ using FloatType = typename BaseT::FloatType;
+#endif
+ static __constant__ constexpr bool FIXED_SIZE = true;
alignas(32) uint8_t mCode[1u << (3 * LOG2DIM - 1)];// LeafFnBase is 32B aligned and so is mCode
__hostdev__ static constexpr uint64_t memUsage() { return sizeof(LeafData); }
@@ -3947,31 +4614,53 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<Fp4, CoordT, MaskT, LOG2DI
}
__hostdev__ static constexpr uint8_t bitWidth() { return 4u; }
- __hostdev__ float getValue(uint32_t i) const
+ __hostdev__ float getValue(uint32_t i) const __global__
{
#if 0
const uint8_t c = mCode[i>>1];
return ( (i&1) ? c >> 4 : c & uint8_t(15) )*BaseT::mQuantum + BaseT::mMinimum;
#else
- return ((mCode[i>>1] >> ((i&1)<<2)) & uint8_t(15))*BaseT::mQuantum + BaseT::mMinimum;
+ return ((mCode[i>>1] >> ((i&1)<<2)) & uint8_t(15))*BASE(mQuantum) + BASE(mMinimum);
#endif
}
+#if defined(__KERNEL_METAL__)
+__hostdev__ float getValue(uint32_t i) const __local__
+ {
+#if 0
+ const uint8_t c = mCode[i>>1];
+ return ( (i&1) ? c >> 4 : c & uint8_t(15) )*BaseT::mQuantum + BaseT::mMinimum;
+#else
+ return ((mCode[i>>1] >> ((i&1)<<2)) & uint8_t(15))*BASE(mQuantum) + BASE(mMinimum);
+#endif
+ }
+#endif
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
+#undef BASE
}; // LeafData<Fp4>
template<typename CoordT, template<uint32_t> class MaskT, uint32_t LOG2DIM>
struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<Fp8, CoordT, MaskT, LOG2DIM>
+#if !defined(__KERNEL_METAL__)
: public LeafFnBase<CoordT, MaskT, LOG2DIM>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ LeafFnBase<CoordT, MaskT, LOG2DIM> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
using BaseT = LeafFnBase<CoordT, MaskT, LOG2DIM>;
using BuildType = Fp8;
using ArrayType = uint8_t;// type used for the internal mValue array
- static constexpr bool FIXED_SIZE = true;
+ static __constant__ constexpr bool FIXED_SIZE = true;
alignas(32) uint8_t mCode[1u << 3 * LOG2DIM];
__hostdev__ static constexpr int64_t memUsage() { return sizeof(LeafData); }
__hostdev__ static constexpr uint32_t padding() {
@@ -3980,25 +4669,44 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<Fp8, CoordT, MaskT, LOG2DI
}
__hostdev__ static constexpr uint8_t bitWidth() { return 8u; }
- __hostdev__ float getValue(uint32_t i) const
+ __hostdev__ float getValue(uint32_t i) const __global__
{
return mCode[i]*BaseT::mQuantum + BaseT::mMinimum;// code * (max-min)/255 + min
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ float getValue(uint32_t i) const __local__
+ {
+ return mCode[i]*BaseT::mQuantum + BaseT::mMinimum;// code * (max-min)/255 + min
+ }
+#endif
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
+#undef BASE
}; // LeafData<Fp8>
template<typename CoordT, template<uint32_t> class MaskT, uint32_t LOG2DIM>
struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<Fp16, CoordT, MaskT, LOG2DIM>
+#if !defined(__KERNEL_METAL__)
: public LeafFnBase<CoordT, MaskT, LOG2DIM>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ LeafFnBase<CoordT, MaskT, LOG2DIM> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
using BaseT = LeafFnBase<CoordT, MaskT, LOG2DIM>;
using BuildType = Fp16;
using ArrayType = uint16_t;// type used for the internal mValue array
- static constexpr bool FIXED_SIZE = true;
+#if defined(__KERNEL_METAL__)
+ using ValueType = typename BaseT::ValueType;
+ using FloatType = typename BaseT::FloatType;
+#endif
+ static __constant__ constexpr bool FIXED_SIZE = true;
alignas(32) uint16_t mCode[1u << 3 * LOG2DIM];
__hostdev__ static constexpr uint64_t memUsage() { return sizeof(LeafData); }
@@ -4008,35 +4716,93 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<Fp16, CoordT, MaskT, LOG2D
}
__hostdev__ static constexpr uint8_t bitWidth() { return 16u; }
- __hostdev__ float getValue(uint32_t i) const
+ __hostdev__ float getValue(uint32_t i) const __global__
{
- return mCode[i]*BaseT::mQuantum + BaseT::mMinimum;// code * (max-min)/65535 + min
+ return mCode[i]*BASE(mQuantum) + BASE(mMinimum);// code * (max-min)/65535 + min
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ float getValue(uint32_t i) const __local__
+ {
+ return mCode[i]*BASE(mQuantum) + BASE(mMinimum);// code * (max-min)/65535 + min
+ }
+#endif
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
+#undef BASE
}; // LeafData<Fp16>
template<typename CoordT, template<uint32_t> class MaskT, uint32_t LOG2DIM>
struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<FpN, CoordT, MaskT, LOG2DIM>
+#if !defined(__KERNEL_METAL__)
: public LeafFnBase<CoordT, MaskT, LOG2DIM>
+#endif
{// this class has no data members, however every instance is immediately followed
// bitWidth*64 bytes. Since its base class is 32B aligned so are the bitWidth*64 bytes
+#if defined(__KERNEL_METAL__)
+ LeafFnBase<CoordT, MaskT, LOG2DIM> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
using BaseT = LeafFnBase<CoordT, MaskT, LOG2DIM>;
using BuildType = FpN;
- static constexpr bool FIXED_SIZE = false;
+ static __constant__ constexpr bool FIXED_SIZE = false;
+#if defined(__KERNEL_METAL__)
+ using ValueType = typename BaseT::ValueType;
+ using FloatType = typename BaseT::FloatType;
+#endif
__hostdev__ static constexpr uint32_t padding() {
static_assert(BaseT::padding()==0, "expected no padding in LeafFnBase");
return 0;
}
- __hostdev__ uint8_t bitWidth() const { return 1 << (BaseT::mFlags >> 5); }// 4,8,16,32 = 2^(2,3,4,5)
- __hostdev__ size_t memUsage() const { return sizeof(*this) + this->bitWidth()*64; }
+ __hostdev__ uint8_t bitWidth() const __global__ { return 1 << (BaseT::mFlags >> 5); }// 4,8,16,32 = 2^(2,3,4,5)
+ __hostdev__ size_t memUsage() const __global__ { return sizeof(*this) + this->bitWidth()*64; }
__hostdev__ static size_t memUsage(uint32_t bitWidth) { return 96u + bitWidth*64; }
- __hostdev__ float getValue(uint32_t i) const
+ __hostdev__ float getValue(uint32_t i) const __global__
+ {
+#ifdef NANOVDB_FPN_BRANCHLESS// faster
+ const int b = BASE(mFlags) >> 5;// b = 0, 1, 2, 3, 4 corresponding to 1, 2, 4, 8, 16 bits
+#if 0// use LUT
+ uint16_t code = reinterpret_cast<const uint16_t*>(this + 1)[i >> (4 - b)];
+ const static uint8_t shift[5] = {15, 7, 3, 1, 0};
+ const static uint16_t mask[5] = {1, 3, 15, 255, 65535};
+ code >>= (i & shift[b]) << b;
+ code &= mask[b];
+#else// no LUT
+ uint32_t code = reinterpret_cast<__global__ const uint32_t*>(this + 1)[i >> (5 - b)];
+ //code >>= (i & ((16 >> b) - 1)) << b;
+ code >>= (i & ((32 >> b) - 1)) << b;
+ code &= (1 << (1 << b)) - 1;
+#endif
+#else// use branched version (slow)
+ float code;
+ __global__ auto *values = reinterpret_cast<__global__ const uint8_t*>(this+1);
+ switch (BaseT::mFlags >> 5) {
+ case 0u:// 1 bit float
+ code = float((values[i>>3] >> (i&7) ) & uint8_t(1));
+ break;
+ case 1u:// 2 bits float
+ code = float((values[i>>2] >> ((i&3)<<1)) & uint8_t(3));
+ break;
+ case 2u:// 4 bits float
+ code = float((values[i>>1] >> ((i&1)<<2)) & uint8_t(15));
+ break;
+ case 3u:// 8 bits float
+ code = float(values[i]);
+ break;
+ default:// 16 bits float
+ code = float(reinterpret_cast<__global__ const uint16_t*>(values)[i]);
+ }
+#endif
+ return float(code) * BASE(mQuantum) + BASE(mMinimum);// code * (max-min)/UNITS + min
+ }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ float getValue(uint32_t i) const __local__
{
#ifdef NANOVDB_FPN_BRANCHLESS// faster
const int b = BaseT::mFlags >> 5;// b = 0, 1, 2, 3, 4 corresponding to 1, 2, 4, 8, 16 bits
@@ -4047,14 +4813,14 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<FpN, CoordT, MaskT, LOG2DI
code >>= (i & shift[b]) << b;
code &= mask[b];
#else// no LUT
- uint32_t code = reinterpret_cast<const uint32_t*>(this + 1)[i >> (5 - b)];
+ uint32_t code = reinterpret_cast<__global__ const uint32_t*>(this + 1)[i >> (5 - b)];
//code >>= (i & ((16 >> b) - 1)) << b;
code >>= (i & ((32 >> b) - 1)) << b;
code &= (1 << (1 << b)) - 1;
#endif
#else// use branched version (slow)
float code;
- auto *values = reinterpret_cast<const uint8_t*>(this+1);
+ __global__ auto *values = reinterpret_cast<const uint8_t*>(this+1);
switch (BaseT::mFlags >> 5) {
case 0u:// 1 bit float
code = float((values[i>>3] >> (i&7) ) & uint8_t(1));
@@ -4074,12 +4840,15 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<FpN, CoordT, MaskT, LOG2DI
#endif
return float(code) * BaseT::mQuantum + BaseT::mMinimum;// code * (max-min)/UNITS + min
}
+#endif
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
+
+#undef BASE
}; // LeafData<FpN>
// Partial template specialization of LeafData with bool
@@ -4092,7 +4861,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<bool, CoordT, MaskT, LOG2D
using BuildType = bool;
using FloatType = bool;// dummy value type
using ArrayType = MaskT<LOG2DIM>;// type used for the internal mValue array
- static constexpr bool FIXED_SIZE = true;
+ static __constant__ constexpr bool FIXED_SIZE = true;
CoordT mBBoxMin; // 12B.
uint8_t mBBoxDif[3]; // 3B.
@@ -4104,31 +4873,34 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<bool, CoordT, MaskT, LOG2D
__hostdev__ static constexpr uint32_t padding() {return sizeof(LeafData) - 12u - 3u - 1u - 2*sizeof(MaskT<LOG2DIM>) - 16u;}
__hostdev__ static uint64_t memUsage() { return sizeof(LeafData); }
- //__hostdev__ const ValueType* values() const { return nullptr; }
- __hostdev__ bool getValue(uint32_t i) const { return mValues.isOn(i); }
- __hostdev__ bool getMin() const { return false; }// dummy
- __hostdev__ bool getMax() const { return false; }// dummy
- __hostdev__ bool getAvg() const { return false; }// dummy
- __hostdev__ bool getDev() const { return false; }// dummy
- __hostdev__ void setValue(uint32_t offset, bool v)
+ //__hostdev__ __global__ const ValueType* values() const __global__ { return nullptr; }
+ __hostdev__ bool getValue(uint32_t i) const __global__ { return mValues.isOn(i); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool getValue(uint32_t i) const __local__ { return mValues.isOn(i); }
+#endif
+ __hostdev__ bool getMin() const __global__ { return false; }// dummy
+ __hostdev__ bool getMax() const __global__ { return false; }// dummy
+ __hostdev__ bool getAvg() const __global__ { return false; }// dummy
+ __hostdev__ bool getDev() const __global__ { return false; }// dummy
+ __hostdev__ void setValue(uint32_t offset, bool v) __global__
{
mValueMask.setOn(offset);
mValues.set(offset, v);
}
- __hostdev__ void setMin(const bool&) {}// no-op
- __hostdev__ void setMax(const bool&) {}// no-op
- __hostdev__ void setAvg(const bool&) {}// no-op
- __hostdev__ void setDev(const bool&) {}// no-op
+ __hostdev__ void setMin(__global__ const bool&) __global__ {}// no-op
+ __hostdev__ void setMax(__global__ const bool&) __global__ {}// no-op
+ __hostdev__ void setAvg(__global__ const bool&) __global__ {}// no-op
+ __hostdev__ void setDev(__global__ const bool&) __global__ {}// no-op
template <typename T>
- __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; }
+ __hostdev__ void setOrigin(__global__ const T& ijk) __global__ { mBBoxMin = ijk; }
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
}; // LeafData<bool>
// Partial template specialization of LeafData with ValueMask
@@ -4141,7 +4913,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<ValueMask, CoordT, MaskT,
using BuildType = ValueMask;
using FloatType = bool;// dummy value type
using ArrayType = void;// type used for the internal mValue array - void means missing
- static constexpr bool FIXED_SIZE = true;
+ static __constant__ constexpr bool FIXED_SIZE = true;
CoordT mBBoxMin; // 12B.
uint8_t mBBoxDif[3]; // 3B.
@@ -4156,29 +4928,32 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<ValueMask, CoordT, MaskT,
}
//__hostdev__ const ValueType* values() const { return nullptr; }
- __hostdev__ bool getValue(uint32_t i) const { return mValueMask.isOn(i); }
- __hostdev__ bool getMin() const { return false; }// dummy
- __hostdev__ bool getMax() const { return false; }// dummy
- __hostdev__ bool getAvg() const { return false; }// dummy
- __hostdev__ bool getDev() const { return false; }// dummy
- __hostdev__ void setValue(uint32_t offset, bool)
+ __hostdev__ bool getValue(uint32_t i) const __global__ { return mValueMask.isOn(i); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool getValue(uint32_t i) const __local__ { return mValueMask.isOn(i); }
+#endif
+ __hostdev__ bool getMin() const __global__ { return false; }// dummy
+ __hostdev__ bool getMax() const __global__ { return false; }// dummy
+ __hostdev__ bool getAvg() const __global__ { return false; }// dummy
+ __hostdev__ bool getDev() const __global__ { return false; }// dummy
+ __hostdev__ void setValue(uint32_t offset, bool) __global__
{
mValueMask.setOn(offset);
}
- __hostdev__ void setMin(const ValueType&) {}// no-op
- __hostdev__ void setMax(const ValueType&) {}// no-op
- __hostdev__ void setAvg(const FloatType&) {}// no-op
- __hostdev__ void setDev(const FloatType&) {}// no-op
+ __hostdev__ void setMin(__global__ const ValueType&) __global__ {}// no-op
+ __hostdev__ void setMax(__global__ const ValueType&) __global__ {}// no-op
+ __hostdev__ void setAvg(__global__ const FloatType&) __global__ {}// no-op
+ __hostdev__ void setDev(__global__ const FloatType&) __global__ {}// no-op
template <typename T>
- __hostdev__ void setOrigin(const T& ijk) { mBBoxMin = ijk; }
+ __hostdev__ void setOrigin(__global__ const T& ijk) __global__ { mBBoxMin = ijk; }
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
}; // LeafData<ValueMask>
// Partial template specialization of LeafData with ValueIndex
@@ -4191,7 +4966,7 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<ValueIndex, CoordT, MaskT,
using BuildType = ValueIndex;
using FloatType = uint64_t;
using ArrayType = void;// type used for the internal mValue array - void means missing
- static constexpr bool FIXED_SIZE = true;
+ static __constant__ constexpr bool FIXED_SIZE = true;
CoordT mBBoxMin; // 12B.
uint8_t mBBoxDif[3]; // 3B.
@@ -4208,39 +4983,47 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) LeafData<ValueIndex, CoordT, MaskT,
__hostdev__ static uint64_t memUsage() { return sizeof(LeafData); }
- __hostdev__ uint64_t getMin() const { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 0; }
- __hostdev__ uint64_t getMax() const { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 1; }
- __hostdev__ uint64_t getAvg() const { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 2; }
- __hostdev__ uint64_t getDev() const { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 3; }
+ __hostdev__ uint64_t getMin() const __global__ { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 0; }
+ __hostdev__ uint64_t getMax() const __global__ { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 1; }
+ __hostdev__ uint64_t getAvg() const __global__ { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 2; }
+ __hostdev__ uint64_t getDev() const __global__ { NANOVDB_ASSERT(mStatsOff); return mStatsOff + 3; }
__hostdev__ void setValue(uint32_t offset, uint64_t)
{
mValueMask.setOn(offset);
}
- __hostdev__ uint64_t getValue(uint32_t i) const
+ __hostdev__ uint64_t getValue(uint32_t i) const __global__
{
if (mFlags & uint8_t(16u)) {// if 4th bit is set only active voxels are indexed
return mValueMask.isOn(i) ? mValueOff + mValueMask.countOn(i) : 0;// 0 is background
}
return mValueOff + i;// dense array of active and inactive voxels
}
-
+#if defined(__KERNEL_METAL__)
+ __hostdev__ uint64_t getValue(uint32_t i) const __local__
+ {
+ if (mFlags & uint8_t(16u)) {// if 4th bit is set only active voxels are indexed
+ return mValueMask.isOn(i) ? mValueOff + mValueMask.countOn(i) : 0;// 0 is background
+ }
+ return mValueOff + i;// dense array of active and inactive voxels
+ }
+#endif
template <typename T>
- __hostdev__ void setMin(const T &min, T *p) { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 0] = min; }
+ __hostdev__ void setMin(__global__ const T &min, __global__ T *p) __global__ { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 0] = min; }
template <typename T>
- __hostdev__ void setMax(const T &max, T *p) { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 1] = max; }
+ __hostdev__ void setMax(__global__ const T &max, __global__ T *p) __global__ { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 1] = max; }
template <typename T>
- __hostdev__ void setAvg(const T &avg, T *p) { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 2] = avg; }
+ __hostdev__ void setAvg(__global__ const T &avg, __global__ T *p) __global__ { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 2] = avg; }
template <typename T>
- __hostdev__ void setDev(const T &dev, T *p) { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 3] = dev; }
+ __hostdev__ void setDev(__global__ const T &dev, __global__ T *p) __global__ { NANOVDB_ASSERT(mStatsOff); p[mStatsOff + 3] = dev; }
template <typename T>
- __hostdev__ void setOrigin(const T &ijk) { mBBoxMin = ijk; }
+ __hostdev__ void setOrigin(__global__ const T &ijk) __global__ { mBBoxMin = ijk; }
/// @brief This class cannot be constructed or deleted
- LeafData() = delete;
- LeafData(const LeafData&) = delete;
- LeafData& operator=(const LeafData&) = delete;
- ~LeafData() = delete;
+ LeafData() __global__ = delete;
+ LeafData(__global__ const LeafData&) __global__ = delete;
+ __global__ LeafData& operator=(__global__ const LeafData&) __global__ = delete;
+ ~LeafData() __global__ = delete;
}; // LeafData<ValueIndex>
/// @brief Leaf nodes of the VDB tree. (defaults to 8x8x8 = 512 voxels)
@@ -4248,13 +5031,22 @@ template<typename BuildT,
typename CoordT = Coord,
template<uint32_t> class MaskT = Mask,
uint32_t Log2Dim = 3>
-class LeafNode : private LeafData<BuildT, CoordT, MaskT, Log2Dim>
+class LeafNode
+#if !defined(__KERNEL_METAL__)
+ : private LeafData<BuildT, CoordT, MaskT, Log2Dim>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ LeafData<BuildT, CoordT, MaskT, Log2Dim> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) DataType::v
+#endif
public:
struct ChildNodeType
{
- static constexpr uint32_t TOTAL = 0;
- static constexpr uint32_t DIM = 1;
+ static __constant__ constexpr uint32_t TOTAL = 0;
+ static __constant__ constexpr uint32_t DIM = 1;
__hostdev__ static uint32_t dim() { return 1u; }
}; // Voxel
using LeafNodeType = LeafNode<BuildT, CoordT, MaskT, Log2Dim>;
@@ -4263,38 +5055,56 @@ public:
using FloatType = typename DataType::FloatType;
using BuildType = typename DataType::BuildType;
using CoordType = CoordT;
- static constexpr bool FIXED_SIZE = DataType::FIXED_SIZE;
+ static __constant__ constexpr bool FIXED_SIZE = DataType::FIXED_SIZE;
template<uint32_t LOG2>
using MaskType = MaskT<LOG2>;
template<bool ON>
using MaskIterT = typename Mask<Log2Dim>::template Iterator<ON>;
/// @brief Visits all active values in a leaf node
- class ValueOnIterator : public MaskIterT<true>
+ class ValueOnIterator
+#if !defined (__KERNEL_METAL__)
+ : public MaskIterT<true>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ MaskIterT<true> BaseT;
+#define BASE(v) BaseT.v
+#else
using BaseT = MaskIterT<true>;
- const LeafNode *mParent;
+#define BASE(v) BaseT::v
+#endif
+ __global__ const LeafNode *mParent;
public:
__hostdev__ ValueOnIterator() : BaseT(), mParent(nullptr) {}
- __hostdev__ ValueOnIterator(const LeafNode* parent) : BaseT(parent->data()->mValueMask.beginOn()), mParent(parent) {}
- ValueOnIterator& operator=(const ValueOnIterator&) = default;
- __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->getValue(BaseT::pos());}
- __hostdev__ CoordT getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());}
+ __hostdev__ ValueOnIterator(__global__ const LeafNode* parent) : BaseT(parent->data()->mValueMask.beginOn()), mParent(parent) {}
+ __global__ ValueOnIterator& operator=(__global__ const ValueOnIterator&) = default;
+ __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->getValue(BASE(pos)());}
+ __hostdev__ CoordT getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BASE(pos)());}
}; // Member class ValueOnIterator
ValueOnIterator beginValueOn() const {return ValueOnIterator(this);}
/// @brief Visits all inactive values in a leaf node
- class ValueOffIterator : public MaskIterT<false>
+ class ValueOffIterator
+#if !defined (__KERNEL_METAL__)
+ : public MaskIterT<false>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ MaskIterT<false> BaseT;
+#define BASE(v) BaseT.v
+#else
using BaseT = MaskIterT<false>;
- const LeafNode *mParent;
+#define BASE(v) BaseT::v
+#endif
+ __global__ const LeafNode *mParent;
public:
__hostdev__ ValueOffIterator() : BaseT(), mParent(nullptr) {}
- __hostdev__ ValueOffIterator(const LeafNode* parent) : BaseT(parent->data()->mValueMask.beginOff()), mParent(parent) {}
- ValueOffIterator& operator=(const ValueOffIterator&) = default;
- __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->getValue(BaseT::pos());}
- __hostdev__ CoordT getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BaseT::pos());}
+ __hostdev__ ValueOffIterator(__global__ const LeafNode* parent) : BaseT(parent->data()->mValueMask.beginOff()), mParent(parent) {}
+ __global__ ValueOffIterator& operator=(__global__ const ValueOffIterator&) = default;
+ __hostdev__ ValueType operator*() const {NANOVDB_ASSERT(*this); return mParent->getValue(BASE(pos)());}
+ __hostdev__ CoordT getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(BASE(pos)());}
}; // Member class ValueOffIterator
ValueOffIterator beginValueOff() const {return ValueOffIterator(this);}
@@ -4302,17 +5112,17 @@ public:
/// @brief Visits all values in a leaf node, i.e. both active and inactive values
class ValueIterator
{
- const LeafNode *mParent;
+ __global__ const LeafNode *mParent;
uint32_t mPos;
public:
__hostdev__ ValueIterator() : mParent(nullptr), mPos(1u << 3 * Log2Dim) {}
- __hostdev__ ValueIterator(const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);}
- ValueIterator& operator=(const ValueIterator&) = default;
+ __hostdev__ ValueIterator(__global__ const LeafNode* parent) : mParent(parent), mPos(0) {NANOVDB_ASSERT(parent);}
+ __global__ ValueIterator& operator=(__global__ const ValueIterator&) = default;
__hostdev__ ValueType operator*() const { NANOVDB_ASSERT(*this); return mParent->getValue(mPos);}
__hostdev__ CoordT getCoord() const { NANOVDB_ASSERT(*this); return mParent->offsetToGlobalCoord(mPos);}
__hostdev__ bool isActive() const { NANOVDB_ASSERT(*this); return mParent->isActive(mPos);}
__hostdev__ operator bool() const {return mPos < (1u << 3 * Log2Dim);}
- __hostdev__ ValueIterator& operator++() {++mPos; return *this;}
+ __hostdev__ __global__ ValueIterator& operator++() {++mPos; return *this;}
__hostdev__ ValueIterator operator++(int) {
auto tmp = *this;
++(*this);
@@ -4320,43 +5130,49 @@ public:
}
}; // Member class ValueIterator
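+ // The iterator classes above redefine BASE for their own base objects, so the mapping
+ // for the enclosing LeafNode is re-established here.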
+#if defined(__KERNEL_METAL__)
+#define BASE(v) _base.v
+#else
+#define BASE(v) DataType::v
+#endif
+
ValueIterator beginValue() const {return ValueIterator(this);}
static_assert(is_same<ValueType,typename BuildToValueMap<BuildType>::Type>::value, "Mismatching BuildType");
- static constexpr uint32_t LOG2DIM = Log2Dim;
- static constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes
- static constexpr uint32_t DIM = 1u << TOTAL; // number of voxels along each axis of this node
- static constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node
- static constexpr uint32_t MASK = (1u << LOG2DIM) - 1u; // mask for bit operations
- static constexpr uint32_t LEVEL = 0; // level 0 = leaf
- static constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node
+ static __constant__ constexpr uint32_t LOG2DIM = Log2Dim;
+ static __constant__ constexpr uint32_t TOTAL = LOG2DIM; // needed by parent nodes
+ static __constant__ constexpr uint32_t DIM = 1u << TOTAL; // number of voxels along each axis of this node
+ static __constant__ constexpr uint32_t SIZE = 1u << 3 * LOG2DIM; // total number of voxels represented by this node
+ static __constant__ constexpr uint32_t MASK = (1u << LOG2DIM) - 1u; // mask for bit operations
+ static __constant__ constexpr uint32_t LEVEL = 0; // level 0 = leaf
+ static __constant__ constexpr uint64_t NUM_VALUES = uint64_t(1) << (3 * TOTAL); // total voxel count represented by this node
- __hostdev__ DataType* data() { return reinterpret_cast<DataType*>(this); }
+ __hostdev__ __global__ DataType* data() __global__ { return reinterpret_cast<__global__ DataType*>(this); }
- __hostdev__ const DataType* data() const { return reinterpret_cast<const DataType*>(this); }
+ __hostdev__ __global__ const DataType* data() const __global__ { return reinterpret_cast<__global__ const DataType*>(this); }
/// @brief Return a const reference to the bit mask of active voxels in this leaf node
- __hostdev__ const MaskType<LOG2DIM>& valueMask() const { return DataType::mValueMask; }
+ __hostdev__ __global__ const MaskType<LOG2DIM>& valueMask() const __global__ { return DataType::mValueMask; }
/// @brief Return a const reference to the minimum active value encoded in this leaf node
- __hostdev__ ValueType minimum() const { return this->getMin(); }
+ __hostdev__ ValueType minimum() const __global__ { return this->getMin(); }
/// @brief Return a const reference to the maximum active value encoded in this leaf node
- __hostdev__ ValueType maximum() const { return this->getMax(); }
+ __hostdev__ ValueType maximum() const __global__ { return this->getMax(); }
/// @brief Return a const reference to the average of all the active values encoded in this leaf node
- __hostdev__ FloatType average() const { return DataType::getAvg(); }
+ __hostdev__ FloatType average() const __global__ { return DataType::getAvg(); }
/// @brief Return the variance of all the active values encoded in this leaf node
- __hostdev__ FloatType variance() const { return DataType::getDev()*DataType::getDev(); }
+ __hostdev__ FloatType variance() const __global__ { return DataType::getDev()*DataType::getDev(); }
/// @brief Return a const reference to the standard deviation of all the active values encoded in this leaf node
- __hostdev__ FloatType stdDeviation() const { return DataType::getDev(); }
+ __hostdev__ FloatType stdDeviation() const __global__ { return DataType::getDev(); }
- __hostdev__ uint8_t flags() const { return DataType::mFlags; }
+ __hostdev__ uint8_t flags() const __global__ { return DataType::mFlags; }
/// @brief Return the origin in index space of this leaf node
- __hostdev__ CoordT origin() const { return DataType::mBBoxMin & ~MASK; }
+ __hostdev__ CoordT origin() const __global__ { return DataType::mBBoxMin & ~MASK; }
__hostdev__ static CoordT OffsetToLocalCoord(uint32_t n)
{
@@ -4366,9 +5182,9 @@ public:
}
/// @brief Converts (in place) a local index coordinate to a global index coordinate
- __hostdev__ void localToGlobalCoord(Coord& ijk) const { ijk += this->origin(); }
+ __hostdev__ void localToGlobalCoord(__global__ Coord& ijk) const __global__ { ijk += this->origin(); }
- __hostdev__ CoordT offsetToGlobalCoord(uint32_t n) const
+ __hostdev__ CoordT offsetToGlobalCoord(uint32_t n) const __global__
{
return OffsetToLocalCoord(n) + this->origin();
}
@@ -4377,7 +5193,7 @@ public:
__hostdev__ static uint32_t dim() { return 1u << LOG2DIM; }
/// @brief Return the bounding box in index space of active values in this leaf node
- __hostdev__ BBox<CoordT> bbox() const
+ __hostdev__ BBox<CoordT> bbox() const __global__
{
BBox<CoordT> bbox(DataType::mBBoxMin, DataType::mBBoxMin);
if ( this->hasBBox() ) {
@@ -4399,54 +5215,85 @@ public:
__hostdev__ uint64_t memUsage() { return DataType::memUsage(); }
/// @brief This class cannot be constructed or deleted
- LeafNode() = delete;
- LeafNode(const LeafNode&) = delete;
- LeafNode& operator=(const LeafNode&) = delete;
- ~LeafNode() = delete;
+ LeafNode() __global__ = delete;
+ LeafNode(__global__ const LeafNode&) __global__ = delete;
+ __global__ LeafNode& operator=(__global__ const LeafNode&) __global__ = delete;
+ ~LeafNode() __global__ = delete;
/// @brief Return the voxel value at the given offset.
- __hostdev__ ValueType getValue(uint32_t offset) const { return DataType::getValue(offset); }
+
+ __hostdev__ ValueType getValue(uint32_t offset) const __global__ { return DataType::getValue(offset); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(uint32_t offset) const __local__ { return DataType::getValue(offset); }
+#endif
/// @brief Return the voxel value at the given coordinate.
- __hostdev__ ValueType getValue(const CoordT& ijk) const { return DataType::getValue(CoordToOffset(ijk)); }
+ __hostdev__ ValueType getValue(__global__ const CoordT& ijk) const __global__ { return BASE(getValue)(CoordToOffset(ijk)); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordT& ijk) const __global__ { return BASE(getValue)(CoordToOffset(ijk)); }
+ __hostdev__ ValueType getValue(__local__ const CoordT& ijk) const __local__ { return BASE(getValue)(CoordToOffset(ijk)); }
+#endif
/// @brief Sets the value at the specified location and activate its state.
///
/// @note This is safe since it does not change the topology of the tree (unlike setValue methods on the other nodes)
- __hostdev__ void setValue(const CoordT& ijk, const ValueType& v) { DataType::setValue(CoordToOffset(ijk), v); }
+ __hostdev__ void setValue(__global__ const CoordT& ijk, __global__ const ValueType& v) __global__ { DataType::setValue(CoordToOffset(ijk), v); }
/// @brief Sets the value at the specified location but leaves its state unchanged.
///
/// @note This is safe since it does not change the topology of the tree (unlike setValue methods on the other nodes)
- __hostdev__ void setValueOnly(uint32_t offset, const ValueType& v) { DataType::setValueOnly(offset, v); }
- __hostdev__ void setValueOnly(const CoordT& ijk, const ValueType& v) { DataType::setValueOnly(CoordToOffset(ijk), v); }
+ __hostdev__ void setValueOnly(uint32_t offset, __global__ const ValueType& v) __global__ { DataType::setValueOnly(offset, v); }
+ __hostdev__ void setValueOnly(__global__ const CoordT& ijk, __global__ const ValueType& v) __global__ { DataType::setValueOnly(CoordToOffset(ijk), v); }
/// @brief Return @c true if the voxel value at the given coordinate is active.
- __hostdev__ bool isActive(const CoordT& ijk) const { return DataType::mValueMask.isOn(CoordToOffset(ijk)); }
- __hostdev__ bool isActive(uint32_t n) const { return DataType::mValueMask.isOn(n); }
+ __hostdev__ bool isActive(__global__ const CoordT& ijk) const __global__ { return BASE(mValueMask).isOn(CoordToOffset(ijk)); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(__local__ const CoordT& ijk) const __global__ { return BASE(mValueMask).isOn(CoordToOffset(ijk)); }
+ __hostdev__ bool isActive(__local__ const CoordT& ijk) const __local__ { return BASE(mValueMask).isOn(CoordToOffset(ijk)); }
+#endif
+
+ __hostdev__ bool isActive(uint32_t n) const __global__ { return BASE(mValueMask).isOn(n); }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(uint32_t n) const __local__ { return BASE(mValueMask).isOn(n); }
+#endif
/// @brief Return @c true if any of the voxel value are active in this leaf node.
- __hostdev__ bool isActive() const
+ __hostdev__ bool isActive() const __global__
{
//NANOVDB_ASSERT( bool(DataType::mFlags & uint8_t(2)) != DataType::mValueMask.isOff() );
//return DataType::mFlags & uint8_t(2);
return !DataType::mValueMask.isOff();
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive() const __local__
+ {
+ //NANOVDB_ASSERT( bool(BASE(mFlags) & uint8_t(2)) != BASE(mValueMask).isOff() );
+ //return BASE(mFlags) & uint8_t(2);
+ return !BASE(mValueMask).isOff();
+ }
+#endif
__hostdev__ bool hasBBox() const {return DataType::mFlags & uint8_t(2);}
/// @brief Return @c true if the voxel value at the given coordinate is active and updates @c v with the value.
- __hostdev__ bool probeValue(const CoordT& ijk, ValueType& v) const
+ __hostdev__ bool probeValue(__global__ const CoordT& ijk, __global__ ValueType& v) const __global__
{
const uint32_t n = CoordToOffset(ijk);
v = DataType::getValue(n);
- return DataType::mValueMask.isOn(n);
+ return BASE(mValueMask).isOn(n);
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool probeValue(__local__ const CoordT& ijk, __local__ ValueType& v) const __global__
+ {
+ const uint32_t n = CoordToOffset(ijk);
+ v = BASE(getValue)(n);
+ return BASE(mValueMask).isOn(n);
+ }
+#endif
- __hostdev__ const LeafNode* probeLeaf(const CoordT&) const { return this; }
+ __hostdev__ __global__ const LeafNode* probeLeaf(__global__ const CoordT&) const __global__ { return this; }
/// @brief Return the linear offset corresponding to the given coordinate
- __hostdev__ static uint32_t CoordToOffset(const CoordT& ijk)
+ __hostdev__ static uint32_t CoordToOffset(__global__ const CoordT& ijk)
{
#if 0
return ((ijk[0] & MASK) << (2 * LOG2DIM)) + ((ijk[1] & MASK) << LOG2DIM) + (ijk[2] & MASK);
@@ -4454,6 +5301,16 @@ public:
return ((ijk[0] & MASK) << (2 * LOG2DIM)) | ((ijk[1] & MASK) << LOG2DIM) | (ijk[2] & MASK);
#endif
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ static uint32_t CoordToOffset(__local__ const CoordT& ijk)
+ {
+ #if 0
+ return ((ijk[0] & MASK) << (2 * LOG2DIM)) + ((ijk[1] & MASK) << LOG2DIM) + (ijk[2] & MASK);
+ #else
+ return ((ijk[0] & MASK) << (2 * LOG2DIM)) | ((ijk[1] & MASK) << LOG2DIM) | (ijk[2] & MASK);
+ #endif
+ }
+#endif
/// @brief Updates the local bounding box of active voxels in this node. Return true if bbox was updated.
///
@@ -4461,8 +5318,9 @@ public:
///
/// @details This method is based on few (intrinsic) bit operations and hence is relatively fast.
/// However, it should only only be called of either the value mask has changed or if the
+
/// active bounding box is still undefined. e.g. during construction of this node.
- __hostdev__ bool updateBBox();
+ __hostdev__ bool updateBBox() __global__;
private:
static_assert(sizeof(DataType) % NANOVDB_DATA_ALIGNMENT == 0, "sizeof(LeafData) is misaligned");
@@ -4478,49 +5336,77 @@ private:
/// @brief Private method to return a voxel value and update a (dummy) ReadAccessor
template<typename AccT>
- __hostdev__ ValueType getValueAndCache(const CoordT& ijk, const AccT&) const { return this->getValue(ijk); }
+ __hostdev__ ValueType getValueAndCache(__global__ const CoordT& ijk, __global__ const AccT&) const __global__ { return this->getValue(ijk); }
+#if defined(__KERNEL_METAL__)
+ template<typename AccT>
+ __hostdev__ ValueType getValueAndCache(__local__ const CoordT& ijk, __local__ const AccT&) const __global__ { return this->getValue(ijk); }
+#endif
/// @brief Return the node information.
template<typename AccT>
- __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(const CoordType& /*ijk*/, const AccT& /*acc*/) const {
+ __hostdev__ typename AccT::NodeInfo getNodeInfoAndCache(__global__ const CoordType& /*ijk*/, __global__ const AccT& /*acc*/) const __global__ {
using NodeInfoT = typename AccT::NodeInfo;
return NodeInfoT{LEVEL, this->dim(), this->minimum(), this->maximum(),
this->average(), this->stdDeviation(), this->bbox()[0], this->bbox()[1]};
}
template<typename AccT>
- __hostdev__ bool isActiveAndCache(const CoordT& ijk, const AccT&) const { return this->isActive(ijk); }
+ __hostdev__ bool isActiveAndCache(__global__ const CoordT& ijk, __global__ const AccT&) const __global__ { return this->isActive(ijk); }
+#if defined(__KERNEL_METAL__)
+ template<typename AccT>
+ __hostdev__ bool isActiveAndCache(__local__ const CoordT& ijk, __local__ const AccT&) const __global__ { return this->isActive(ijk); }
+#endif
template<typename AccT>
- __hostdev__ bool probeValueAndCache(const CoordT& ijk, ValueType& v, const AccT&) const { return this->probeValue(ijk, v); }
+ __hostdev__ bool probeValueAndCache(__global__ const CoordT& ijk, __global__ ValueType& v, __global__ const AccT&) const __global__ { return this->probeValue(ijk, v); }
template<typename AccT>
- __hostdev__ const LeafNode* probeLeafAndCache(const CoordT&, const AccT&) const { return this; }
+ __hostdev__ __global__ const LeafNode* probeLeafAndCache(__global__ const CoordT&, __global__ const AccT&) const __global__ { return this; }
template<typename RayT, typename AccT>
- __hostdev__ uint32_t getDimAndCache(const CoordT&, const RayT& /*ray*/, const AccT&) const
+ __hostdev__ uint32_t getDimAndCache(__global__ const CoordT&, __global__ const RayT& /*ray*/, __global__ const AccT&) const __global__
{
if (DataType::mFlags & uint8_t(1u)) return this->dim(); // skip this node if the 1st bit is set
//if (!ray.intersects( this->bbox() )) return 1 << LOG2DIM;
return ChildNodeType::dim();
}
-
+#if defined(__KERNEL_METAL__)
+ template<typename RayT, typename AccT>
+ __hostdev__ uint32_t getDimAndCache(__local__ const CoordT&, __local__ const RayT& /*ray*/, __local__ const AccT&) const __global__
+ {
+ if (BASE(mFlags) & uint8_t(1))
+ return this->dim(); // skip this node if first bit is set
+ //if (!ray.intersects( this->bbox() )) return 1 << LOG2DIM;
+ return ChildNodeType::dim();
+ }
+#endif
+#undef BASE
}; // LeafNode class
template<typename ValueT, typename CoordT, template<uint32_t> class MaskT, uint32_t LOG2DIM>
-__hostdev__ inline bool LeafNode<ValueT, CoordT, MaskT, LOG2DIM>::updateBBox()
+__hostdev__ inline bool LeafNode<ValueT, CoordT, MaskT, LOG2DIM>::updateBBox() __global__
{
static_assert(LOG2DIM == 3, "LeafNode::updateBBox: only supports LOGDIM = 3!");
if (DataType::mValueMask.isOff()) {
DataType::mFlags &= ~uint8_t(2);// set 2nd bit off, which indicates that this nodes has no bbox
return false;
}
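+ // On the Metal path the bounding-box update helper is a local struct with a static
+ // method; all other compilers keep the original capturing lambda below.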
+#if defined(__KERNEL_METAL__)
+ struct Update {
+ static void update(__global__ DataType &d, uint32_t min, uint32_t max, int axis) {
+ NANOVDB_ASSERT(min <= max && max < 8);
+ d.mBBoxMin[axis] = (d.mBBoxMin[axis] & ~MASK) + int(min);
+ d.mBBoxDif[axis] = uint8_t(max - min);
+ }
+ };
+#else
auto update = [&](uint32_t min, uint32_t max, int axis) {
NANOVDB_ASSERT(min <= max && max < 8);
DataType::mBBoxMin[axis] = (DataType::mBBoxMin[axis] & ~MASK) + int(min);
DataType::mBBoxDif[axis] = uint8_t(max - min);
};
+#endif
uint64_t word64 = DataType::mValueMask.template getWord<uint64_t>(0);
uint32_t Xmin = word64 ? 0u : 8u;
uint32_t Xmax = Xmin;
@@ -4534,6 +5420,17 @@ __hostdev__ inline bool LeafNode<ValueT, CoordT, MaskT, LOG2DIM>::updateBBox()
}
}
NANOVDB_ASSERT(word64);
+#if defined(__KERNEL_METAL__)
+ Update::update(*this->data(), Xmin, Xmax, 0);
+ Update::update(*this->data(), FindLowestOn(word64) >> 3, FindHighestOn(word64) >> 3, 1);
+ __local__ const uint32_t *p = reinterpret_cast<__local__ const uint32_t*>(&word64), word32 = p[0] | p[1];
+ __local__ const uint16_t *q = reinterpret_cast<__local__ const uint16_t*>(&word32), word16 = q[0] | q[1];
+ __local__ const uint8_t *b = reinterpret_cast<__local__ const uint8_t* >(&word16), byte = b[0] | b[1];
+ NANOVDB_ASSERT(byte);
+ Update::update(*this->data(), FindLowestOn(static_cast<uint32_t>(byte)), FindHighestOn(static_cast<uint32_t>(byte)), 2);
+ DataType::mFlags |= uint8_t(2);// set 2nd bit on, which indicates that this nodes has a bbox
+ return true;
+#else
update(Xmin, Xmax, 0);
update(FindLowestOn(word64) >> 3, FindHighestOn(word64) >> 3, 1);
const uint32_t *p = reinterpret_cast<const uint32_t*>(&word64), word32 = p[0] | p[1];
@@ -4541,8 +5438,9 @@ __hostdev__ inline bool LeafNode<ValueT, CoordT, MaskT, LOG2DIM>::updateBBox()
const uint8_t *b = reinterpret_cast<const uint8_t* >(&word16), byte = b[0] | b[1];
NANOVDB_ASSERT(byte);
update(FindLowestOn(static_cast<uint32_t>(byte)), FindHighestOn(static_cast<uint32_t>(byte)), 2);
- DataType::mFlags |= uint8_t(2);// set 2nd bit on, which indicates that this nodes has a bbox
+ DataType::mFlags |= uint8_t(2);// set 2nd bit on, which indicates that this nodes has a bbox
return true;
+#endif
} // LeafNode::updateBBox
// --------------------------> Template specializations and traits <------------------------------------
@@ -4651,12 +5549,12 @@ class ReadAccessor<BuildT, -1, -1, -1>
using FloatType = typename RootT::FloatType;
using CoordValueType = typename RootT::CoordType::ValueType;
- mutable const RootT* mRoot; // 8 bytes (mutable to allow for access methods to be const)
+ mutable __global__ const RootT* mRoot; // 8 bytes (mutable to allow for access methods to be const)
public:
using ValueType = typename RootT::ValueType;
using CoordType = typename RootT::CoordType;
- static const int CacheLevels = 0;
+ static __constant__ const int CacheLevels = 0;
struct NodeInfo {
uint32_t mLevel; // 4B
@@ -4670,60 +5568,77 @@ public:
};
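+ // Under __KERNEL_METAL__ this accessor lives in thread (__local__) memory while the
+ // tree nodes it references remain device (__global__) pointers, hence the address-space
+ // qualifiers on the members and methods below.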
/// @brief Constructor from a root node
- __hostdev__ ReadAccessor(const RootT& root) : mRoot{&root} {}
+ __hostdev__ ReadAccessor(__global__ const RootT& root) __local__ : mRoot{&root} {}
/// @brief Constructor from a grid
- __hostdev__ ReadAccessor(const GridT& grid) : ReadAccessor(grid.tree().root()) {}
+ __hostdev__ ReadAccessor(__global__ const GridT& grid) __local__ : ReadAccessor(grid.tree().root()) {}
/// @brief Constructor from a tree
- __hostdev__ ReadAccessor(const TreeT& tree) : ReadAccessor(tree.root()) {}
+ __hostdev__ ReadAccessor(__global__ const TreeT& tree) __local__ : ReadAccessor(tree.root()) {}
/// @brief Reset this access to its initial state, i.e. with an empty cache
/// @node Noop since this template specialization has no cache
__hostdev__ void clear() {}
- __hostdev__ const RootT& root() const { return *mRoot; }
+ __hostdev__ __global__ const RootT& root() const __global__ { return *mRoot; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __global__ const RootT& root() const __local__ { return *mRoot; }
+#endif
/// @brief Defaults constructors
- ReadAccessor(const ReadAccessor&) = default;
- ~ReadAccessor() = default;
- ReadAccessor& operator=(const ReadAccessor&) = default;
+ ReadAccessor(__local__ const ReadAccessor&) __local__ = default;
+ ~ReadAccessor() __local__ = default;
+ __local__ ReadAccessor& operator=(__local__ const ReadAccessor&) __local__ = default;
- __hostdev__ ValueType getValue(const CoordType& ijk) const
+ __hostdev__ ValueType getValue(__global__ const CoordType& ijk) const __local__
{
return mRoot->getValueAndCache(ijk, *this);
}
- __hostdev__ ValueType operator()(const CoordType& ijk) const
+
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __local__
+ {
+ return mRoot->getValueAndCache(ijk, *this);
+ }
+#endif
+
+ __hostdev__ ValueType operator()(__global__ const CoordType& ijk) const __local__
{
return this->getValue(ijk);
}
- __hostdev__ ValueType operator()(int i, int j, int k) const
+ __hostdev__ ValueType operator()(int i, int j, int k) const __local__
{
return this->getValue(CoordType(i,j,k));
}
- __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const
+ __hostdev__ NodeInfo getNodeInfo(__global__ const CoordType& ijk) const __local__
{
return mRoot->getNodeInfoAndCache(ijk, *this);
}
- __hostdev__ bool isActive(const CoordType& ijk) const
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __local__
{
return mRoot->isActiveAndCache(ijk, *this);
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __local__
+ {
+ return mRoot->isActiveAndCache(ijk, *this);
+ }
+#endif
- __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ ValueType& v) const __local__
{
return mRoot->probeValueAndCache(ijk, v, *this);
}
- __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const
+ __hostdev__ __global__ const LeafT* probeLeaf(__global__ const CoordType& ijk) const __local__
{
return mRoot->probeLeafAndCache(ijk, *this);
}
template<typename RayT>
- __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const
+ __hostdev__ uint32_t getDim(__global__ const CoordType& ijk, __global__ const RayT& ray) const __local__
{
return mRoot->getDimAndCache(ijk, ray, *this);
}
@@ -4739,7 +5654,11 @@ private:
/// @brief No-op
template<typename NodeT>
- __hostdev__ void insert(const CoordType&, const NodeT*) const {}
+ __hostdev__ void insert(__global__ const CoordType&, __global__ const NodeT*) const __local__ {}
+#if defined(__KERNEL_METAL__)
+ template<typename NodeT>
+ __hostdev__ void insert(__local__ const CoordType&, __global__ const NodeT*) const __local__ {}
+#endif
}; // ReadAccessor<ValueT, -1, -1, -1> class
/// @brief Node caching at a single tree level
@@ -4761,19 +5680,19 @@ class ReadAccessor<BuildT, LEVEL0, -1, -1>//e.g. 0, 1, 2
// All member data are mutable to allow for access methods to be const
mutable CoordT mKey; // 3*4 = 12 bytes
- mutable const RootT* mRoot; // 8 bytes
- mutable const NodeT* mNode; // 8 bytes
+ mutable __global__ const RootT* mRoot; // 8 bytes
+ mutable __global__ const NodeT* mNode; // 8 bytes
public:
using ValueType = ValueT;
using CoordType = CoordT;
- static const int CacheLevels = 1;
+ static __constant__ const int CacheLevels = 1;
using NodeInfo = typename ReadAccessor<ValueT, -1, -1, -1>::NodeInfo;
/// @brief Constructor from a root node
- __hostdev__ ReadAccessor(const RootT& root)
+ __hostdev__ ReadAccessor(__global__ const RootT& root) __local__
: mKey(CoordType::max())
, mRoot(&root)
, mNode(nullptr)
@@ -4781,10 +5700,10 @@ public:
}
/// @brief Constructor from a grid
- __hostdev__ ReadAccessor(const GridT& grid) : ReadAccessor(grid.tree().root()) {}
+ __hostdev__ ReadAccessor(__global__ const GridT& grid) __local__ : ReadAccessor(grid.tree().root()) {}
/// @brief Constructor from a tree
- __hostdev__ ReadAccessor(const TreeT& tree) : ReadAccessor(tree.root()) {}
+ __hostdev__ ReadAccessor(__global__ const TreeT& tree) __local__ : ReadAccessor(tree.root()) {}
/// @brief Reset this access to its initial state, i.e. with an empty cache
__hostdev__ void clear()
@@ -4793,37 +5712,64 @@ public:
mNode = nullptr;
}
- __hostdev__ const RootT& root() const { return *mRoot; }
+ __hostdev__ __global__ const RootT& root() const __global__ { return *mRoot; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __global__ const RootT& root() const __local__ { return *mRoot; }
+#endif
/// @brief Defaults constructors
- ReadAccessor(const ReadAccessor&) = default;
- ~ReadAccessor() = default;
- ReadAccessor& operator=(const ReadAccessor&) = default;
+ ReadAccessor(__global__ const ReadAccessor&) __global__ = default;
+ ~ReadAccessor() __global__ = default;
+ __global__ ReadAccessor& operator=(__global__ const ReadAccessor&) __global__ = default;
- __hostdev__ bool isCached(const CoordType& ijk) const
+ __hostdev__ bool isCached(__global__ const CoordType& ijk) const __global__
{
return (ijk[0] & int32_t(~NodeT::MASK)) == mKey[0] &&
(ijk[1] & int32_t(~NodeT::MASK)) == mKey[1] &&
(ijk[2] & int32_t(~NodeT::MASK)) == mKey[2];
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isCached(__local__ const CoordType& ijk) const __global__
+ {
+ return (ijk[0] & int32_t(~NodeT::MASK)) == mKey[0] &&
+ (ijk[1] & int32_t(~NodeT::MASK)) == mKey[1] &&
+ (ijk[2] & int32_t(~NodeT::MASK)) == mKey[2];
+ }
+ __hostdev__ bool isCached(__local__ const CoordType& ijk) const __local__
+ {
+ return (ijk[0] & int32_t(~NodeT::MASK)) == mKey[0] &&
+ (ijk[1] & int32_t(~NodeT::MASK)) == mKey[1] &&
+ (ijk[2] & int32_t(~NodeT::MASK)) == mKey[2];
+ }
+#endif
- __hostdev__ ValueType getValue(const CoordType& ijk) const
+ __hostdev__ ValueType getValue(__global__ const CoordType& ijk) const __global__
{
if (this->isCached(ijk)) {
return mNode->getValueAndCache(ijk, *this);
}
return mRoot->getValueAndCache(ijk, *this);
}
- __hostdev__ ValueType operator()(const CoordType& ijk) const
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __global__
+ {
+ if (this->isCached(ijk)) {
+ return mNode->getValueAndCache(ijk, *this);
+ }
+ return mRoot->getValueAndCache(ijk, *this);
+ }
+#endif
+
+ __hostdev__ ValueType operator()(__global__ const CoordType& ijk) const __global__
{
return this->getValue(ijk);
}
- __hostdev__ ValueType operator()(int i, int j, int k) const
+ __hostdev__ ValueType operator()(int i, int j, int k) const __global__
{
return this->getValue(CoordType(i,j,k));
}
- __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const
+ __hostdev__ NodeInfo getNodeInfo(__global__ const CoordType& ijk) const __global__
{
if (this->isCached(ijk)) {
return mNode->getNodeInfoAndCache(ijk, *this);
@@ -4831,15 +5777,24 @@ public:
return mRoot->getNodeInfoAndCache(ijk, *this);
}
- __hostdev__ bool isActive(const CoordType& ijk) const
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __global__
{
if (this->isCached(ijk)) {
return mNode->isActiveAndCache(ijk, *this);
}
return mRoot->isActiveAndCache(ijk, *this);
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __local__
+ {
+ if (this->isCached(ijk)) {
+ return mNode->isActiveAndCache(ijk, *this);
+ }
+ return mRoot->isActiveAndCache(ijk, *this);
+ }
+#endif
- __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ ValueType& v) const __global__
{
if (this->isCached(ijk)) {
return mNode->probeValueAndCache(ijk, v, *this);
@@ -4847,7 +5802,7 @@ public:
return mRoot->probeValueAndCache(ijk, v, *this);
}
- __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const
+ __hostdev__ __global__ const LeafT* probeLeaf(__global__ const CoordType& ijk) const __global__
{
if (this->isCached(ijk)) {
return mNode->probeLeafAndCache(ijk, *this);
@@ -4856,7 +5811,7 @@ public:
}
template<typename RayT>
- __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const
+ __hostdev__ uint32_t getDim(__global__ const CoordType& ijk, __global__ const RayT& ray) const __global__
{
if (this->isCached(ijk)) {
return mNode->getDimAndCache(ijk, ray, *this);
@@ -4874,15 +5829,26 @@ private:
friend class LeafNode;
/// @brief Inserts a leaf node and key pair into this ReadAccessor
- __hostdev__ void insert(const CoordType& ijk, const NodeT* node) const
+ __hostdev__ void insert(__global__ const CoordType& ijk, __global__ const NodeT* node) const __local__
{
mKey = ijk & ~NodeT::MASK;
mNode = node;
}
+#if defined(__KERNEL_METAL__)
+ __hostdev__ void insert(__local__ const CoordType& ijk, __global__ const NodeT* node) const __local__
+ {
+ mKey = ijk & ~NodeT::MASK;
+ mNode = node;
+ }
+#endif
// no-op
template<typename OtherNodeT>
- __hostdev__ void insert(const CoordType&, const OtherNodeT*) const {}
+ __hostdev__ void insert(__global__ const CoordType&, __global__ const OtherNodeT*) const __local__ {}
+#if defined(__KERNEL_METAL__)
+ template<typename OtherNodeT>
+ __hostdev__ void insert(__local__ const CoordType&, __global__ const OtherNodeT*) const __local__ {}
+#endif
}; // ReadAccessor<ValueT, LEVEL0>
@@ -4909,20 +5875,20 @@ class ReadAccessor<BuildT, LEVEL0, LEVEL1, -1>//e.g. (0,1), (1,2), (0,2)
#else // 68 bytes total
mutable CoordT mKeys[2]; // 2*3*4 = 24 bytes
#endif
- mutable const RootT* mRoot;
- mutable const Node1T* mNode1;
- mutable const Node2T* mNode2;
+ mutable __global__ const RootT* mRoot;
+ mutable __global__ const Node1T* mNode1;
+ mutable __global__ const Node2T* mNode2;
public:
using ValueType = ValueT;
using CoordType = CoordT;
- static const int CacheLevels = 2;
+ static __constant__ const int CacheLevels = 2;
using NodeInfo = typename ReadAccessor<ValueT,-1,-1,-1>::NodeInfo;
/// @brief Constructor from a root node
- __hostdev__ ReadAccessor(const RootT& root)
+ __hostdev__ ReadAccessor(__global__ const RootT& root) __local__
#ifdef USE_SINGLE_ACCESSOR_KEY
: mKey(CoordType::max())
#else
@@ -4935,10 +5901,10 @@ public:
}
/// @brief Constructor from a grid
- __hostdev__ ReadAccessor(const GridT& grid) : ReadAccessor(grid.tree().root()) {}
+ __hostdev__ ReadAccessor(__global__ const GridT& grid) __local__ : ReadAccessor(grid.tree().root()) {}
/// @brief Constructor from a tree
- __hostdev__ ReadAccessor(const TreeT& tree) : ReadAccessor(tree.root()) {}
+ __hostdev__ ReadAccessor(__global__ const TreeT& tree) __local__ : ReadAccessor(tree.root()) {}
/// @brief Reset this access to its initial state, i.e. with an empty cache
__hostdev__ void clear()
@@ -4952,15 +5918,18 @@ public:
mNode2 = nullptr;
}
- __hostdev__ const RootT& root() const { return *mRoot; }
+ __hostdev__ __global__ const RootT& root() const __global__ { return *mRoot; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __global__ const RootT& root() const __local__ { return *mRoot; }
+#endif
/// @brief Defaults constructors
- ReadAccessor(const ReadAccessor&) = default;
+ ReadAccessor(__global__ const ReadAccessor&) __global__ = default;
~ReadAccessor() = default;
- ReadAccessor& operator=(const ReadAccessor&) = default;
+ __global__ ReadAccessor& operator=(__global__ const ReadAccessor&) __global__ = default;
#ifdef USE_SINGLE_ACCESSOR_KEY
- __hostdev__ bool isCached1(CoordValueType dirty) const
+ __hostdev__ bool isCached1(CoordValueType dirty) const __global__
{
if (!mNode1)
return false;
@@ -4970,7 +5939,7 @@ public:
}
return true;
}
- __hostdev__ bool isCached2(CoordValueType dirty) const
+ __hostdev__ bool isCached2(CoordValueType dirty) const __global__
{
if (!mNode2)
return false;
@@ -4980,18 +5949,18 @@ public:
}
return true;
}
- __hostdev__ CoordValueType computeDirty(const CoordType& ijk) const
+ __hostdev__ CoordValueType computeDirty(__global__ const CoordType& ijk) const __global__
{
return (ijk[0] ^ mKey[0]) | (ijk[1] ^ mKey[1]) | (ijk[2] ^ mKey[2]);
}
#else
- __hostdev__ bool isCached1(const CoordType& ijk) const
+ __hostdev__ bool isCached1(__global__ const CoordType& ijk) const __global__
{
return (ijk[0] & int32_t(~Node1T::MASK)) == mKeys[0][0] &&
(ijk[1] & int32_t(~Node1T::MASK)) == mKeys[0][1] &&
(ijk[2] & int32_t(~Node1T::MASK)) == mKeys[0][2];
}
- __hostdev__ bool isCached2(const CoordType& ijk) const
+ __hostdev__ bool isCached2(__global__ const CoordType& ijk) const __global__
{
return (ijk[0] & int32_t(~Node2T::MASK)) == mKeys[1][0] &&
(ijk[1] & int32_t(~Node2T::MASK)) == mKeys[1][1] &&
@@ -4999,12 +5968,12 @@ public:
}
#endif
- __hostdev__ ValueType getValue(const CoordType& ijk) const
+ __hostdev__ ValueType getValue(__global__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached1(dirty)) {
return mNode1->getValueAndCache(ijk, *this);
@@ -5013,21 +5982,37 @@ public:
}
return mRoot->getValueAndCache(ijk, *this);
}
- __hostdev__ ValueType operator()(const CoordType& ijk) const
- {
- return this->getValue(ijk);
- }
- __hostdev__ ValueType operator()(int i, int j, int k) const
- {
- return this->getValue(CoordType(i,j,k));
- }
-
- __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
+#endif
+ if (this->isCached1(dirty)) {
+ return mNode1->getValueAndCache(ijk, *this);
+ } else if (this->isCached2(dirty)) {
+ return mNode2->getValueAndCache(ijk, *this);
+ }
+ return mRoot->getValueAndCache(ijk, *this);
+ }
+#endif
+ __hostdev__ ValueType operator()(__global__ const CoordType& ijk) const __global__
+ {
+ return this->getValue(ijk);
+ }
+ __hostdev__ ValueType operator()(int i, int j, int k) const __global__
+ {
+ return this->getValue(CoordType(i,j,k));
+ }
+
+ __hostdev__ NodeInfo getNodeInfo(__global__ const CoordType& ijk) const __global__
+ {
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ const CoordValueType dirty = this->computeDirty(ijk);
+#else
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached1(dirty)) {
return mNode1->getNodeInfoAndCache(ijk, *this);
@@ -5037,12 +6022,12 @@ public:
return mRoot->getNodeInfoAndCache(ijk, *this);
}
- __hostdev__ bool isActive(const CoordType& ijk) const
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached1(dirty)) {
return mNode1->isActiveAndCache(ijk, *this);
@@ -5052,12 +6037,12 @@ public:
return mRoot->isActiveAndCache(ijk, *this);
}
- __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ ValueType& v) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached1(dirty)) {
return mNode1->probeValueAndCache(ijk, v, *this);
@@ -5067,12 +6052,12 @@ public:
return mRoot->probeValueAndCache(ijk, v, *this);
}
- __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const
+ __hostdev__ __global__ const LeafT* probeLeaf(__global__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached1(dirty)) {
return mNode1->probeLeafAndCache(ijk, *this);
@@ -5083,12 +6068,12 @@ public:
}
template<typename RayT>
- __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const
+ __hostdev__ uint32_t getDim(__global__ const CoordType& ijk, __global__ const RayT& ray) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached1(dirty)) {
return mNode1->getDimAndCache(ijk, ray, *this);
@@ -5108,7 +6093,7 @@ private:
friend class LeafNode;
/// @brief Inserts a leaf node and key pair into this ReadAccessor
- __hostdev__ void insert(const CoordType& ijk, const Node1T* node) const
+ __hostdev__ void insert(__global__ const CoordType& ijk, __global__ const Node1T* node) const __local__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
mKey = ijk;
@@ -5117,7 +6102,7 @@ private:
#endif
mNode1 = node;
}
- __hostdev__ void insert(const CoordType& ijk, const Node2T* node) const
+ __hostdev__ void insert(__local__ const CoordType& ijk, __global__ const Node2T* node) const __local__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
mKey = ijk;
@@ -5127,7 +6112,11 @@ private:
mNode2 = node;
}
template <typename OtherNodeT>
- __hostdev__ void insert(const CoordType&, const OtherNodeT*) const {}
+ __hostdev__ void insert(__global__ const CoordType&, __global__ const OtherNodeT*) const __local__ {}
+#if defined(__KERNEL_METAL__)
+ template <typename OtherNodeT>
+ __hostdev__ void insert(__local__ const CoordType&, __global__ const OtherNodeT*) const __local__ {}
+#endif
}; // ReadAccessor<BuildT, LEVEL0, LEVEL1>
@@ -5145,7 +6134,7 @@ class ReadAccessor<BuildT, 0, 1, 2>
using ValueT = typename RootT::ValueType;
using FloatType = typename RootT::FloatType;
- using CoordValueType = typename RootT::CoordT::ValueType;
+ using CoordValueType = typename RootT::CoordType::ValueType;
// All member data are mutable to allow for access methods to be const
#ifdef USE_SINGLE_ACCESSOR_KEY // 44 bytes total
@@ -5153,19 +6142,19 @@ class ReadAccessor<BuildT, 0, 1, 2>
#else // 68 bytes total
mutable CoordT mKeys[3]; // 3*3*4 = 36 bytes
#endif
- mutable const RootT* mRoot;
- mutable const void* mNode[3]; // 4*8 = 32 bytes
+ mutable __global__ const RootT* mRoot;
+ mutable __global__ const void* mNode[3]; // 4*8 = 32 bytes
public:
using ValueType = ValueT;
using CoordType = CoordT;
- static const int CacheLevels = 3;
+ static __constant__ const int CacheLevels = 3;
using NodeInfo = typename ReadAccessor<ValueT, -1, -1, -1>::NodeInfo;
/// @brief Constructor from a root node
- __hostdev__ ReadAccessor(const RootT& root)
+ __hostdev__ ReadAccessor(__global__ const RootT& root) __local__
#ifdef USE_SINGLE_ACCESSOR_KEY
: mKey(CoordType::max())
#else
@@ -5177,35 +6166,38 @@ public:
}
/// @brief Constructor from a grid
- __hostdev__ ReadAccessor(const GridT& grid) : ReadAccessor(grid.tree().root()) {}
+ __hostdev__ ReadAccessor(__global__ const GridT& grid) __local__ : ReadAccessor(grid.tree().root()) {}
/// @brief Constructor from a tree
- __hostdev__ ReadAccessor(const TreeT& tree) : ReadAccessor(tree.root()) {}
+ __hostdev__ ReadAccessor(__global__ const TreeT& tree) __local__ : ReadAccessor(tree.root()) {}
- __hostdev__ const RootT& root() const { return *mRoot; }
+ __hostdev__ __global__ const RootT& root() const __global__ { return *mRoot; }
+#if defined(__KERNEL_METAL__)
+ __hostdev__ __global__ const RootT& root() const __local__ { return *mRoot; }
+#endif
/// @brief Defaults constructors
- ReadAccessor(const ReadAccessor&) = default;
- ~ReadAccessor() = default;
- ReadAccessor& operator=(const ReadAccessor&) = default;
+ ReadAccessor(__local__ const ReadAccessor&) __local__ = default;
+ ~ReadAccessor() __global__ = default;
+ __global__ ReadAccessor& operator=(__global__ const ReadAccessor&) __global__ = default;
/// @brief Return a const point to the cached node of the specified type
///
/// @warning The return value could be NULL.
template<typename NodeT>
- __hostdev__ const NodeT* getNode() const
+ __hostdev__ __global__ const NodeT* getNode() const __global__
{
using T = typename NodeTrait<TreeT, NodeT::LEVEL>::type;
static_assert(is_same<T, NodeT>::value, "ReadAccessor::getNode: Invalid node type");
- return reinterpret_cast<const T*>(mNode[NodeT::LEVEL]);
+ return reinterpret_cast<__global__ const T*>(mNode[NodeT::LEVEL]);
}
template <int LEVEL>
- __hostdev__ const typename NodeTrait<TreeT, LEVEL>::type* getNode() const
+ __hostdev__ __global__ const typename NodeTrait<TreeT, LEVEL>::type* getNode() const
{
using T = typename NodeTrait<TreeT, LEVEL>::type;
static_assert(LEVEL>=0 && LEVEL<=2, "ReadAccessor::getNode: Invalid node type");
- return reinterpret_cast<const T*>(mNode[LEVEL]);
+ return reinterpret_cast<__global__ const T*>(mNode[LEVEL]);
}
@@ -5222,7 +6214,7 @@ public:
#ifdef USE_SINGLE_ACCESSOR_KEY
template<typename NodeT>
- __hostdev__ bool isCached(CoordValueType dirty) const
+ __hostdev__ bool isCached(CoordValueType dirty) const __global__
{
if (!mNode[NodeT::LEVEL])
return false;
@@ -5233,128 +6225,229 @@ public:
return true;
}
- __hostdev__ CoordValueType computeDirty(const CoordType& ijk) const
+ __hostdev__ CoordValueType computeDirty(const CoordType& ijk) const __global__
{
return (ijk[0] ^ mKey[0]) | (ijk[1] ^ mKey[1]) | (ijk[2] ^ mKey[2]);
}
#else
template<typename NodeT>
- __hostdev__ bool isCached(const CoordType& ijk) const
+ __hostdev__ bool isCached(__global__ const CoordType& ijk) const __global__
{
return (ijk[0] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][0] && (ijk[1] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][1] && (ijk[2] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][2];
}
+#if defined(__KERNEL_METAL__)
+ template<typename NodeT>
+ __hostdev__ bool isCached(__local__ const CoordType& ijk) const __global__
+ {
+ return (ijk[0] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][0] && (ijk[1] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][1] && (ijk[2] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][2];
+ }
+ template<typename NodeT>
+ __hostdev__ bool isCached(__local__ const CoordType& ijk) const __local__
+ {
+ return (ijk[0] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][0] && (ijk[1] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][1] && (ijk[2] & int32_t(~NodeT::MASK)) == mKeys[NodeT::LEVEL][2];
+ }
+#endif // __KERNEL_METAL__
#endif
- __hostdev__ ValueType getValue(const CoordType& ijk) const
+ __hostdev__ ValueType getValue(__global__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached<LeafT>(dirty)) {
- return ((LeafT*)mNode[0])->getValue(ijk);
+ return ((__global__ LeafT*)mNode[0])->getValue(ijk);
} else if (this->isCached<NodeT1>(dirty)) {
- return ((NodeT1*)mNode[1])->getValueAndCache(ijk, *this);
+ return ((__global__ NodeT1*)mNode[1])->getValueAndCache(ijk, *this);
} else if (this->isCached<NodeT2>(dirty)) {
- return ((NodeT2*)mNode[2])->getValueAndCache(ijk, *this);
+ return ((__global__ NodeT2*)mNode[2])->getValueAndCache(ijk, *this);
}
return mRoot->getValueAndCache(ijk, *this);
}
- __hostdev__ ValueType operator()(const CoordType& ijk) const
+#if defined(__KERNEL_METAL__)
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __global__
+ {
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ const CoordValueType dirty = this->computeDirty(ijk);
+#else
+ __local__ auto&& dirty = ijk;
+#endif
+ if (this->isCached<LeafT>(dirty)) {
+ return ((__global__ LeafT*)mNode[0])->getValue(ijk);
+ } else if (this->isCached<NodeT1>(dirty)) {
+ return ((__global__ NodeT1*)mNode[1])->getValueAndCache(ijk, *this);
+ } else if (this->isCached<NodeT2>(dirty)) {
+ return ((__global__ NodeT2*)mNode[2])->getValueAndCache(ijk, *this);
+ }
+ return mRoot->getValueAndCache(ijk, *this);
+ }
+ __hostdev__ ValueType getValue(__local__ const CoordType& ijk) const __local__
+ {
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ const CoordValueType dirty = this->computeDirty(ijk);
+#else
+ __local__ auto&& dirty = ijk;
+#endif
+ if (this->isCached<LeafT>(dirty)) {
+ return ((__global__ LeafT*)mNode[0])->getValue(ijk);
+ } else if (this->isCached<NodeT1>(dirty)) {
+ return ((__global__ NodeT1*)mNode[1])->getValueAndCache(ijk, *this);
+ } else if (this->isCached<NodeT2>(dirty)) {
+ return ((__global__ NodeT2*)mNode[2])->getValueAndCache(ijk, *this);
+ }
+ return mRoot->getValueAndCache(ijk, *this);
+ }
+#endif // __KERNEL_METAL__
+
+ __hostdev__ ValueType operator()(__global__ const CoordType& ijk) const __global__
{
return this->getValue(ijk);
}
- __hostdev__ ValueType operator()(int i, int j, int k) const
+ __hostdev__ ValueType operator()(int i, int j, int k) const __global__
{
return this->getValue(CoordType(i,j,k));
}
- __hostdev__ NodeInfo getNodeInfo(const CoordType& ijk) const
+ __hostdev__ NodeInfo getNodeInfo(__global__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached<LeafT>(dirty)) {
- return ((LeafT*)mNode[0])->getNodeInfoAndCache(ijk, *this);
+ return ((__global__ LeafT*)mNode[0])->getNodeInfoAndCache(ijk, *this);
} else if (this->isCached<NodeT1>(dirty)) {
- return ((NodeT1*)mNode[1])->getNodeInfoAndCache(ijk, *this);
+ return ((__global__ NodeT1*)mNode[1])->getNodeInfoAndCache(ijk, *this);
} else if (this->isCached<NodeT2>(dirty)) {
- return ((NodeT2*)mNode[2])->getNodeInfoAndCache(ijk, *this);
+ return ((__global__ NodeT2*)mNode[2])->getNodeInfoAndCache(ijk, *this);
}
return mRoot->getNodeInfoAndCache(ijk, *this);
}
- __hostdev__ bool isActive(const CoordType& ijk) const
+ __hostdev__ bool isActive(__global__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached<LeafT>(dirty)) {
- return ((LeafT*)mNode[0])->isActive(ijk);
+ return ((__global__ LeafT*)mNode[0])->isActive(ijk);
} else if (this->isCached<NodeT1>(dirty)) {
- return ((NodeT1*)mNode[1])->isActiveAndCache(ijk, *this);
+ return ((__global__ NodeT1*)mNode[1])->isActiveAndCache(ijk, *this);
} else if (this->isCached<NodeT2>(dirty)) {
- return ((NodeT2*)mNode[2])->isActiveAndCache(ijk, *this);
+ return ((__global__ NodeT2*)mNode[2])->isActiveAndCache(ijk, *this);
}
return mRoot->isActiveAndCache(ijk, *this);
}
-
- __hostdev__ bool probeValue(const CoordType& ijk, ValueType& v) const
+#if defined(__KERNEL_METAL__)
+ __hostdev__ bool isActive(__local__ const CoordType& ijk) const __local__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __local__ auto&& dirty = ijk;
#endif
if (this->isCached<LeafT>(dirty)) {
- return ((LeafT*)mNode[0])->probeValue(ijk, v);
+ return ((__global__ LeafT*)mNode[0])->isActive(ijk);
} else if (this->isCached<NodeT1>(dirty)) {
- return ((NodeT1*)mNode[1])->probeValueAndCache(ijk, v, *this);
+ return ((__global__ NodeT1*)mNode[1])->isActiveAndCache(ijk, *this);
} else if (this->isCached<NodeT2>(dirty)) {
- return ((NodeT2*)mNode[2])->probeValueAndCache(ijk, v, *this);
+ return ((__global__ NodeT2*)mNode[2])->isActiveAndCache(ijk, *this);
+ }
+ return mRoot->isActiveAndCache(ijk, *this);
+ }
+#endif
+
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ ValueType& v) const __global__
+ {
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ const CoordValueType dirty = this->computeDirty(ijk);
+#else
+ __global__ auto&& dirty = ijk;
+#endif
+ if (this->isCached<LeafT>(dirty)) {
+ return ((__global__ LeafT*)mNode[0])->probeValue(ijk, v);
+ } else if (this->isCached<NodeT1>(dirty)) {
+ return ((__global__ NodeT1*)mNode[1])->probeValueAndCache(ijk, v, *this);
+ } else if (this->isCached<NodeT2>(dirty)) {
+ return ((__global__ NodeT2*)mNode[2])->probeValueAndCache(ijk, v, *this);
}
return mRoot->probeValueAndCache(ijk, v, *this);
}
- __hostdev__ const LeafT* probeLeaf(const CoordType& ijk) const
+ __hostdev__ __global__ const LeafT* probeLeaf(__global__ const CoordType& ijk) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached<LeafT>(dirty)) {
- return ((LeafT*)mNode[0]);
+ return ((__global__ LeafT*)mNode[0]);
} else if (this->isCached<NodeT1>(dirty)) {
- return ((NodeT1*)mNode[1])->probeLeafAndCache(ijk, *this);
+ return ((__global__ NodeT1*)mNode[1])->probeLeafAndCache(ijk, *this);
} else if (this->isCached<NodeT2>(dirty)) {
- return ((NodeT2*)mNode[2])->probeLeafAndCache(ijk, *this);
+ return ((__global__ NodeT2*)mNode[2])->probeLeafAndCache(ijk, *this);
}
return mRoot->probeLeafAndCache(ijk, *this);
}
template<typename RayT>
- __hostdev__ uint32_t getDim(const CoordType& ijk, const RayT& ray) const
+ __hostdev__ uint32_t getDim(__global__ const CoordType& ijk, __global__ const RayT& ray) const __global__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
const CoordValueType dirty = this->computeDirty(ijk);
#else
- auto&& dirty = ijk;
+ __global__ auto&& dirty = ijk;
#endif
if (this->isCached<LeafT>(dirty)) {
- return ((LeafT*)mNode[0])->getDimAndCache(ijk, ray, *this);
+ return ((__global__ LeafT*)mNode[0])->getDimAndCache(ijk, ray, *this);
} else if (this->isCached<NodeT1>(dirty)) {
- return ((NodeT1*)mNode[1])->getDimAndCache(ijk, ray, *this);
+ return ((__global__ NodeT1*)mNode[1])->getDimAndCache(ijk, ray, *this);
} else if (this->isCached<NodeT2>(dirty)) {
- return ((NodeT2*)mNode[2])->getDimAndCache(ijk, ray, *this);
+ return ((__global__ NodeT2*)mNode[2])->getDimAndCache(ijk, ray, *this);
}
return mRoot->getDimAndCache(ijk, ray, *this);
}
+#if defined(__KERNEL_METAL__)
+ template<typename RayT>
+ __hostdev__ uint32_t getDim(__global__ const CoordType& ijk, __local__ const RayT& ray) const __global__
+ {
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ const CoordValueType dirty = this->computeDirty(ijk);
+#else
+ __global__ auto&& dirty = ijk;
+#endif
+ if (this->isCached<LeafT>(dirty)) {
+ return ((__global__ LeafT*)mNode[0])->getDimAndCache(ijk, ray, *this);
+ } else if (this->isCached<NodeT1>(dirty)) {
+ return ((__global__ NodeT1*)mNode[1])->getDimAndCache(ijk, ray, *this);
+ } else if (this->isCached<NodeT2>(dirty)) {
+ return ((__global__ NodeT2*)mNode[2])->getDimAndCache(ijk, ray, *this);
+ }
+ return mRoot->getDimAndCache(ijk, ray, *this);
+ }
+ template<typename RayT>
+ __hostdev__ uint32_t getDim(__local__ const CoordType& ijk, __local__ const RayT& ray) const __local__
+ {
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ const CoordValueType dirty = this->computeDirty(ijk);
+#else
+ __local__ auto&& dirty = ijk;
+#endif
+ if (this->isCached<LeafT>(dirty)) {
+ return ((__global__ LeafT*)mNode[0])->getDimAndCache(ijk, ray, *this);
+ } else if (this->isCached<NodeT1>(dirty)) {
+ return ((__global__ NodeT1*)mNode[1])->getDimAndCache(ijk, ray, *this);
+ } else if (this->isCached<NodeT2>(dirty)) {
+ return ((__global__ NodeT2*)mNode[2])->getDimAndCache(ijk, ray, *this);
+ }
+ return mRoot->getDimAndCache(ijk, ray, *this);
+ }
+#endif // __KERNEL_METAL__
private:
/// @brief Allow nodes to insert themselves into the cache.
@@ -5367,7 +6460,7 @@ private:
/// @brief Inserts a leaf node and key pair into this ReadAccessor
template<typename NodeT>
- __hostdev__ void insert(const CoordType& ijk, const NodeT* node) const
+ __hostdev__ void insert(__global__ const CoordType& ijk, __global__ const NodeT* node) const __local__
{
#ifdef USE_SINGLE_ACCESSOR_KEY
mKey = ijk;
@@ -5376,6 +6469,28 @@ private:
#endif
mNode[NodeT::LEVEL] = node;
}
+#if defined(__KERNEL_METAL__)
+ template<typename NodeT>
+ __hostdev__ void insert(__local__ const CoordType& ijk, __global__ const NodeT* node) const __local__
+ {
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ mKey = ijk;
+#else
+ mKeys[NodeT::LEVEL] = ijk & ~NodeT::MASK;
+#endif
+ mNode[NodeT::LEVEL] = node;
+ }
+ template<typename NodeT>
+ __hostdev__ void insert(__local__ const CoordType& ijk, __global__ const NodeT* node) const __global__
+ {
+#ifdef USE_SINGLE_ACCESSOR_KEY
+ mKey = ijk;
+#else
+ mKeys[NodeT::LEVEL] = ijk & ~NodeT::MASK;
+#endif
+ mNode[NodeT::LEVEL] = node;
+ }
+#endif // __KERNEL_METAL__
}; // ReadAccessor<BuildT, 0, 1, 2>
//////////////////////////////////////////////////
@@ -5393,19 +6508,19 @@ private:
/// createAccessor<0,1,2>(grid): Caching of all nodes at all tree levels
template <int LEVEL0 = -1, int LEVEL1 = -1, int LEVEL2 = -1, typename ValueT = float>
-ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2> createAccessor(const NanoGrid<ValueT> &grid)
+ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2> createAccessor(__global__ const NanoGrid<ValueT> &grid)
{
return ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2>(grid);
}
template <int LEVEL0 = -1, int LEVEL1 = -1, int LEVEL2 = -1, typename ValueT = float>
-ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2> createAccessor(const NanoTree<ValueT> &tree)
+ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2> createAccessor(__global__ const NanoTree<ValueT> &tree)
{
return ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2>(tree);
}
template <int LEVEL0 = -1, int LEVEL1 = -1, int LEVEL2 = -1, typename ValueT = float>
-ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2> createAccessor(const NanoRoot<ValueT> &root)
+ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2> createAccessor(__global__ const NanoRoot<ValueT> &root)
{
return ReadAccessor<ValueT, LEVEL0, LEVEL1, LEVEL2>(root);
}
@@ -5424,52 +6539,59 @@ class GridMetaData
// memory-layout of the data structure and the reasons why certain methods are safe
// to call and others are not!
using GridT = NanoGrid<int>;
- __hostdev__ const GridT& grid() const { return *reinterpret_cast<const GridT*>(this); }
+ __hostdev__ __global__ const GridT& grid() const __global__ { return *reinterpret_cast<__global__ const GridT*>(this); }
public:
- __hostdev__ bool isValid() const { return this->grid().isValid(); }
- __hostdev__ uint64_t gridSize() const { return this->grid().gridSize(); }
- __hostdev__ uint32_t gridIndex() const { return this->grid().gridIndex(); }
- __hostdev__ uint32_t gridCount() const { return this->grid().gridCount(); }
- __hostdev__ const char* shortGridName() const { return this->grid().shortGridName(); }
- __hostdev__ GridType gridType() const { return this->grid().gridType(); }
- __hostdev__ GridClass gridClass() const { return this->grid().gridClass(); }
- __hostdev__ bool isLevelSet() const { return this->grid().isLevelSet(); }
- __hostdev__ bool isFogVolume() const { return this->grid().isFogVolume(); }
- __hostdev__ bool isPointIndex() const { return this->grid().isPointIndex(); }
- __hostdev__ bool isPointData() const { return this->grid().isPointData(); }
- __hostdev__ bool isMask() const { return this->grid().isMask(); }
- __hostdev__ bool isStaggered() const { return this->grid().isStaggered(); }
- __hostdev__ bool isUnknown() const { return this->grid().isUnknown(); }
- __hostdev__ const Map& map() const { return this->grid().map(); }
- __hostdev__ const BBox<Vec3R>& worldBBox() const { return this->grid().worldBBox(); }
- __hostdev__ const BBox<Coord>& indexBBox() const { return this->grid().indexBBox(); }
- __hostdev__ Vec3R voxelSize() const { return this->grid().voxelSize(); }
- __hostdev__ int blindDataCount() const { return this->grid().blindDataCount(); }
- __hostdev__ const GridBlindMetaData& blindMetaData(uint32_t n) const { return this->grid().blindMetaData(n); }
- __hostdev__ uint64_t activeVoxelCount() const { return this->grid().activeVoxelCount(); }
- __hostdev__ const uint32_t& activeTileCount(uint32_t level) const { return this->grid().tree().activeTileCount(level); }
- __hostdev__ uint32_t nodeCount(uint32_t level) const { return this->grid().tree().nodeCount(level); }
- __hostdev__ uint64_t checksum() const { return this->grid().checksum(); }
- __hostdev__ bool isEmpty() const { return this->grid().isEmpty(); }
- __hostdev__ Version version() const { return this->grid().version(); }
+ __hostdev__ bool isValid() const __global__ { return this->grid().isValid(); }
+ __hostdev__ uint64_t gridSize() const __global__ { return this->grid().gridSize(); }
+ __hostdev__ uint32_t gridIndex() const __global__ { return this->grid().gridIndex(); }
+ __hostdev__ uint32_t gridCount() const __global__ { return this->grid().gridCount(); }
+ __hostdev__ __global__ const char* shortGridName() const __global__ { return this->grid().shortGridName(); }
+ __hostdev__ GridType gridType() const __global__ { return this->grid().gridType(); }
+ __hostdev__ GridClass gridClass() const __global__ { return this->grid().gridClass(); }
+ __hostdev__ bool isLevelSet() const __global__ { return this->grid().isLevelSet(); }
+ __hostdev__ bool isFogVolume() const __global__ { return this->grid().isFogVolume(); }
+ __hostdev__ bool isPointIndex() const __global__ { return this->grid().isPointIndex(); }
+ __hostdev__ bool isPointData() const __global__ { return this->grid().isPointData(); }
+ __hostdev__ bool isMask() const __global__ { return this->grid().isMask(); }
+ __hostdev__ bool isStaggered() const __global__ { return this->grid().isStaggered(); }
+ __hostdev__ bool isUnknown() const __global__ { return this->grid().isUnknown(); }
+ __hostdev__ __global__ const Map& map() const __global__ { return this->grid().map(); }
+ __hostdev__ __global__ const BBox<Vec3R>& worldBBox() const __global__ { return this->grid().worldBBox(); }
+ __hostdev__ __global__ const BBox<Coord>& indexBBox() const __global__ { return this->grid().indexBBox(); }
+ __hostdev__ Vec3R voxelSize() const __global__ { return this->grid().voxelSize(); }
+ __hostdev__ int blindDataCount() const __global__ { return this->grid().blindDataCount(); }
+ __hostdev__ __global__ const GridBlindMetaData& blindMetaData(uint32_t n) const __global__ { return this->grid().blindMetaData(n); }
+ __hostdev__ uint64_t activeVoxelCount() const __global__ { return this->grid().activeVoxelCount(); }
+ __hostdev__ __global__ const uint32_t& activeTileCount(uint32_t level) const __global__ { return this->grid().tree().activeTileCount(level); }
+ __hostdev__ uint32_t nodeCount(uint32_t level) const __global__ { return this->grid().tree().nodeCount(level); }
+ __hostdev__ uint64_t checksum() const __global__ { return this->grid().checksum(); }
+ __hostdev__ bool isEmpty() const __global__ { return this->grid().isEmpty(); }
+ __hostdev__ Version version() const __global__ { return this->grid().version(); }
}; // GridMetaData
/// @brief Class to access points at a specific voxel location
template<typename AttT>
-class PointAccessor : public DefaultReadAccessor<uint32_t>
+class PointAccessor
+#if !defined(__KERNEL_METAL__)
+ : public DefaultReadAccessor<uint32_t>
+#endif
{
+#if defined(__KERNEL_METAL__)
+ DefaultReadAccessor<uint32_t> AccT;
+#else
using AccT = DefaultReadAccessor<uint32_t>;
- const UInt32Grid* mGrid;
- const AttT* mData;
+#endif
+ const __global__ UInt32Grid* mGrid;
+ const __global__ AttT* mData;
public:
using LeafNodeType = typename NanoRoot<uint32_t>::LeafNodeType;
- PointAccessor(const UInt32Grid& grid)
+ PointAccessor(__global__ const UInt32Grid& grid) __local__
: AccT(grid.tree().root())
, mGrid(&grid)
- , mData(reinterpret_cast<const AttT*>(grid.blindData(0)))
+ , mData(reinterpret_cast<__global__ const AttT*>(grid.blindData(0)))
{
NANOVDB_ASSERT(grid.gridType() == GridType::UInt32);
NANOVDB_ASSERT((grid.gridClass() == GridClass::PointIndex && is_same<uint32_t, AttT>::value) ||
@@ -5478,7 +6600,7 @@ public:
}
/// @brief Return the total number of point in the grid and set the
/// iterators to the complete range of points.
- __hostdev__ uint64_t gridPoints(const AttT*& begin, const AttT*& end) const
+ __hostdev__ uint64_t gridPoints(__global__ const AttT*& begin, __global__ const AttT*& end) const __global__
{
const uint64_t count = mGrid->blindMetaData(0u).mElementCount;
begin = mData;
@@ -5488,9 +6610,9 @@ public:
/// @brief Return the number of points in the leaf node containing the coordinate @a ijk.
/// If this return value is larger than zero then the iterators @a begin and @a end
/// will point to all the attributes contained within that leaf node.
- __hostdev__ uint64_t leafPoints(const Coord& ijk, const AttT*& begin, const AttT*& end) const
+ __hostdev__ uint64_t leafPoints(__global__ const Coord& ijk, __global__ const AttT*& begin, __global__ const AttT*& end) const __global__
{
- auto* leaf = this->probeLeaf(ijk);
+ __global__ auto* leaf = this->probeLeaf(ijk);
if (leaf == nullptr) {
return 0;
}
@@ -5500,14 +6622,14 @@ public:
}
/// @brief get iterators over offsets to points at a specific voxel location
- __hostdev__ uint64_t voxelPoints(const Coord& ijk, const AttT*& begin, const AttT*& end) const
+ __hostdev__ uint64_t voxelPoints(__global__ const Coord& ijk, __global__ const AttT*& begin, __global__ const AttT*& end) const __global__
{
- auto* leaf = this->probeLeaf(ijk);
+ __global__ auto* leaf = this->probeLeaf(ijk);
if (leaf == nullptr)
return 0;
const uint32_t offset = LeafNodeType::CoordToOffset(ijk);
if (leaf->isActive(offset)) {
- auto* p = mData + leaf->minimum();
+ __global__ auto* p = mData + leaf->minimum();
begin = p + (offset == 0 ? 0 : leaf->getValue(offset - 1));
end = p + leaf->getValue(offset);
return end - begin;
@@ -5520,11 +6642,20 @@ public:
///
/// @note The ChannelT template parameter can be either const and non-const.
template<typename ChannelT>
-class ChannelAccessor : public DefaultReadAccessor<ValueIndex>
+class ChannelAccessor
+#if !defined (__KERNEL_METAL__)
+ : public DefaultReadAccessor<ValueIndex>
+#endif
{
+#if defined (__KERNEL_METAL__)
+ DefaultReadAccessor<ValueIndex> BaseT;
+#define BASE(v) BaseT.v
+#else
using BaseT = DefaultReadAccessor<ValueIndex>;
- const IndexGrid &mGrid;
- ChannelT *mChannel;
+#define BASE(v) BaseT::v
+#endif
+ __global__ const IndexGrid &mGrid;
+ __global__ ChannelT *mChannel;
public:
using ValueType = ChannelT;
@@ -5533,7 +6664,7 @@ public:
/// @brief Ctor from an IndexGrid and an integer ID of an internal channel
/// that is assumed to exist as blind data in the IndexGrid.
- __hostdev__ ChannelAccessor(const IndexGrid& grid, uint32_t channelID = 0u)
+ __hostdev__ ChannelAccessor(__global__ const IndexGrid& grid, uint32_t channelID = 0u)
: BaseT(grid.tree().root())
, mGrid(grid)
, mChannel(nullptr)
@@ -5544,7 +6675,7 @@ public:
}
/// @brief Ctor from an IndexGrid and an external channel
- __hostdev__ ChannelAccessor(const IndexGrid& grid, ChannelT *channelPtr)
+ __hostdev__ ChannelAccessor(__global__ const IndexGrid& grid, __global__ ChannelT *channelPtr)
: BaseT(grid.tree().root())
, mGrid(grid)
, mChannel(channelPtr)
@@ -5555,19 +6686,19 @@ public:
}
/// @brief Return a const reference to the IndexGrid
- __hostdev__ const IndexGrid &grid() const {return mGrid;}
+ __hostdev__ __global__ const IndexGrid &grid() const {return mGrid;}
/// @brief Return a const reference to the tree of the IndexGrid
- __hostdev__ const IndexTree &tree() const {return mGrid.tree();}
+ __hostdev__ __global__ const IndexTree &tree() const {return mGrid.tree();}
/// @brief Return a vector of the axial voxel sizes
- __hostdev__ const Vec3R& voxelSize() const { return mGrid.voxelSize(); }
+ __hostdev__ __global__ const Vec3R& voxelSize() const { return mGrid.voxelSize(); }
/// @brief Return total number of values indexed by the IndexGrid
- __hostdev__ const uint64_t& valueCount() const { return mGrid.valueCount(); }
+ __hostdev__ uint64_t valueCount() const { return mGrid.valueCount(); }
/// @brief Change to an external channel
- __hostdev__ void setChannel(ChannelT *channelPtr)
+ __hostdev__ void setChannel(__global__ ChannelT *channelPtr)
{
mChannel = channelPtr;
NANOVDB_ASSERT(mChannel);
@@ -5577,23 +6708,24 @@ public:
/// in the IndexGrid.
__hostdev__ void setChannel(uint32_t channelID)
{
- this->setChannel(reinterpret_cast<ChannelT*>(const_cast<void*>(mGrid.blindData(channelID))));
+ this->setChannel(reinterpret_cast<__global__ ChannelT*>(const_cast<__global__ void*>(mGrid.blindData(channelID))));
}
/// @brief Return the linear offset into a channel that maps to the specified coordinate
- __hostdev__ uint64_t getIndex(const Coord& ijk) const {return BaseT::getValue(ijk);}
- __hostdev__ uint64_t idx(int i, int j, int k) const {return BaseT::getValue(Coord(i,j,k));}
+ __hostdev__ uint64_t getIndex(__global__ const Coord& ijk) const {return BASE(getValue)(ijk);}
+ __hostdev__ uint64_t idx(int i, int j, int k) const {return BASE(getValue)(Coord(i,j,k));}
/// @brief Return the value from a cached channel that maps to the specified coordinate
- __hostdev__ ChannelT& getValue(const Coord& ijk) const {return mChannel[BaseT::getValue(ijk)];}
- __hostdev__ ChannelT& operator()(const Coord& ijk) const {return this->getValue(ijk);}
- __hostdev__ ChannelT& operator()(int i, int j, int k) const {return this->getValue(Coord(i,j,k));}
+ __hostdev__ __global__ ChannelT& getValue(__global__ const Coord& ijk) const {return mChannel[BASE(getValue)(ijk)];}
+ __hostdev__ __global__ ChannelT& operator()(__global__ const Coord& ijk) const {return this->getValue(ijk);}
+ __hostdev__ __global__ ChannelT& operator()(int i, int j, int k) const {return this->getValue(Coord(i,j,k));}
/// @brief return the state and updates the value of the specified voxel
- __hostdev__ bool probeValue(const CoordType& ijk, typename remove_const<ChannelT>::type &v) const
+ using CoordType = DefaultReadAccessor<ValueIndex>::CoordType;
+ __hostdev__ bool probeValue(__global__ const CoordType& ijk, __global__ typename remove_const<ChannelT>::type &v) const
{
uint64_t idx;
- const bool isActive = BaseT::probeValue(ijk, idx);
+ const bool isActive = BASE(probeValue)(ijk, idx);
v = mChannel[idx];
return isActive;
}
@@ -5601,7 +6733,7 @@ public:
///
/// @note The template parameter can be either const or non-const
template <typename T>
- __hostdev__ T& getValue(const Coord& ijk, T* channelPtr) const {return channelPtr[BaseT::getValue(ijk)];}
+ __hostdev__ __global__ T& getValue(__global__ const Coord& ijk, __global__ T* channelPtr) const {return channelPtr[BASE(getValue)(ijk)];}
}; // ChannelAccessor
@@ -5643,6 +6775,7 @@ namespace io {
/// @throw std::invalid_argument if buffer does not point to a valid NanoVDB grid.
///
/// @warning This is pretty ugly code that involves lots of pointer and bit manipulations - not for the faint of heart :)
+#if !defined(__KERNEL_METAL__)
template <typename StreamT>// StreamT class must support: "void write(char*, size_t)"
void writeUncompressedGrid(StreamT &os, const void *buffer)
{
@@ -5768,7 +6901,7 @@ VecT<GridHandleT> readUncompressedGrids(const char *fileName, const typename Gri
}
return readUncompressedGrids<GridHandleT, StreamT, VecT>(is, buffer);
}// readUncompressedGrids
-
+#endif // #if !defined(__KERNEL_METAL__)
} // namespace io
#endif// if !defined(__CUDA_ARCH__) && !defined(__HIP__)
diff --git a/nanovdb/nanovdb/util/SampleFromVoxels.h b/nanovdb/nanovdb/util/SampleFromVoxels.h
index e779d66..e2f9283 100644
--- a/nanovdb/nanovdb/util/SampleFromVoxels.h
+++ b/nanovdb/nanovdb/util/SampleFromVoxels.h
@@ -1,983 +1,1120 @@
-// Copyright Contributors to the OpenVDB Project
-// SPDX-License-Identifier: MPL-2.0
-
-//////////////////////////////////////////////////////////////////////////
-///
-/// @file SampleFromVoxels.h
-///
-/// @brief NearestNeighborSampler, TrilinearSampler, TriquadraticSampler and TricubicSampler
-///
-/// @note These interpolators employ internal caching for better performance when used repeatedly
-/// in the same voxel location, so try to reuse an instance of these classes more than once.
-///
-/// @warning While all the interpolators defined below work with both scalars and vectors
-/// values (e.g. float and Vec3<float>) TrilinarSampler::zeroCrossing and
-/// Trilinear::gradient will only compile with floating point value types.
-///
-/// @author Ken Museth
-///
-///////////////////////////////////////////////////////////////////////////
-
-#ifndef NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
-#define NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
-
-// Only define __hostdev__ when compiling as NVIDIA CUDA
-#if defined(__CUDACC__) || defined(__HIP__)
-#define __hostdev__ __host__ __device__
-#else
-#include <cmath> // for floor
-#define __hostdev__
-#endif
-
-namespace nanovdb {
-
-// Forward declaration of sampler with specific polynomial orders
-template<typename TreeT, int Order, bool UseCache = true>
-class SampleFromVoxels;
-
-/// @brief Factory free-function for a sampler of specific polynomial orders
-///
-/// @details This allows for the compact syntax:
-/// @code
-/// auto acc = grid.getAccessor();
-/// auto smp = nanovdb::createSampler<1>( acc );
-/// @endcode
-template<int Order, typename TreeOrAccT, bool UseCache = true>
-__hostdev__ SampleFromVoxels<TreeOrAccT, Order, UseCache> createSampler(const TreeOrAccT& acc)
-{
- return SampleFromVoxels<TreeOrAccT, Order, UseCache>(acc);
-}
-
-/// @brief Utility function that returns the Coord of the round-down of @a xyz
-/// and redefined @xyz as the fractional part, ie xyz-in = return-value + xyz-out
-template<typename CoordT, typename RealT, template<typename> class Vec3T>
-__hostdev__ inline CoordT Floor(Vec3T<RealT>& xyz);
-
-/// @brief Template specialization of Floor for Vec3<float>
-template<typename CoordT, template<typename> class Vec3T>
-__hostdev__ inline CoordT Floor(Vec3T<float>& xyz)
-{
- const float ijk[3] = {floorf(xyz[0]), floorf(xyz[1]), floorf(xyz[2])};
- xyz[0] -= ijk[0];
- xyz[1] -= ijk[1];
- xyz[2] -= ijk[2];
- return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2]));
-}
-
-/// @brief Template specialization of Floor for Vec3<float>
-template<typename CoordT, template<typename> class Vec3T>
-__hostdev__ inline CoordT Floor(Vec3T<double>& xyz)
-{
- const double ijk[3] = {floor(xyz[0]), floor(xyz[1]), floor(xyz[2])};
- xyz[0] -= ijk[0];
- xyz[1] -= ijk[1];
- xyz[2] -= ijk[2];
- return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2]));
-}
-
-// ------------------------------> NearestNeighborSampler <--------------------------------------
-
-/// @brief Nearest neighbor, i.e. zero order, interpolator with caching
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 0, true>
-{
-public:
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-
- static const int ORDER = 0;
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc)
- : mAcc(acc)
- , mPos(CoordT::max())
- {
- }
-
- __hostdev__ const TreeOrAccT& accessor() const { return mAcc; }
-
- /// @note xyz is in index space space
- template<typename Vec3T>
- inline __hostdev__ ValueT operator()(const Vec3T& xyz) const;
-
- inline __hostdev__ ValueT operator()(const CoordT& ijk) const;
-
-private:
- const TreeOrAccT& mAcc;
- mutable CoordT mPos;
- mutable ValueT mVal; // private cache
-}; // SampleFromVoxels<TreeOrAccT, 0, true>
-
-/// @brief Nearest neighbor, i.e. zero order, interpolator without caching
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 0, false>
-{
-public:
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
- static const int ORDER = 0;
-
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc)
- : mAcc(acc)
- {
- }
-
- __hostdev__ const TreeOrAccT& accessor() const { return mAcc; }
-
- /// @note xyz is in index space space
- template<typename Vec3T>
- inline __hostdev__ ValueT operator()(const Vec3T& xyz) const;
-
- inline __hostdev__ ValueT operator()(const CoordT& ijk) const { return mAcc.getValue(ijk);}
-
-private:
- const TreeOrAccT& mAcc;
-}; // SampleFromVoxels<TreeOrAccT, 0, false>
-
-template<typename TreeOrAccT>
-template<typename Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const Vec3T& xyz) const
-{
- const CoordT ijk = Round<CoordT>(xyz);
- if (ijk != mPos) {
- mPos = ijk;
- mVal = mAcc.getValue(mPos);
- }
- return mVal;
-}
-
-template<typename TreeOrAccT>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const CoordT& ijk) const
-{
- if (ijk != mPos) {
- mPos = ijk;
- mVal = mAcc.getValue(mPos);
- }
- return mVal;
-}
-
-template<typename TreeOrAccT>
-template<typename Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, false>::operator()(const Vec3T& xyz) const
-{
- return mAcc.getValue(Round<CoordT>(xyz));
-}
-
-// ------------------------------> TrilinearSampler <--------------------------------------
-
-/// @brief Tri-linear sampler, i.e. first order, interpolator
-template<typename TreeOrAccT>
-class TrilinearSampler
-{
-protected:
- const TreeOrAccT& mAcc;
-
-public:
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
- static const int ORDER = 1;
-
- /// @brief Protected constructor from a Tree or ReadAccessor
- __hostdev__ TrilinearSampler(const TreeOrAccT& acc) : mAcc(acc) {}
-
- __hostdev__ const TreeOrAccT& accessor() const { return mAcc; }
-
- /// @brief Extract the stencil of 8 values
- inline __hostdev__ void stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const;
-
- template<typename RealT, template<typename...> class Vec3T>
- static inline __hostdev__ ValueT sample(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2]);
-
- template<typename RealT, template<typename...> class Vec3T>
- static inline __hostdev__ Vec3T<ValueT> gradient(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2]);
-
- static inline __hostdev__ bool zeroCrossing(const ValueT (&v)[2][2][2]);
-}; // TrilinearSamplerBase
-
-template<typename TreeOrAccT>
-__hostdev__ void TrilinearSampler<TreeOrAccT>::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const
-{
- v[0][0][0] = mAcc.getValue(ijk); // i, j, k
-
- ijk[2] += 1;
- v[0][0][1] = mAcc.getValue(ijk); // i, j, k + 1
-
- ijk[1] += 1;
- v[0][1][1] = mAcc.getValue(ijk); // i, j+1, k + 1
-
- ijk[2] -= 1;
- v[0][1][0] = mAcc.getValue(ijk); // i, j+1, k
-
- ijk[0] += 1;
- ijk[1] -= 1;
- v[1][0][0] = mAcc.getValue(ijk); // i+1, j, k
-
- ijk[2] += 1;
- v[1][0][1] = mAcc.getValue(ijk); // i+1, j, k + 1
-
- ijk[1] += 1;
- v[1][1][1] = mAcc.getValue(ijk); // i+1, j+1, k + 1
-
- ijk[2] -= 1;
- v[1][1][0] = mAcc.getValue(ijk); // i+1, j+1, k
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType TrilinearSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
-{
-#if 0
- auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a
- //auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b, fma(-w, a, a));};// = (1-w)*a + w*b
-#else
- auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); };
-#endif
- return lerp(lerp(lerp(v[0][0][0], v[0][0][1], uvw[2]), lerp(v[0][1][0], v[0][1][1], uvw[2]), uvw[1]),
- lerp(lerp(v[1][0][0], v[1][0][1], uvw[2]), lerp(v[1][1][0], v[1][1][1], uvw[2]), uvw[1]),
- uvw[0]);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ Vec3T<typename TreeOrAccT::ValueType> TrilinearSampler<TreeOrAccT>::gradient(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
-{
- static_assert(is_floating_point<ValueT>::value, "TrilinearSampler::gradient requires a floating-point type");
-#if 0
- auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a
- //auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b, fma(-w, a, a));};// = (1-w)*a + w*b
-#else
- auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); };
-#endif
-
- ValueT D[4] = {v[0][0][1] - v[0][0][0], v[0][1][1] - v[0][1][0], v[1][0][1] - v[1][0][0], v[1][1][1] - v[1][1][0]};
-
- // Z component
- Vec3T<ValueT> grad(0, 0, lerp(lerp(D[0], D[1], uvw[1]), lerp(D[2], D[3], uvw[1]), uvw[0]));
-
- const ValueT w = ValueT(uvw[2]);
- D[0] = v[0][0][0] + D[0] * w;
- D[1] = v[0][1][0] + D[1] * w;
- D[2] = v[1][0][0] + D[2] * w;
- D[3] = v[1][1][0] + D[3] * w;
-
- // X component
- grad[0] = lerp(D[2], D[3], uvw[1]) - lerp(D[0], D[1], uvw[1]);
-
- // Y component
- grad[1] = lerp(D[1] - D[0], D[3] - D[2], uvw[0]);
-
- return grad;
-}
-
-template<typename TreeOrAccT>
-__hostdev__ bool TrilinearSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[2][2][2])
-{
- static_assert(is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
- const bool less = v[0][0][0] < ValueT(0);
- return (less ^ (v[0][0][1] < ValueT(0))) ||
- (less ^ (v[0][1][1] < ValueT(0))) ||
- (less ^ (v[0][1][0] < ValueT(0))) ||
- (less ^ (v[1][0][0] < ValueT(0))) ||
- (less ^ (v[1][0][1] < ValueT(0))) ||
- (less ^ (v[1][1][1] < ValueT(0))) ||
- (less ^ (v[1][1][0] < ValueT(0)));
-}
-
-/// @brief Template specialization that does not use caching of stencil points
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 1, false> : public TrilinearSampler<TreeOrAccT>
-{
- using BaseT = TrilinearSampler<TreeOrAccT>;
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-
-public:
-
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc) {}
-
- /// @note xyz is in index space space
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
-
- /// @note ijk is in index space space
- __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);}
-
- /// @brief Return the gradient in index space.
- ///
- /// @warning Will only compile with floating point value types
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ Vec3T<ValueT> gradient(Vec3T<RealT> xyz) const;
-
- /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position.
- ///
- /// @warning Will only compile with floating point value types
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
-
-}; // SampleFromVoxels<TreeOrAccT, 1, false>
-
-/// @brief Template specialization with caching of stencil values
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 1, true> : public TrilinearSampler<TreeOrAccT>
-{
- using BaseT = TrilinearSampler<TreeOrAccT>;
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-
- mutable CoordT mPos;
- mutable ValueT mVal[2][2][2];
-
- template<typename RealT, template<typename...> class Vec3T>
- __hostdev__ void cache(Vec3T<RealT>& xyz) const;
-public:
-
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc), mPos(CoordT::max()){}
-
- /// @note xyz is in index space space
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
-
- // @note ijk is in index space space
- __hostdev__ ValueT operator()(const CoordT &ijk) const;
-
- /// @brief Return the gradient in index space.
- ///
- /// @warning Will only compile with floating point value types
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ Vec3T<ValueT> gradient(Vec3T<RealT> xyz) const;
-
- /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position.
- ///
- /// @warning Will only compile with floating point value types
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
-
- /// @brief Return true if the cached tri-linear stencil has a zero crossing.
- ///
- /// @warning Will only compile with floating point value types
- __hostdev__ bool zeroCrossing() const { return BaseT::zeroCrossing(mVal); }
-
-}; // SampleFromVoxels<TreeOrAccT, 1, true>
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(Vec3T<RealT> xyz) const
-{
- this->cache(xyz);
- return BaseT::sample(xyz, mVal);
-}
-
-template<typename TreeOrAccT>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(const CoordT &ijk) const
-{
- return ijk == mPos ? mVal[0][0][0] : BaseT::mAcc.getValue(ijk);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, true>::gradient(Vec3T<RealT> xyz) const
-{
- this->cache(xyz);
- return BaseT::gradient(xyz, mVal);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ bool SampleFromVoxels<TreeOrAccT, 1, true>::zeroCrossing(Vec3T<RealT> xyz) const
-{
- this->cache(xyz);
- return BaseT::zeroCrossing(mVal);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ void SampleFromVoxels<TreeOrAccT, 1, true>::cache(Vec3T<RealT>& xyz) const
-{
- CoordT ijk = Floor<CoordT>(xyz);
- if (ijk != mPos) {
- mPos = ijk;
- BaseT::stencil(ijk, mVal);
- }
-}
-
-#if 0
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
-{
- ValueT val[2][2][2];
- CoordT ijk = Floor<CoordT>(xyz);
- BaseT::stencil(ijk, val);
- return BaseT::sample(xyz, val);
-}
-
-#else
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
-{
- auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); };
-
- CoordT coord = Floor<CoordT>(xyz);
-
- ValueT vx, vx1, vy, vy1, vz, vz1;
-
- vz = BaseT::mAcc.getValue(coord);
- coord[2] += 1;
- vz1 = BaseT::mAcc.getValue(coord);
- vy = lerp(vz, vz1, xyz[2]);
-
- coord[1] += 1;
-
- vz1 = BaseT::mAcc.getValue(coord);
- coord[2] -= 1;
- vz = BaseT::mAcc.getValue(coord);
- vy1 = lerp(vz, vz1, xyz[2]);
-
- vx = lerp(vy, vy1, xyz[1]);
-
- coord[0] += 1;
-
- vz = BaseT::mAcc.getValue(coord);
- coord[2] += 1;
- vz1 = BaseT::mAcc.getValue(coord);
- vy1 = lerp(vz, vz1, xyz[2]);
-
- coord[1] -= 1;
-
- vz1 = BaseT::mAcc.getValue(coord);
- coord[2] -= 1;
- vz = BaseT::mAcc.getValue(coord);
- vy = lerp(vz, vz1, xyz[2]);
-
- vx1 = lerp(vy, vy1, xyz[1]);
-
- return lerp(vx, vx1, xyz[0]);
-}
-#endif
-
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ inline Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, false>::gradient(Vec3T<RealT> xyz) const
-{
- ValueT val[2][2][2];
- CoordT ijk = Floor<CoordT>(xyz);
- BaseT::stencil(ijk, val);
- return BaseT::gradient(xyz, val);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ bool SampleFromVoxels<TreeOrAccT, 1, false>::zeroCrossing(Vec3T<RealT> xyz) const
-{
- ValueT val[2][2][2];
- CoordT ijk = Floor<CoordT>(xyz);
- BaseT::stencil(ijk, val);
- return BaseT::zeroCrossing(val);
-}
-
-// ------------------------------> TriquadraticSampler <--------------------------------------
-
-/// @brief Tri-quadratic sampler, i.e. second order, interpolator
-template<typename TreeOrAccT>
-class TriquadraticSampler
-{
-protected:
- const TreeOrAccT& mAcc;
-
-public:
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
- static const int ORDER = 1;
-
- /// @brief Protected constructor from a Tree or ReadAccessor
- __hostdev__ TriquadraticSampler(const TreeOrAccT& acc) : mAcc(acc) {}
-
- __hostdev__ const TreeOrAccT& accessor() const { return mAcc; }
-
- /// @brief Extract the stencil of 27 values
- inline __hostdev__ void stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const;
-
- template<typename RealT, template<typename...> class Vec3T>
- static inline __hostdev__ ValueT sample(const Vec3T<RealT> &uvw, const ValueT (&v)[3][3][3]);
-
- static inline __hostdev__ bool zeroCrossing(const ValueT (&v)[3][3][3]);
-}; // TriquadraticSamplerBase
-
-template<typename TreeOrAccT>
-__hostdev__ void TriquadraticSampler<TreeOrAccT>::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const
-{
- CoordT p(ijk[0] - 1, 0, 0);
- for (int dx = 0; dx < 3; ++dx, ++p[0]) {
- p[1] = ijk[1] - 1;
- for (int dy = 0; dy < 3; ++dy, ++p[1]) {
- p[2] = ijk[2] - 1;
- for (int dz = 0; dz < 3; ++dz, ++p[2]) {
- v[dx][dy][dz] = mAcc.getValue(p);// extract the stencil of 27 values
- }
- }
- }
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType TriquadraticSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[3][3][3])
-{
- auto kernel = [](const ValueT* value, double weight)->ValueT {
- return weight * (weight * (0.5f * (value[0] + value[2]) - value[1]) +
- 0.5f * (value[2] - value[0])) + value[1];
- };
-
- ValueT vx[3];
- for (int dx = 0; dx < 3; ++dx) {
- ValueT vy[3];
- for (int dy = 0; dy < 3; ++dy) {
- vy[dy] = kernel(&v[dx][dy][0], uvw[2]);
- }//loop over y
- vx[dx] = kernel(vy, uvw[1]);
- }//loop over x
- return kernel(vx, uvw[0]);
-}
-
-template<typename TreeOrAccT>
-__hostdev__ bool TriquadraticSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[3][3][3])
-{
- static_assert(is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
- const bool less = v[0][0][0] < ValueT(0);
- for (int dx = 0; dx < 3; ++dx) {
- for (int dy = 0; dy < 3; ++dy) {
- for (int dz = 0; dz < 3; ++dz) {
- if (less ^ (v[dx][dy][dz] < ValueT(0))) return true;
- }
- }
- }
- return false;
-}
-
-/// @brief Template specialization that does not use caching of stencil points
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 2, false> : public TriquadraticSampler<TreeOrAccT>
-{
- using BaseT = TriquadraticSampler<TreeOrAccT>;
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-public:
-
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc) {}
-
- /// @note xyz is in index space space
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
-
- __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);}
-
- /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position.
- ///
- /// @warning Will only compile with floating point value types
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
-
-}; // SampleFromVoxels<TreeOrAccT, 2, false>
-
-/// @brief Template specialization with caching of stencil values
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 2, true> : public TriquadraticSampler<TreeOrAccT>
-{
- using BaseT = TriquadraticSampler<TreeOrAccT>;
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-
- mutable CoordT mPos;
- mutable ValueT mVal[3][3][3];
-
- template<typename RealT, template<typename...> class Vec3T>
- __hostdev__ void cache(Vec3T<RealT>& xyz) const;
-public:
-
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc) : BaseT(acc), mPos(CoordT::max()){}
-
- /// @note xyz is in index space space
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
-
- inline __hostdev__ ValueT operator()(const CoordT &ijk) const;
-
- /// @brief Return true if the tr-linear stencil has a zero crossing at the specified index position.
- ///
- /// @warning Will only compile with floating point value types
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
-
- /// @brief Return true if the cached tri-linear stencil has a zero crossing.
- ///
- /// @warning Will only compile with floating point value types
- __hostdev__ bool zeroCrossing() const { return BaseT::zeroCrossing(mVal); }
-
-}; // SampleFromVoxels<TreeOrAccT, 2, true>
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(Vec3T<RealT> xyz) const
-{
- this->cache(xyz);
- return BaseT::sample(xyz, mVal);
-}
-
-template<typename TreeOrAccT>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(const CoordT &ijk) const
-{
- return ijk == mPos ? mVal[1][1][1] : BaseT::mAcc.getValue(ijk);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ bool SampleFromVoxels<TreeOrAccT, 2, true>::zeroCrossing(Vec3T<RealT> xyz) const
-{
- this->cache(xyz);
- return BaseT::zeroCrossing(mVal);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ void SampleFromVoxels<TreeOrAccT, 2, true>::cache(Vec3T<RealT>& xyz) const
-{
- CoordT ijk = Floor<CoordT>(xyz);
- if (ijk != mPos) {
- mPos = ijk;
- BaseT::stencil(ijk, mVal);
- }
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, false>::operator()(Vec3T<RealT> xyz) const
-{
- ValueT val[3][3][3];
- CoordT ijk = Floor<CoordT>(xyz);
- BaseT::stencil(ijk, val);
- return BaseT::sample(xyz, val);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ bool SampleFromVoxels<TreeOrAccT, 2, false>::zeroCrossing(Vec3T<RealT> xyz) const
-{
- ValueT val[3][3][3];
- CoordT ijk = Floor<CoordT>(xyz);
- BaseT::stencil(ijk, val);
- return BaseT::zeroCrossing(val);
-}
-
-// ------------------------------> TricubicSampler <--------------------------------------
-
-/// @brief Tri-cubic sampler, i.e. third order, interpolator.
-///
-/// @details See the following paper for implementation details:
-/// Lekien, F. and Marsden, J.: Tricubic interpolation in three dimensions.
-/// In: International Journal for Numerical Methods
-/// in Engineering (2005), No. 63, p. 455-471
-
-template<typename TreeOrAccT>
-class TricubicSampler
-{
-protected:
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-
- const TreeOrAccT& mAcc;
-
-public:
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ TricubicSampler(const TreeOrAccT& acc)
- : mAcc(acc)
- {
- }
-
- __hostdev__ const TreeOrAccT& accessor() const { return mAcc; }
-
- /// @brief Extract the stencil of 8 values
- inline __hostdev__ void stencil(const CoordT& ijk, ValueT (&c)[64]) const;
-
- template<typename RealT, template<typename...> class Vec3T>
- static inline __hostdev__ ValueT sample(const Vec3T<RealT> &uvw, const ValueT (&c)[64]);
-}; // TricubicSampler
-
-template<typename TreeOrAccT>
-__hostdev__ void TricubicSampler<TreeOrAccT>::stencil(const CoordT& ijk, ValueT (&C)[64]) const
-{
- auto fetch = [&](int i, int j, int k) -> ValueT& { return C[((i + 1) << 4) + ((j + 1) << 2) + k + 1]; };
-
- // fetch 64 point stencil values
- for (int i = -1; i < 3; ++i) {
- for (int j = -1; j < 3; ++j) {
- fetch(i, j, -1) = mAcc.getValue(ijk + CoordT(i, j, -1));
- fetch(i, j, 0) = mAcc.getValue(ijk + CoordT(i, j, 0));
- fetch(i, j, 1) = mAcc.getValue(ijk + CoordT(i, j, 1));
- fetch(i, j, 2) = mAcc.getValue(ijk + CoordT(i, j, 2));
- }
- }
- const ValueT half(0.5), quarter(0.25), eighth(0.125);
- const ValueT X[64] = {// values of f(x,y,z) at the 8 corners (each from 1 stencil value).
- fetch(0, 0, 0),
- fetch(1, 0, 0),
- fetch(0, 1, 0),
- fetch(1, 1, 0),
- fetch(0, 0, 1),
- fetch(1, 0, 1),
- fetch(0, 1, 1),
- fetch(1, 1, 1),
- // values of df/dx at the 8 corners (each from 2 stencil values).
- half * (fetch(1, 0, 0) - fetch(-1, 0, 0)),
- half * (fetch(2, 0, 0) - fetch(0, 0, 0)),
- half * (fetch(1, 1, 0) - fetch(-1, 1, 0)),
- half * (fetch(2, 1, 0) - fetch(0, 1, 0)),
- half * (fetch(1, 0, 1) - fetch(-1, 0, 1)),
- half * (fetch(2, 0, 1) - fetch(0, 0, 1)),
- half * (fetch(1, 1, 1) - fetch(-1, 1, 1)),
- half * (fetch(2, 1, 1) - fetch(0, 1, 1)),
- // values of df/dy at the 8 corners (each from 2 stencil values).
- half * (fetch(0, 1, 0) - fetch(0, -1, 0)),
- half * (fetch(1, 1, 0) - fetch(1, -1, 0)),
- half * (fetch(0, 2, 0) - fetch(0, 0, 0)),
- half * (fetch(1, 2, 0) - fetch(1, 0, 0)),
- half * (fetch(0, 1, 1) - fetch(0, -1, 1)),
- half * (fetch(1, 1, 1) - fetch(1, -1, 1)),
- half * (fetch(0, 2, 1) - fetch(0, 0, 1)),
- half * (fetch(1, 2, 1) - fetch(1, 0, 1)),
- // values of df/dz at the 8 corners (each from 2 stencil values).
- half * (fetch(0, 0, 1) - fetch(0, 0, -1)),
- half * (fetch(1, 0, 1) - fetch(1, 0, -1)),
- half * (fetch(0, 1, 1) - fetch(0, 1, -1)),
- half * (fetch(1, 1, 1) - fetch(1, 1, -1)),
- half * (fetch(0, 0, 2) - fetch(0, 0, 0)),
- half * (fetch(1, 0, 2) - fetch(1, 0, 0)),
- half * (fetch(0, 1, 2) - fetch(0, 1, 0)),
- half * (fetch(1, 1, 2) - fetch(1, 1, 0)),
- // values of d2f/dxdy at the 8 corners (each from 4 stencil values).
- quarter * (fetch(1, 1, 0) - fetch(-1, 1, 0) - fetch(1, -1, 0) + fetch(-1, -1, 0)),
- quarter * (fetch(2, 1, 0) - fetch(0, 1, 0) - fetch(2, -1, 0) + fetch(0, -1, 0)),
- quarter * (fetch(1, 2, 0) - fetch(-1, 2, 0) - fetch(1, 0, 0) + fetch(-1, 0, 0)),
- quarter * (fetch(2, 2, 0) - fetch(0, 2, 0) - fetch(2, 0, 0) + fetch(0, 0, 0)),
- quarter * (fetch(1, 1, 1) - fetch(-1, 1, 1) - fetch(1, -1, 1) + fetch(-1, -1, 1)),
- quarter * (fetch(2, 1, 1) - fetch(0, 1, 1) - fetch(2, -1, 1) + fetch(0, -1, 1)),
- quarter * (fetch(1, 2, 1) - fetch(-1, 2, 1) - fetch(1, 0, 1) + fetch(-1, 0, 1)),
- quarter * (fetch(2, 2, 1) - fetch(0, 2, 1) - fetch(2, 0, 1) + fetch(0, 0, 1)),
- // values of d2f/dxdz at the 8 corners (each from 4 stencil values).
- quarter * (fetch(1, 0, 1) - fetch(-1, 0, 1) - fetch(1, 0, -1) + fetch(-1, 0, -1)),
- quarter * (fetch(2, 0, 1) - fetch(0, 0, 1) - fetch(2, 0, -1) + fetch(0, 0, -1)),
- quarter * (fetch(1, 1, 1) - fetch(-1, 1, 1) - fetch(1, 1, -1) + fetch(-1, 1, -1)),
- quarter * (fetch(2, 1, 1) - fetch(0, 1, 1) - fetch(2, 1, -1) + fetch(0, 1, -1)),
- quarter * (fetch(1, 0, 2) - fetch(-1, 0, 2) - fetch(1, 0, 0) + fetch(-1, 0, 0)),
- quarter * (fetch(2, 0, 2) - fetch(0, 0, 2) - fetch(2, 0, 0) + fetch(0, 0, 0)),
- quarter * (fetch(1, 1, 2) - fetch(-1, 1, 2) - fetch(1, 1, 0) + fetch(-1, 1, 0)),
- quarter * (fetch(2, 1, 2) - fetch(0, 1, 2) - fetch(2, 1, 0) + fetch(0, 1, 0)),
- // values of d2f/dydz at the 8 corners (each from 4 stencil values).
- quarter * (fetch(0, 1, 1) - fetch(0, -1, 1) - fetch(0, 1, -1) + fetch(0, -1, -1)),
- quarter * (fetch(1, 1, 1) - fetch(1, -1, 1) - fetch(1, 1, -1) + fetch(1, -1, -1)),
- quarter * (fetch(0, 2, 1) - fetch(0, 0, 1) - fetch(0, 2, -1) + fetch(0, 0, -1)),
- quarter * (fetch(1, 2, 1) - fetch(1, 0, 1) - fetch(1, 2, -1) + fetch(1, 0, -1)),
- quarter * (fetch(0, 1, 2) - fetch(0, -1, 2) - fetch(0, 1, 0) + fetch(0, -1, 0)),
- quarter * (fetch(1, 1, 2) - fetch(1, -1, 2) - fetch(1, 1, 0) + fetch(1, -1, 0)),
- quarter * (fetch(0, 2, 2) - fetch(0, 0, 2) - fetch(0, 2, 0) + fetch(0, 0, 0)),
- quarter * (fetch(1, 2, 2) - fetch(1, 0, 2) - fetch(1, 2, 0) + fetch(1, 0, 0)),
- // values of d3f/dxdydz at the 8 corners (each from 8 stencil values).
- eighth * (fetch(1, 1, 1) - fetch(-1, 1, 1) - fetch(1, -1, 1) + fetch(-1, -1, 1) - fetch(1, 1, -1) + fetch(-1, 1, -1) + fetch(1, -1, -1) - fetch(-1, -1, -1)),
- eighth * (fetch(2, 1, 1) - fetch(0, 1, 1) - fetch(2, -1, 1) + fetch(0, -1, 1) - fetch(2, 1, -1) + fetch(0, 1, -1) + fetch(2, -1, -1) - fetch(0, -1, -1)),
- eighth * (fetch(1, 2, 1) - fetch(-1, 2, 1) - fetch(1, 0, 1) + fetch(-1, 0, 1) - fetch(1, 2, -1) + fetch(-1, 2, -1) + fetch(1, 0, -1) - fetch(-1, 0, -1)),
- eighth * (fetch(2, 2, 1) - fetch(0, 2, 1) - fetch(2, 0, 1) + fetch(0, 0, 1) - fetch(2, 2, -1) + fetch(0, 2, -1) + fetch(2, 0, -1) - fetch(0, 0, -1)),
- eighth * (fetch(1, 1, 2) - fetch(-1, 1, 2) - fetch(1, -1, 2) + fetch(-1, -1, 2) - fetch(1, 1, 0) + fetch(-1, 1, 0) + fetch(1, -1, 0) - fetch(-1, -1, 0)),
- eighth * (fetch(2, 1, 2) - fetch(0, 1, 2) - fetch(2, -1, 2) + fetch(0, -1, 2) - fetch(2, 1, 0) + fetch(0, 1, 0) + fetch(2, -1, 0) - fetch(0, -1, 0)),
- eighth * (fetch(1, 2, 2) - fetch(-1, 2, 2) - fetch(1, 0, 2) + fetch(-1, 0, 2) - fetch(1, 2, 0) + fetch(-1, 2, 0) + fetch(1, 0, 0) - fetch(-1, 0, 0)),
- eighth * (fetch(2, 2, 2) - fetch(0, 2, 2) - fetch(2, 0, 2) + fetch(0, 0, 2) - fetch(2, 2, 0) + fetch(0, 2, 0) + fetch(2, 0, 0) - fetch(0, 0, 0))};
-
- // 4Kb of static table (int8_t has a range of -127 -> 127 which suffices)
- static const int8_t A[64][64] = {
- {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-3, 3, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {2, -2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {9, -9, -9, 9, 0, 0, 0, 0, 6, 3, -6, -3, 0, 0, 0, 0, 6, -6, 3, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-6, 6, 6, -6, 0, 0, 0, 0, -3, -3, 3, 3, 0, 0, 0, 0, -4, 4, -2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -2, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-6, 6, 6, -6, 0, 0, 0, 0, -4, -2, 4, 2, 0, 0, 0, 0, -3, 3, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {4, -4, -4, 4, 0, 0, 0, 0, 2, 2, -2, -2, 0, 0, 0, 0, 2, -2, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, -9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, -6, -3, 0, 0, 0, 0, 6, -6, 3, -3, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, 3, 3, 0, 0, 0, 0, -4, 4, -2, 2, 0, 0, 0, 0, -2, -2, -1, -1, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -2, 4, 2, 0, 0, 0, 0, -3, 3, -3, 3, 0, 0, 0, 0, -2, -1, -2, -1, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4, -4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, -2, -2, 0, 0, 0, 0, 2, -2, 2, -2, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0},
- {-3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {9, -9, 0, 0, -9, 9, 0, 0, 6, 3, 0, 0, -6, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, -6, 0, 0, 3, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-6, 6, 0, 0, 6, -6, 0, 0, -3, -3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 4, 0, 0, -2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -2, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, 0, 0, -9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, 0, 0, -6, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, -6, 0, 0, 3, -3, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 0, 0, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 4, 0, 0, -2, 2, 0, 0, -2, -2, 0, 0, -1, -1, 0, 0},
- {9, 0, -9, 0, -9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0, -6, 0, -3, 0, 6, 0, -6, 0, 3, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 9, 0, -9, 0, -9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0, -6, 0, -3, 0, 6, 0, -6, 0, 3, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0},
- {-27, 27, 27, -27, 27, -27, -27, 27, -18, -9, 18, 9, 18, 9, -18, -9, -18, 18, -9, 9, 18, -18, 9, -9, -18, 18, 18, -18, -9, 9, 9, -9, -12, -6, -6, -3, 12, 6, 6, 3, -12, -6, 12, 6, -6, -3, 6, 3, -12, 12, -6, 6, -6, 6, -3, 3, -8, -4, -4, -2, -4, -2, -2, -1},
- {18, -18, -18, 18, -18, 18, 18, -18, 9, 9, -9, -9, -9, -9, 9, 9, 12, -12, 6, -6, -12, 12, -6, 6, 12, -12, -12, 12, 6, -6, -6, 6, 6, 6, 3, 3, -6, -6, -3, -3, 6, 6, -6, -6, 3, 3, -3, -3, 8, -8, 4, -4, 4, -4, 2, -2, 4, 4, 2, 2, 2, 2, 1, 1},
- {-6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, -3, 0, 3, 0, 3, 0, -4, 0, 4, 0, -2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, -6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, -3, 0, 3, 0, 3, 0, -4, 0, 4, 0, -2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -1, 0, -1, 0},
- {18, -18, -18, 18, -18, 18, 18, -18, 12, 6, -12, -6, -12, -6, 12, 6, 9, -9, 9, -9, -9, 9, -9, 9, 12, -12, -12, 12, 6, -6, -6, 6, 6, 3, 6, 3, -6, -3, -6, -3, 8, 4, -8, -4, 4, 2, -4, -2, 6, -6, 6, -6, 3, -3, 3, -3, 4, 2, 4, 2, 2, 1, 2, 1},
- {-12, 12, 12, -12, 12, -12, -12, 12, -6, -6, 6, 6, 6, 6, -6, -6, -6, 6, -6, 6, 6, -6, 6, -6, -8, 8, 8, -8, -4, 4, 4, -4, -3, -3, -3, -3, 3, 3, 3, 3, -4, -4, 4, 4, -2, -2, 2, 2, -4, 4, -4, 4, -2, 2, -2, 2, -2, -2, -2, -2, -1, -1, -1, -1},
- {2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {-6, 6, 0, 0, 6, -6, 0, 0, -4, -2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {4, -4, 0, 0, -4, 4, 0, 0, 2, 2, 0, 0, -2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 0, 0, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, -3, 3, 0, 0, -2, -1, 0, 0, -2, -1, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4, 0, 0, -4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, -2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 2, -2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0},
- {-6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 0, -2, 0, 4, 0, 2, 0, -3, 0, 3, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, -6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 0, -2, 0, 4, 0, 2, 0, -3, 0, 3, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, -2, 0, -1, 0},
- {18, -18, -18, 18, -18, 18, 18, -18, 12, 6, -12, -6, -12, -6, 12, 6, 12, -12, 6, -6, -12, 12, -6, 6, 9, -9, -9, 9, 9, -9, -9, 9, 8, 4, 4, 2, -8, -4, -4, -2, 6, 3, -6, -3, 6, 3, -6, -3, 6, -6, 3, -3, 6, -6, 3, -3, 4, 2, 2, 1, 4, 2, 2, 1},
- {-12, 12, 12, -12, 12, -12, -12, 12, -6, -6, 6, 6, 6, 6, -6, -6, -8, 8, -4, 4, 8, -8, 4, -4, -6, 6, 6, -6, -6, 6, 6, -6, -4, -4, -2, -2, 4, 4, 2, 2, -3, -3, 3, 3, -3, -3, 3, 3, -4, 4, -2, 2, -4, 4, -2, 2, -2, -2, -1, -1, -2, -2, -1, -1},
- {4, 0, -4, 0, -4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, -2, 0, -2, 0, 2, 0, -2, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0, 0, 0, 0, 4, 0, -4, 0, -4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, -2, 0, -2, 0, 2, 0, -2, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0},
- {-12, 12, 12, -12, 12, -12, -12, 12, -8, -4, 8, 4, 8, 4, -8, -4, -6, 6, -6, 6, 6, -6, 6, -6, -6, 6, 6, -6, -6, 6, 6, -6, -4, -2, -4, -2, 4, 2, 4, 2, -4, -2, 4, 2, -4, -2, 4, 2, -3, 3, -3, 3, -3, 3, -3, 3, -2, -1, -2, -1, -2, -1, -2, -1},
- {8, -8, -8, 8, -8, 8, 8, -8, 4, 4, -4, -4, -4, -4, 4, 4, 4, -4, 4, -4, -4, 4, -4, 4, 4, -4, -4, 4, 4, -4, -4, 4, 2, 2, 2, 2, -2, -2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, -2, 2, -2, 2, -2, 2, -2, 1, 1, 1, 1, 1, 1, 1, 1}};
-
- for (int i = 0; i < 64; ++i) { // C = A * X
- C[i] = ValueT(0);
-#if 0
- for (int j = 0; j < 64; j += 4) {
- C[i] = fma(A[i][j], X[j], fma(A[i][j+1], X[j+1], fma(A[i][j+2], X[j+2], fma(A[i][j+3], X[j+3], C[i]))));
- }
-#else
- for (int j = 0; j < 64; j += 4) {
- C[i] += A[i][j] * X[j] + A[i][j + 1] * X[j + 1] + A[i][j + 2] * X[j + 2] + A[i][j + 3] * X[j + 3];
- }
-#endif
- }
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType TricubicSampler<TreeOrAccT>::sample(const Vec3T<RealT> &xyz, const ValueT (&C)[64])
-{
- ValueT zPow(1), sum(0);
- for (int k = 0, n = 0; k < 4; ++k) {
- ValueT yPow(1);
- for (int j = 0; j < 4; ++j, n += 4) {
-#if 0
- sum = fma( yPow, zPow * fma(xyz[0], fma(xyz[0], fma(xyz[0], C[n + 3], C[n + 2]), C[n + 1]), C[n]), sum);
-#else
- sum += yPow * zPow * (C[n] + xyz[0] * (C[n + 1] + xyz[0] * (C[n + 2] + xyz[0] * C[n + 3])));
-#endif
- yPow *= xyz[1];
- }
- zPow *= xyz[2];
- }
- return sum;
-}
-
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 3, true> : public TricubicSampler<TreeOrAccT>
-{
- using BaseT = TricubicSampler<TreeOrAccT>;
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-
- mutable CoordT mPos;
- mutable ValueT mC[64];
-
- template<typename RealT, template<typename...> class Vec3T>
- __hostdev__ void cache(Vec3T<RealT>& xyz) const;
-
-public:
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc)
- : BaseT(acc)
- {
- }
-
- /// @note xyz is in index space space
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
-
- // @brief Return value at the coordinate @a ijk in index space space
- __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);}
-
-}; // SampleFromVoxels<TreeOrAccT, 3, true>
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, true>::operator()(Vec3T<RealT> xyz) const
-{
- this->cache(xyz);
- return BaseT::sample(xyz, mC);
-}
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ void SampleFromVoxels<TreeOrAccT, 3, true>::cache(Vec3T<RealT>& xyz) const
-{
- CoordT ijk = Floor<CoordT>(xyz);
- if (ijk != mPos) {
- mPos = ijk;
- BaseT::stencil(ijk, mC);
- }
-}
-
-template<typename TreeOrAccT>
-class SampleFromVoxels<TreeOrAccT, 3, false> : public TricubicSampler<TreeOrAccT>
-{
- using BaseT = TricubicSampler<TreeOrAccT>;
- using ValueT = typename TreeOrAccT::ValueType;
- using CoordT = typename TreeOrAccT::CoordType;
-
-public:
- /// @brief Construction from a Tree or ReadAccessor
- __hostdev__ SampleFromVoxels(const TreeOrAccT& acc)
- : BaseT(acc)
- {
- }
-
- /// @note xyz is in index space space
- template<typename RealT, template<typename...> class Vec3T>
- inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
-
- __hostdev__ ValueT operator()(const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);}
-
-}; // SampleFromVoxels<TreeOrAccT, 3, true>
-
-template<typename TreeOrAccT>
-template<typename RealT, template<typename...> class Vec3T>
-__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, false>::operator()(Vec3T<RealT> xyz) const
-{
- ValueT C[64];
- CoordT ijk = Floor<CoordT>(xyz);
- BaseT::stencil(ijk, C);
- return BaseT::sample(xyz, C);
-}
-
-} // namespace nanovdb
-
-#endif // NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
+// SampleFromVoxels.h
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+//////////////////////////////////////////////////////////////////////////
+///
+/// @file SampleFromVoxels.h
+///
+/// @brief NearestNeighborSampler, TrilinearSampler, TriquadraticSampler and TricubicSampler
+///
+/// @note These interpolators employ internal caching for better performance when used repeatedly
+/// in the same voxel location, so try to reuse an instance of these classes more than once.
+///
+/// @warning While all the interpolators defined below work with both scalar and vector
+///          values (e.g. float and Vec3<float>), TrilinearSampler::zeroCrossing and
+///          TrilinearSampler::gradient will only compile with floating-point value types.
+///
+/// @author Ken Museth
+///
+///////////////////////////////////////////////////////////////////////////
+
+#ifndef NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
+#define NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
+
+// Only define __hostdev__ when compiling as NVIDIA CUDA
+#ifdef __CUDACC__
+#define __hostdev__ __host__ __device__
+#elif defined(__KERNEL_METAL__)
+#else
+#include <cmath> // for floor
+#define __hostdev__
+#endif
+
+namespace nanovdb {
+
+// Forward declaration of sampler with specific polynomial orders
+template<typename TreeT, int Order, bool UseCache = true>
+class SampleFromVoxels;
+
+/// @brief Factory free-function for a sampler of specific polynomial orders
+///
+/// @details This allows for the compact syntax:
+/// @code
+/// auto acc = grid.getAccessor();
+/// auto smp = nanovdb::createSampler<1>( acc );
+/// @endcode
+template<int Order, typename TreeOrAccT, bool UseCache = true>
+__hostdev__ SampleFromVoxels<TreeOrAccT, Order, UseCache> createSampler(__global__ const TreeOrAccT& acc)
+{
+ return SampleFromVoxels<TreeOrAccT, Order, UseCache>(acc);
+}
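+
+// A minimal usage sketch (illustrative only, assuming a nanovdb::FloatGrid named `grid`):
+// the factory above selects the interpolation order at compile time.
+//
+//   auto acc = grid.getAccessor();
+//   auto nearest   = nanovdb::createSampler<0>(acc); // nearest-neighbor
+//   auto trilinear = nanovdb::createSampler<1>(acc); // tri-linear
+//   float v = trilinear(nanovdb::Vec3f(1.5f, 2.25f, 3.75f)); // xyz is in index space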
+
+/// @brief Utility function that returns the Coord of the round-down of @a xyz
+/// and redefines @a xyz as the fractional part, i.e. xyz-in = return-value + xyz-out
+template<typename CoordT, typename RealT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Floor(__global__ Vec3T<RealT>& xyz);
+
+/// @brief Template specialization of Floor for Vec3<float>
+template<typename CoordT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Floor(__global__ Vec3T<float>& xyz)
+{
+ const float ijk[3] = {floorf(xyz[0]), floorf(xyz[1]), floorf(xyz[2])};
+ xyz[0] -= ijk[0];
+ xyz[1] -= ijk[1];
+ xyz[2] -= ijk[2];
+ return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2]));
+}
+
+/// @brief Template specialization of Floor for Vec3<double>
+template<typename CoordT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Floor(__global__ Vec3T<double>& xyz)
+{
+ const double ijk[3] = {floor(xyz[0]), floor(xyz[1]), floor(xyz[2])};
+ xyz[0] -= ijk[0];
+ xyz[1] -= ijk[1];
+ xyz[2] -= ijk[2];
+ return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2]));
+}
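+
+// Worked example of the contract documented above: for xyz = (1.7, -0.3, 2.0), Floor
+// returns ijk = (1, -1, 2) and rewrites xyz to the fractional remainder (0.7, 0.7, 0.0),
+// so the original position always equals ijk + xyz.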
+
+#if defined(__KERNEL_METAL__)
+/// @brief Template specialization of Floor for Vec3<float>
+template<typename CoordT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Floor(__local__ Vec3T<float>& xyz)
+{
+ const float ijk[3] = {floorf(xyz[0]), floorf(xyz[1]), floorf(xyz[2])};
+ xyz[0] -= ijk[0];
+ xyz[1] -= ijk[1];
+ xyz[2] -= ijk[2];
+ return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2]));
+}
+
+/// @brief Template specialization of Floor for Vec3<double>
+template<typename CoordT, template<typename> class Vec3T>
+__hostdev__ inline CoordT Floor(__local__ Vec3T<double>& xyz)
+{
+ const double ijk[3] = {floor(xyz[0]), floor(xyz[1]), floor(xyz[2])};
+ xyz[0] -= ijk[0];
+ xyz[1] -= ijk[1];
+ xyz[2] -= ijk[2];
+ return CoordT(int32_t(ijk[0]), int32_t(ijk[1]), int32_t(ijk[2]));
+}
+#endif
+
+// ------------------------------> NearestNeighborSampler <--------------------------------------
+
+/// @brief Nearest neighbor, i.e. zero order, interpolator with caching
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 0, true>
+{
+public:
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+
+ static __constant__ const int ORDER = 0;
+ /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc)
+ : mAcc(acc)
+ , mPos(CoordT::max())
+ {
+ }
+
+ __hostdev__ __global__ const TreeOrAccT& accessor() const { return mAcc; }
+
+    /// @note xyz is in index space
+ template<typename Vec3T>
+ inline __hostdev__ ValueT operator()(__global__ const Vec3T& xyz) const __local__;
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ inline __hostdev__ ValueT operator()(__local__ const Vec3T& xyz) const __local__;
+#endif
+
+ inline __hostdev__ ValueT operator()(__global__ const CoordT& ijk) const __local__;
+
+ inline __hostdev__ ValueT operator()() const;
+
+private:
+ __global__ const TreeOrAccT& mAcc;
+ mutable CoordT mPos;
+ mutable ValueT mVal; // private cache
+}; // SampleFromVoxels<TreeOrAccT, 0, true>
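+
+// Note on the cache: mPos/mVal memoize the most recently visited voxel, so repeated queries
+// that round to the same coordinate hit the tree only once. A hedged sketch (assuming an
+// accessor `acc` as above):
+//
+//   auto smp = nanovdb::createSampler<0>(acc);
+//   float a = smp(nanovdb::Vec3f(3.2f, 4.1f, 5.9f)); // tree lookup at (3,4,6), result cached
+//   float b = smp(nanovdb::Vec3f(3.4f, 3.8f, 6.2f)); // also rounds to (3,4,6) -> served from cache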
+
+/// @brief Nearest neighbor, i.e. zero order, interpolator without caching
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 0, false>
+{
+public:
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+ static __constant__ const int ORDER = 0;
+
+ /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc)
+ : mAcc(acc)
+ {
+ }
+
+ __hostdev__ __global__ const TreeOrAccT& accessor() const __local__ { return mAcc; }
+
+    /// @note xyz is in index space
+ template<typename Vec3T>
+ inline __hostdev__ ValueT operator()(__global__ const Vec3T& xyz) const __local__;
+#if defined(__KERNEL_METAL__)
+ template<typename Vec3T>
+ inline __hostdev__ ValueT operator()(__local__ const Vec3T& xyz) const __local__;
+#endif
+
+ inline __hostdev__ ValueT operator()(__global__ const CoordT& ijk) const __local__ { return mAcc.getValue(ijk);}
+
+private:
+ __local__ const TreeOrAccT& mAcc;
+}; // SampleFromVoxels<TreeOrAccT, 0, false>
+
+template<typename TreeOrAccT>
+template<typename Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(__global__ const Vec3T& xyz) const __local__
+{
+ const CoordT ijk = Round<CoordT>(xyz);
+ if (ijk != mPos) {
+ mPos = ijk;
+ mVal = mAcc.getValue(mPos);
+ }
+ return mVal;
+}
+#if defined(__KERNEL_METAL__)
+template<typename TreeOrAccT>
+template<typename Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(__local__ const Vec3T& xyz) const __local__
+{
+ const CoordT ijk = Round<CoordT>(xyz);
+ if (ijk != mPos) {
+ mPos = ijk;
+ mVal = mAcc.getValue(mPos);
+ }
+ return mVal;
+}
+#endif
+
+template<typename TreeOrAccT>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(__global__ const CoordT& ijk) const __local__
+{
+ if (ijk != mPos) {
+ mPos = ijk;
+ mVal = mAcc.getValue(mPos);
+ }
+ return mVal;
+}
+
+template<typename TreeOrAccT>
+template<typename Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, false>::operator()(__global__ const Vec3T& xyz) const __local__
+{
+ return mAcc.getValue(Round<CoordT>(xyz));
+}
+
+#if defined(__KERNEL_METAL__)
+template<typename TreeOrAccT>
+template<typename Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, false>::operator()(__local__ const Vec3T& xyz) const __local__
+{
+ return mAcc.getValue(Round<CoordT>(xyz));
+}
+#endif
+
+// ------------------------------> TrilinearSampler <--------------------------------------
+
+/// @brief Tri-linear sampler, i.e. first order, interpolator
+template<typename TreeOrAccT>
+class TrilinearSampler
+{
+#if defined(__KERNEL_METAL__)
+public:
+#else
+protected:
+#endif
+ __local__ const TreeOrAccT& mAcc;
+
+public:
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+ static __constant__ const int ORDER = 1;
+
+    /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ TrilinearSampler(__local__ const TreeOrAccT& acc) : mAcc(acc) {}
+
+ __hostdev__ __global__ const TreeOrAccT& accessor() const { return mAcc; }
+
+ /// @brief Extract the stencil of 8 values
+ inline __hostdev__ void stencil(__global__ CoordT& ijk, __global__ ValueT (&v)[2][2][2]) const;
+
+ template<typename RealT, template<typename...> class Vec3T>
+ static inline __hostdev__ ValueT sample(__global__ const Vec3T<RealT> &uvw, __global__ const ValueT (&v)[2][2][2]);
+
+ template<typename RealT, template<typename...> class Vec3T>
+ static inline __hostdev__ Vec3T<ValueT> gradient(__global__ const Vec3T<RealT> &uvw, __global__ const ValueT (&v)[2][2][2]);
+
+ static inline __hostdev__ bool zeroCrossing(__global__ const ValueT (&v)[2][2][2]);
+}; // TrilinearSampler
+
+template<typename TreeOrAccT>
+void TrilinearSampler<TreeOrAccT>::stencil(__global__ CoordT& ijk, __global__ ValueT (&v)[2][2][2]) const
+{
+ v[0][0][0] = mAcc.getValue(ijk); // i, j, k
+
+ ijk[2] += 1;
+ v[0][0][1] = mAcc.getValue(ijk); // i, j, k + 1
+
+ ijk[1] += 1;
+ v[0][1][1] = mAcc.getValue(ijk); // i, j+1, k + 1
+
+ ijk[2] -= 1;
+ v[0][1][0] = mAcc.getValue(ijk); // i, j+1, k
+
+ ijk[0] += 1;
+ ijk[1] -= 1;
+ v[1][0][0] = mAcc.getValue(ijk); // i+1, j, k
+
+ ijk[2] += 1;
+ v[1][0][1] = mAcc.getValue(ijk); // i+1, j, k + 1
+
+ ijk[1] += 1;
+ v[1][1][1] = mAcc.getValue(ijk); // i+1, j+1, k + 1
+
+ ijk[2] -= 1;
+ v[1][1][0] = mAcc.getValue(ijk); // i+1, j+1, k
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+typename TreeOrAccT::ValueType TrilinearSampler<TreeOrAccT>::sample(__global__ const Vec3T<RealT> &uvw, __global__ const ValueT (&v)[2][2][2])
+{
+#if 0
+ auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a
+ //auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b, fma(-w, a, a));};// = (1-w)*a + w*b
+#else
+ struct Lerp {
+ static ValueT lerp(ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); }
+ };
+#endif
+ return Lerp::lerp(Lerp::lerp(Lerp::lerp(v[0][0][0], v[0][0][1], uvw[2]), Lerp::lerp(v[0][1][0], v[0][1][1], uvw[2]), uvw[1]),
+ Lerp::lerp(Lerp::lerp(v[1][0][0], v[1][0][1], uvw[2]), Lerp::lerp(v[1][1][0], v[1][1][1], uvw[2]), uvw[1]),
+ uvw[0]);
+}
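+
+// The nested lerps above interpolate along z first, then y, then x. Two easy sanity checks
+// of the expression (properties of tri-linear interpolation, stated for clarity): uvw = (0,0,0)
+// returns v[0][0][0], and uvw = (0.5,0.5,0.5) returns the plain average of the 8 corner values.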
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+Vec3T<typename TreeOrAccT::ValueType> TrilinearSampler<TreeOrAccT>::gradient(__global__ const Vec3T<RealT> &uvw, __global__ const ValueT (&v)[2][2][2])
+{
+ static_assert(is_floating_point<ValueT>::value, "TrilinearSampler::gradient requires a floating-point type");
+#if 0
+ auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a
+ //auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b, fma(-w, a, a));};// = (1-w)*a + w*b
+#else
+ struct Lerp {
+ static ValueT lerp(ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); }
+ };
+#endif
+
+ ValueT D[4] = {v[0][0][1] - v[0][0][0], v[0][1][1] - v[0][1][0], v[1][0][1] - v[1][0][0], v[1][1][1] - v[1][1][0]};
+
+ // Z component
+    Vec3T<ValueT> grad(0, 0, Lerp::lerp(Lerp::lerp(D[0], D[1], uvw[1]), Lerp::lerp(D[2], D[3], uvw[1]), uvw[0]));
+
+ const ValueT w = ValueT(uvw[2]);
+ D[0] = v[0][0][0] + D[0] * w;
+ D[1] = v[0][1][0] + D[1] * w;
+ D[2] = v[1][0][0] + D[2] * w;
+ D[3] = v[1][1][0] + D[3] * w;
+
+ // X component
+ grad[0] = Lerp::lerp(D[2], D[3], uvw[1]) - Lerp::lerp(D[0], D[1], uvw[1]);
+
+ // Y component
+ grad[1] = Lerp::lerp(D[1] - D[0], D[3] - D[2], uvw[0]);
+
+ return grad;
+}
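+
+// Sanity check for the gradient above (a property of the tri-linear model, noted for clarity):
+// at uvw = (0,0,0) it collapses to the one-sided differences
+// (v[1][0][0]-v[0][0][0], v[0][1][0]-v[0][0][0], v[0][0][1]-v[0][0][0]),
+// i.e. the finite differences along x, y and z at the lower corner of the cell. The result is
+// expressed per index-space voxel; mapping it to world space is left to the caller.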
+
+template<typename TreeOrAccT>
+bool TrilinearSampler<TreeOrAccT>::zeroCrossing(__global__ const ValueT (&v)[2][2][2])
+{
+ static_assert(is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
+ const bool less = v[0][0][0] < ValueT(0);
+ return (less ^ (v[0][0][1] < ValueT(0))) ||
+ (less ^ (v[0][1][1] < ValueT(0))) ||
+ (less ^ (v[0][1][0] < ValueT(0))) ||
+ (less ^ (v[1][0][0] < ValueT(0))) ||
+ (less ^ (v[1][0][1] < ValueT(0))) ||
+ (less ^ (v[1][1][1] < ValueT(0))) ||
+ (less ^ (v[1][1][0] < ValueT(0)));
+}
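+
+// The XOR test above compares the sign of the first corner against the other seven, so it
+// returns true exactly when at least one corner differs in sign from v[0][0][0], i.e. when
+// the zero iso-surface passes through the cell. Values equal to zero count as non-negative.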
+
+/// @brief Template specialization that does not use caching of stencil points
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 1, false>
+#if !defined(__KERNEL_METAL__)
+ : public TrilinearSampler<TreeOrAccT>
+#endif
+{
+#if defined(__KERNEL_METAL__)
+
+ TrilinearSampler<TreeOrAccT> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+
+#endif
+ using BaseT = TrilinearSampler<TreeOrAccT>;
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+
+public:
+
+ /// @brief Construction from a Tree or ReadAccessor
+#if defined(__KERNEL_METAL__)
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc) : _base(acc) {}
+#else
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc) : BaseT(acc) {}
+#endif
+
+    /// @note xyz is in index space
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+    /// @note ijk is in index space
+    __hostdev__ ValueT operator()(__global__ const CoordT &ijk) const {return BASE(mAcc).getValue(ijk);}
+
+ /// @brief Return the gradient in index space.
+ ///
+ /// @warning Will only compile with floating point value types
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ Vec3T<ValueT> gradient(Vec3T<RealT> xyz) const;
+
+    /// @brief Return true if the tri-linear stencil has a zero crossing at the specified index position.
+ ///
+ /// @warning Will only compile with floating point value types
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
+
+}; // SampleFromVoxels<TreeOrAccT, 1, false>
+
+/// @brief Template specialization with caching of stencil values
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 1, true>
+#if !defined(__KERNEL_METAL__)
+ : public TrilinearSampler<TreeOrAccT>
+#endif
+{
+#if defined(__KERNEL_METAL__)
+ TrilinearSampler<TreeOrAccT> _base;
+#endif
+ using BaseT = TrilinearSampler<TreeOrAccT>;
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+
+ mutable CoordT mPos;
+ mutable ValueT mVal[2][2][2];
+
+ template<typename RealT, template<typename...> class Vec3T>
+ __hostdev__ void cache(__global__ Vec3T<RealT>& xyz) const;
+public:
+
+ /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc) : BaseT(acc), mPos(CoordT::max()){}
+
+    /// @note xyz is in index space
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+    // @note ijk is in index space
+ __hostdev__ ValueT operator()(__global__ const CoordT &ijk) const;
+
+ /// @brief Return the gradient in index space.
+ ///
+ /// @warning Will only compile with floating point value types
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ Vec3T<ValueT> gradient(Vec3T<RealT> xyz) const;
+
+    /// @brief Return true if the tri-linear stencil has a zero crossing at the specified index position.
+ ///
+ /// @warning Will only compile with floating point value types
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
+
+ /// @brief Return true if the cached tri-linear stencil has a zero crossing.
+ ///
+ /// @warning Will only compile with floating point value types
+ __hostdev__ bool zeroCrossing() const { return BaseT::zeroCrossing(mVal); }
+
+}; // SampleFromVoxels<TreeOrAccT, 1, true>
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(Vec3T<RealT> xyz) const
+{
+ this->cache(xyz);
+ return BaseT::sample(xyz, mVal);
+}
+
+template<typename TreeOrAccT>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(__global__ const CoordT &ijk) const
+{
+ return ijk == mPos ? mVal[0][0][0] : BaseT::mAcc.getValue(ijk);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, true>::gradient(Vec3T<RealT> xyz) const
+{
+ this->cache(xyz);
+ return BaseT::gradient(xyz, mVal);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 1, true>::zeroCrossing(Vec3T<RealT> xyz) const
+{
+ this->cache(xyz);
+ return BaseT::zeroCrossing(mVal);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+void SampleFromVoxels<TreeOrAccT, 1, true>::cache(__global__ Vec3T<RealT>& xyz) const
+{
+ CoordT ijk = Floor<CoordT>(xyz);
+ if (ijk != mPos) {
+ mPos = ijk;
+ BaseT::stencil(ijk, mVal);
+ }
+}
+
+#if 0
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
+{
+ ValueT val[2][2][2];
+ CoordT ijk = Floor<CoordT>(xyz);
+ BaseT::stencil(ijk, val);
+ return BaseT::sample(xyz, val);
+}
+
+#else
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
+{
+ struct Lerp {
+ static ValueT lerp(ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); }
+ };
+
+ CoordT coord = Floor<CoordT>(xyz);
+
+ ValueT vx, vx1, vy, vy1, vz, vz1;
+
+ vz = BASE(mAcc).getValue(coord);
+ coord[2] += 1;
+ vz1 = BASE(mAcc).getValue(coord);
+ vy = Lerp::lerp(vz, vz1, xyz[2]);
+
+ coord[1] += 1;
+
+ vz1 = BASE(mAcc).getValue(coord);
+ coord[2] -= 1;
+ vz = BASE(mAcc).getValue(coord);
+ vy1 = Lerp::lerp(vz, vz1, xyz[2]);
+
+ vx = Lerp::lerp(vy, vy1, xyz[1]);
+
+ coord[0] += 1;
+
+ vz = BASE(mAcc).getValue(coord);
+ coord[2] += 1;
+ vz1 = BASE(mAcc).getValue(coord);
+ vy1 = Lerp::lerp(vz, vz1, xyz[2]);
+
+ coord[1] -= 1;
+
+ vz1 = BASE(mAcc).getValue(coord);
+ coord[2] -= 1;
+ vz = BASE(mAcc).getValue(coord);
+ vy = Lerp::lerp(vz, vz1, xyz[2]);
+
+ vx1 = Lerp::lerp(vy, vy1, xyz[1]);
+
+ return Lerp::lerp(vx, vx1, xyz[0]);
+}
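+
+// Note: the hand-unrolled traversal above walks the 8 cell corners by incrementing a single
+// CoordT, so it performs exactly 8 accessor lookups and 7 lerps per sample and never
+// materializes a temporary v[2][2][2] stencil array.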
+#endif
+
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+inline Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, false>::gradient(Vec3T<RealT> xyz) const
+{
+ ValueT val[2][2][2];
+ CoordT ijk = Floor<CoordT>(xyz);
+    BASE(stencil)(ijk, val);
+ return BaseT::gradient(xyz, val);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+bool SampleFromVoxels<TreeOrAccT, 1, false>::zeroCrossing(Vec3T<RealT> xyz) const
+{
+ ValueT val[2][2][2];
+ CoordT ijk = Floor<CoordT>(xyz);
+    BASE(stencil)(ijk, val);
+ return BaseT::zeroCrossing(val);
+}
+
+// ------------------------------> TriquadraticSampler <--------------------------------------
+
+/// @brief Tri-quadratic sampler, i.e. second order, interpolator
+template<typename TreeOrAccT>
+class TriquadraticSampler
+{
+protected:
+ __local__ const TreeOrAccT& mAcc;
+
+public:
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+    static __constant__ const int ORDER = 2;
+
+    /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ TriquadraticSampler(__local__ const TreeOrAccT& acc) : mAcc(acc) {}
+
+ __hostdev__ __global__ const TreeOrAccT& accessor() const { return mAcc; }
+
+ /// @brief Extract the stencil of 27 values
+ inline __hostdev__ void stencil(__local__ const CoordT &ijk, __local__ ValueT (&v)[3][3][3]) const;
+
+ template<typename RealT, template<typename...> class Vec3T>
+ static inline __hostdev__ ValueT sample(__local__ const Vec3T<RealT> &uvw, __local__ const ValueT (&v)[3][3][3]);
+
+ static inline __hostdev__ bool zeroCrossing(__global__ const ValueT (&v)[3][3][3]);
+}; // TriquadraticSampler
+
+template<typename TreeOrAccT>
+void TriquadraticSampler<TreeOrAccT>::stencil(__local__ const CoordT &ijk, __local__ ValueT (&v)[3][3][3]) const
+{
+ CoordT p(ijk[0] - 1, 0, 0);
+ for (int dx = 0; dx < 3; ++dx, ++p[0]) {
+ p[1] = ijk[1] - 1;
+ for (int dy = 0; dy < 3; ++dy, ++p[1]) {
+ p[2] = ijk[2] - 1;
+ for (int dz = 0; dz < 3; ++dz, ++p[2]) {
+ v[dx][dy][dz] = mAcc.getValue(p);// extract the stencil of 27 values
+ }
+ }
+ }
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+typename TreeOrAccT::ValueType TriquadraticSampler<TreeOrAccT>::sample(__local__ const Vec3T<RealT> &uvw, __local__ const ValueT (&v)[3][3][3])
+{
+ struct Kernel {
+ static ValueT _kernel(__local__ const ValueT* value, double weight) {
+ return weight * (weight * (0.5f * (value[0] + value[2]) - value[1]) + 0.5f * (value[2] - value[0])) + value[1];
+ }
+ };
+
+ ValueT vx[3];
+ for (int dx = 0; dx < 3; ++dx) {
+ ValueT vy[3];
+ for (int dy = 0; dy < 3; ++dy) {
+ vy[dy] = Kernel::_kernel(&v[dx][dy][0], uvw[2]);
+ }//loop over y
+ vx[dx] = Kernel::_kernel(vy, uvw[1]);
+ }//loop over x
+ return Kernel::_kernel(vx, uvw[0]);
+}
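+
+// The 1D kernel above fits a parabola through three consecutive samples; written out
+// (the same algebra, only rearranged for clarity):
+//   kernel(v, w) = v[1] + 0.5*w*(v[2] - v[0]) + 0.5*w*w*(v[0] - 2*v[1] + v[2])
+// so w = 0 returns the centre sample v[1]. The 3D result applies this kernel along z,
+// then y, then x.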
+
+template<typename TreeOrAccT>
+bool TriquadraticSampler<TreeOrAccT>::zeroCrossing(__global__ const ValueT (&v)[3][3][3])
+{
+    static_assert(is_floating_point<ValueT>::value, "TriquadraticSampler::zeroCrossing requires a floating-point type");
+ const bool less = v[0][0][0] < ValueT(0);
+ for (int dx = 0; dx < 3; ++dx) {
+ for (int dy = 0; dy < 3; ++dy) {
+ for (int dz = 0; dz < 3; ++dz) {
+ if (less ^ (v[dx][dy][dz] < ValueT(0))) return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// @brief Template specialization that does not use caching of stencil points
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 2, false>
+#if !defined(__KERNEL_METAL__)
+ : public TriquadraticSampler<TreeOrAccT>
+#endif
+{
+#if defined(__KERNEL_METAL__)
+ TriquadraticSampler<TreeOrAccT> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
+ using BaseT = TriquadraticSampler<TreeOrAccT>;
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+public:
+
+ /// @brief Construction from a Tree or ReadAccessor
+#if defined(__KERNEL_METAL__)
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc) : _base(acc) {}
+#else
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc) : BaseT(acc) {}
+#endif
+
+    /// @note xyz is in index space
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+    __hostdev__ ValueT operator()(__global__ const CoordT &ijk) const {return BASE(mAcc).getValue(ijk);}
+
+    /// @brief Return true if the tri-quadratic stencil has a zero crossing at the specified index position.
+ ///
+ /// @warning Will only compile with floating point value types
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
+
+}; // SampleFromVoxels<TreeOrAccT, 2, false>
+
+/// @brief Template specialization with caching of stencil values
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 2, true>
+#if !defined(__KERNEL_METAL__)
+ : public TriquadraticSampler<TreeOrAccT>
+#endif
+{
+#if defined(__KERNEL_METAL__)
+ TriquadraticSampler<TreeOrAccT> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
+ using BaseT = TriquadraticSampler<TreeOrAccT>;
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+
+ mutable CoordT mPos;
+ mutable ValueT mVal[3][3][3];
+
+ template<typename RealT, template<typename...> class Vec3T>
+ __hostdev__ void cache(__global__ Vec3T<RealT>& xyz) const;
+public:
+
+ /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc) : BaseT(acc), mPos(CoordT::max()){}
+
+    /// @note xyz is in index space
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+ inline __hostdev__ ValueT operator()(__global__ const CoordT &ijk) const;
+
+    /// @brief Return true if the tri-quadratic stencil has a zero crossing at the specified index position.
+ ///
+ /// @warning Will only compile with floating point value types
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ bool zeroCrossing(Vec3T<RealT> xyz) const;
+
+    /// @brief Return true if the cached tri-quadratic stencil has a zero crossing.
+ ///
+ /// @warning Will only compile with floating point value types
+ __hostdev__ bool zeroCrossing() const { return BaseT::zeroCrossing(mVal); }
+
+}; // SampleFromVoxels<TreeOrAccT, 2, true>
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(Vec3T<RealT> xyz) const
+{
+ this->cache(xyz);
+ return BaseT::sample(xyz, mVal);
+}
+
+template<typename TreeOrAccT>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(__global__ const CoordT &ijk) const
+{
+ return ijk == mPos ? mVal[1][1][1] : BaseT::mAcc.getValue(ijk);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 2, true>::zeroCrossing(Vec3T<RealT> xyz) const
+{
+ this->cache(xyz);
+ return BaseT::zeroCrossing(mVal);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+void SampleFromVoxels<TreeOrAccT, 2, true>::cache(__global__ Vec3T<RealT>& xyz) const
+{
+ CoordT ijk = Floor<CoordT>(xyz);
+ if (ijk != mPos) {
+ mPos = ijk;
+ BaseT::stencil(ijk, mVal);
+ }
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, false>::operator()(Vec3T<RealT> xyz) const
+{
+ ValueT val[3][3][3];
+ CoordT ijk = Floor<CoordT>(xyz);
+ BASE(stencil)(ijk, val);
+ return BaseT::sample(xyz, val);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+bool SampleFromVoxels<TreeOrAccT, 2, false>::zeroCrossing(Vec3T<RealT> xyz) const
+{
+ ValueT val[3][3][3];
+ CoordT ijk = Floor<CoordT>(xyz);
+    BASE(stencil)(ijk, val);
+ return BaseT::zeroCrossing(val);
+}
+
+// ------------------------------> TricubicSampler <--------------------------------------
+
+/// @brief Tri-cubic sampler, i.e. third order, interpolator.
+///
+/// @details See the following paper for implementation details:
+/// Lekien, F. and Marsden, J.: Tricubic interpolation in three dimensions.
+/// In: International Journal for Numerical Methods
+/// in Engineering (2005), No. 63, p. 455-471
+
+template<typename TreeOrAccT>
+class TricubicSampler
+{
+protected:
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+
+ __global__ const TreeOrAccT& mAcc;
+
+public:
+ /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ TricubicSampler(__global__ const TreeOrAccT& acc)
+ : mAcc(acc)
+ {
+ }
+
+ __hostdev__ __global__ const TreeOrAccT& accessor() const { return mAcc; }
+
+    /// @brief Extract the stencil of 64 values
+ inline __hostdev__ void stencil(__global__ const CoordT& ijk, __global__ ValueT (&c)[64]) const;
+
+ template<typename RealT, template<typename...> class Vec3T>
+ static inline __hostdev__ ValueT sample(__global__ const Vec3T<RealT> &uvw, __global__ const ValueT (&c)[64]);
+}; // TricubicSampler
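+
+// Overview of the tricubic machinery below (summarizing the Lekien & Marsden scheme referenced
+// above): stencil() gathers a 4x4x4 neighborhood, builds a 64-vector X of function values and
+// finite-difference estimates of df/dx, df/dy, df/dz, d2f/dxdy, d2f/dxdz, d2f/dydz and
+// d3f/dxdydz at the 8 cell corners, and multiplies it by the constant 64x64 matrix below
+// (C = A * X) to obtain the polynomial coefficients that sample() evaluates at the fractional
+// position.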
+
+// 4Kb of static table (int8_t has a range of -128 -> 127 which suffices)
+static __constant__ const int8_t TricubicSampler_A[64][64] = {
+ {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {-3, 3, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {2, -2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {9, -9, -9, 9, 0, 0, 0, 0, 6, 3, -6, -3, 0, 0, 0, 0, 6, -6, 3, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {-6, 6, 6, -6, 0, 0, 0, 0, -3, -3, 3, 3, 0, 0, 0, 0, -4, 4, -2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -2, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {-6, 6, 6, -6, 0, 0, 0, 0, -4, -2, 4, 2, 0, 0, 0, 0, -3, 3, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {4, -4, -4, 4, 0, 0, 0, 0, 2, 2, -2, -2, 0, 0, 0, 0, 2, -2, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, -9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, -6, -3, 0, 0, 0, 0, 6, -6, 3, -3, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, 3, 3, 0, 0, 0, 0, -4, 4, -2, 2, 0, 0, 0, 0, -2, -2, -1, -1, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -2, 4, 2, 0, 0, 0, 0, -3, 3, -3, 3, 0, 0, 0, 0, -2, -1, -2, -1, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4, -4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, -2, -2, 0, 0, 0, 0, 2, -2, 2, -2, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0},
+ {-3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {9, -9, 0, 0, -9, 9, 0, 0, 6, 3, 0, 0, -6, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, -6, 0, 0, 3, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {-6, 6, 0, 0, 6, -6, 0, 0, -3, -3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 4, 0, 0, -2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -2, 0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 0, -1, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, 0, 0, -9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, 0, 0, -6, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, -6, 0, 0, 3, -3, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 0, 0, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 4, 0, 0, -2, 2, 0, 0, -2, -2, 0, 0, -1, -1, 0, 0},
+ {9, 0, -9, 0, -9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0, -6, 0, -3, 0, 6, 0, -6, 0, 3, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 9, 0, -9, 0, -9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0, -6, 0, -3, 0, 6, 0, -6, 0, 3, 0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0},
+ {-27, 27, 27, -27, 27, -27, -27, 27, -18, -9, 18, 9, 18, 9, -18, -9, -18, 18, -9, 9, 18, -18, 9, -9, -18, 18, 18, -18, -9, 9, 9, -9, -12, -6, -6, -3, 12, 6, 6, 3, -12, -6, 12, 6, -6, -3, 6, 3, -12, 12, -6, 6, -6, 6, -3, 3, -8, -4, -4, -2, -4, -2, -2, -1},
+ {18, -18, -18, 18, -18, 18, 18, -18, 9, 9, -9, -9, -9, -9, 9, 9, 12, -12, 6, -6, -12, 12, -6, 6, 12, -12, -12, 12, 6, -6, -6, 6, 6, 6, 3, 3, -6, -6, -3, -3, 6, 6, -6, -6, 3, 3, -3, -3, 8, -8, 4, -4, 4, -4, 2, -2, 4, 4, 2, 2, 2, 2, 1, 1},
+ {-6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, -3, 0, 3, 0, 3, 0, -4, 0, 4, 0, -2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, -6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, -3, 0, 3, 0, 3, 0, -4, 0, 4, 0, -2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -1, 0, -1, 0},
+ {18, -18, -18, 18, -18, 18, 18, -18, 12, 6, -12, -6, -12, -6, 12, 6, 9, -9, 9, -9, -9, 9, -9, 9, 12, -12, -12, 12, 6, -6, -6, 6, 6, 3, 6, 3, -6, -3, -6, -3, 8, 4, -8, -4, 4, 2, -4, -2, 6, -6, 6, -6, 3, -3, 3, -3, 4, 2, 4, 2, 2, 1, 2, 1},
+ {-12, 12, 12, -12, 12, -12, -12, 12, -6, -6, 6, 6, 6, 6, -6, -6, -6, 6, -6, 6, 6, -6, 6, -6, -8, 8, 8, -8, -4, 4, 4, -4, -3, -3, -3, -3, 3, 3, 3, 3, -4, -4, 4, 4, -2, -2, 2, 2, -4, 4, -4, 4, -2, 2, -2, 2, -2, -2, -2, -2, -1, -1, -1, -1},
+ {2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {-6, 6, 0, 0, 6, -6, 0, 0, -4, -2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, -3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -1, 0, 0, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {4, -4, 0, 0, -4, 4, 0, 0, 2, 2, 0, 0, -2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -6, 6, 0, 0, 6, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, -3, 3, 0, 0, -2, -1, 0, 0, -2, -1, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4, 0, 0, -4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, -2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 2, -2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0},
+ {-6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 0, -2, 0, 4, 0, 2, 0, -3, 0, 3, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, -6, 0, 6, 0, 6, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, 0, -2, 0, 4, 0, 2, 0, -3, 0, 3, 0, -3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -1, 0, -2, 0, -1, 0},
+ {18, -18, -18, 18, -18, 18, 18, -18, 12, 6, -12, -6, -12, -6, 12, 6, 12, -12, 6, -6, -12, 12, -6, 6, 9, -9, -9, 9, 9, -9, -9, 9, 8, 4, 4, 2, -8, -4, -4, -2, 6, 3, -6, -3, 6, 3, -6, -3, 6, -6, 3, -3, 6, -6, 3, -3, 4, 2, 2, 1, 4, 2, 2, 1},
+ {-12, 12, 12, -12, 12, -12, -12, 12, -6, -6, 6, 6, 6, 6, -6, -6, -8, 8, -4, 4, 8, -8, 4, -4, -6, 6, 6, -6, -6, 6, 6, -6, -4, -4, -2, -2, 4, 4, 2, 2, -3, -3, 3, 3, -3, -3, 3, 3, -4, 4, -2, 2, -4, 4, -2, 2, -2, -2, -1, -1, -2, -2, -1, -1},
+ {4, 0, -4, 0, -4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, -2, 0, -2, 0, 2, 0, -2, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 4, 0, -4, 0, -4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, -2, 0, -2, 0, 2, 0, -2, 0, 2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0},
+ {-12, 12, 12, -12, 12, -12, -12, 12, -8, -4, 8, 4, 8, 4, -8, -4, -6, 6, -6, 6, 6, -6, 6, -6, -6, 6, 6, -6, -6, 6, 6, -6, -4, -2, -4, -2, 4, 2, 4, 2, -4, -2, 4, 2, -4, -2, 4, 2, -3, 3, -3, 3, -3, 3, -3, 3, -2, -1, -2, -1, -2, -1, -2, -1},
+ {8, -8, -8, 8, -8, 8, 8, -8, 4, 4, -4, -4, -4, -4, 4, 4, 4, -4, 4, -4, -4, 4, -4, 4, 4, -4, -4, 4, 4, -4, -4, 4, 2, 2, 2, 2, -2, -2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, -2, 2, -2, 2, -2, 2, -2, 1, 1, 1, 1, 1, 1, 1, 1}};
+
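+// Gathers the 4x4x4 neighborhood around ijk once, packs the 8 corner values and their
+// finite-difference derivative estimates into the 64-vector X, and applies the constant
+// 64x64 matrix above to obtain the tricubic coefficients, i.e. C = TricubicSampler_A * X.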
+template<typename TreeOrAccT>
+__hostdev__ void TricubicSampler<TreeOrAccT>::stencil(__global__ const CoordT& ijk, __global__ ValueT (&C)[64]) const
+{
+ struct Fetch {
+ Fetch(__global__ ValueT (&_C)[64]):C(_C) {}
+ __global__ ValueT& fetch(int i, int j, int k) { return C[((i + 1) << 4) + ((j + 1) << 2) + k + 1]; }
+
+ __global__ ValueT (&C)[64];
+ };
+ Fetch f(C);
+
+ // fetch 64 point stencil values
+ for (int i = -1; i < 3; ++i) {
+ for (int j = -1; j < 3; ++j) {
+ f.fetch(i, j, -1) = mAcc.getValue(ijk + CoordT(i, j, -1));
+ f.fetch(i, j, 0) = mAcc.getValue(ijk + CoordT(i, j, 0));
+ f.fetch(i, j, 1) = mAcc.getValue(ijk + CoordT(i, j, 1));
+ f.fetch(i, j, 2) = mAcc.getValue(ijk + CoordT(i, j, 2));
+ }
+ }
+ const ValueT _half(0.5), quarter(0.25), eighth(0.125);
+ const ValueT X[64] = {// values of f(x,y,z) at the 8 corners (each from 1 stencil value).
+ f.fetch(0, 0, 0),
+ f.fetch(1, 0, 0),
+ f.fetch(0, 1, 0),
+ f.fetch(1, 1, 0),
+ f.fetch(0, 0, 1),
+ f.fetch(1, 0, 1),
+ f.fetch(0, 1, 1),
+ f.fetch(1, 1, 1),
+ // values of df/dx at the 8 corners (each from 2 stencil values).
+ _half * (f.fetch(1, 0, 0) - f.fetch(-1, 0, 0)),
+ _half * (f.fetch(2, 0, 0) - f.fetch(0, 0, 0)),
+ _half * (f.fetch(1, 1, 0) - f.fetch(-1, 1, 0)),
+ _half * (f.fetch(2, 1, 0) - f.fetch(0, 1, 0)),
+ _half * (f.fetch(1, 0, 1) - f.fetch(-1, 0, 1)),
+ _half * (f.fetch(2, 0, 1) - f.fetch(0, 0, 1)),
+ _half * (f.fetch(1, 1, 1) - f.fetch(-1, 1, 1)),
+ _half * (f.fetch(2, 1, 1) - f.fetch(0, 1, 1)),
+ // values of df/dy at the 8 corners (each from 2 stencil values).
+ _half * (f.fetch(0, 1, 0) - f.fetch(0, -1, 0)),
+ _half * (f.fetch(1, 1, 0) - f.fetch(1, -1, 0)),
+ _half * (f.fetch(0, 2, 0) - f.fetch(0, 0, 0)),
+ _half * (f.fetch(1, 2, 0) - f.fetch(1, 0, 0)),
+ _half * (f.fetch(0, 1, 1) - f.fetch(0, -1, 1)),
+ _half * (f.fetch(1, 1, 1) - f.fetch(1, -1, 1)),
+ _half * (f.fetch(0, 2, 1) - f.fetch(0, 0, 1)),
+ _half * (f.fetch(1, 2, 1) - f.fetch(1, 0, 1)),
+ // values of df/dz at the 8 corners (each from 2 stencil values).
+ _half * (f.fetch(0, 0, 1) - f.fetch(0, 0, -1)),
+ _half * (f.fetch(1, 0, 1) - f.fetch(1, 0, -1)),
+ _half * (f.fetch(0, 1, 1) - f.fetch(0, 1, -1)),
+ _half * (f.fetch(1, 1, 1) - f.fetch(1, 1, -1)),
+ _half * (f.fetch(0, 0, 2) - f.fetch(0, 0, 0)),
+ _half * (f.fetch(1, 0, 2) - f.fetch(1, 0, 0)),
+ _half * (f.fetch(0, 1, 2) - f.fetch(0, 1, 0)),
+ _half * (f.fetch(1, 1, 2) - f.fetch(1, 1, 0)),
+ // values of d2f/dxdy at the 8 corners (each from 4 stencil values).
+ quarter * (f.fetch(1, 1, 0) - f.fetch(-1, 1, 0) - f.fetch(1, -1, 0) + f.fetch(-1, -1, 0)),
+ quarter * (f.fetch(2, 1, 0) - f.fetch(0, 1, 0) - f.fetch(2, -1, 0) + f.fetch(0, -1, 0)),
+ quarter * (f.fetch(1, 2, 0) - f.fetch(-1, 2, 0) - f.fetch(1, 0, 0) + f.fetch(-1, 0, 0)),
+ quarter * (f.fetch(2, 2, 0) - f.fetch(0, 2, 0) - f.fetch(2, 0, 0) + f.fetch(0, 0, 0)),
+ quarter * (f.fetch(1, 1, 1) - f.fetch(-1, 1, 1) - f.fetch(1, -1, 1) + f.fetch(-1, -1, 1)),
+ quarter * (f.fetch(2, 1, 1) - f.fetch(0, 1, 1) - f.fetch(2, -1, 1) + f.fetch(0, -1, 1)),
+ quarter * (f.fetch(1, 2, 1) - f.fetch(-1, 2, 1) - f.fetch(1, 0, 1) + f.fetch(-1, 0, 1)),
+ quarter * (f.fetch(2, 2, 1) - f.fetch(0, 2, 1) - f.fetch(2, 0, 1) + f.fetch(0, 0, 1)),
+ // values of d2f/dxdz at the 8 corners (each from 4 stencil values).
+ quarter * (f.fetch(1, 0, 1) - f.fetch(-1, 0, 1) - f.fetch(1, 0, -1) + f.fetch(-1, 0, -1)),
+ quarter * (f.fetch(2, 0, 1) - f.fetch(0, 0, 1) - f.fetch(2, 0, -1) + f.fetch(0, 0, -1)),
+ quarter * (f.fetch(1, 1, 1) - f.fetch(-1, 1, 1) - f.fetch(1, 1, -1) + f.fetch(-1, 1, -1)),
+ quarter * (f.fetch(2, 1, 1) - f.fetch(0, 1, 1) - f.fetch(2, 1, -1) + f.fetch(0, 1, -1)),
+ quarter * (f.fetch(1, 0, 2) - f.fetch(-1, 0, 2) - f.fetch(1, 0, 0) + f.fetch(-1, 0, 0)),
+ quarter * (f.fetch(2, 0, 2) - f.fetch(0, 0, 2) - f.fetch(2, 0, 0) + f.fetch(0, 0, 0)),
+ quarter * (f.fetch(1, 1, 2) - f.fetch(-1, 1, 2) - f.fetch(1, 1, 0) + f.fetch(-1, 1, 0)),
+ quarter * (f.fetch(2, 1, 2) - f.fetch(0, 1, 2) - f.fetch(2, 1, 0) + f.fetch(0, 1, 0)),
+ // values of d2f/dydz at the 8 corners (each from 4 stencil values).
+ quarter * (f.fetch(0, 1, 1) - f.fetch(0, -1, 1) - f.fetch(0, 1, -1) + f.fetch(0, -1, -1)),
+ quarter * (f.fetch(1, 1, 1) - f.fetch(1, -1, 1) - f.fetch(1, 1, -1) + f.fetch(1, -1, -1)),
+ quarter * (f.fetch(0, 2, 1) - f.fetch(0, 0, 1) - f.fetch(0, 2, -1) + f.fetch(0, 0, -1)),
+ quarter * (f.fetch(1, 2, 1) - f.fetch(1, 0, 1) - f.fetch(1, 2, -1) + f.fetch(1, 0, -1)),
+ quarter * (f.fetch(0, 1, 2) - f.fetch(0, -1, 2) - f.fetch(0, 1, 0) + f.fetch(0, -1, 0)),
+ quarter * (f.fetch(1, 1, 2) - f.fetch(1, -1, 2) - f.fetch(1, 1, 0) + f.fetch(1, -1, 0)),
+ quarter * (f.fetch(0, 2, 2) - f.fetch(0, 0, 2) - f.fetch(0, 2, 0) + f.fetch(0, 0, 0)),
+ quarter * (f.fetch(1, 2, 2) - f.fetch(1, 0, 2) - f.fetch(1, 2, 0) + f.fetch(1, 0, 0)),
+ // values of d3f/dxdydz at the 8 corners (each from 8 stencil values).
+ eighth * (f.fetch(1, 1, 1) - f.fetch(-1, 1, 1) - f.fetch(1, -1, 1) + f.fetch(-1, -1, 1) - f.fetch(1, 1, -1) + f.fetch(-1, 1, -1) + f.fetch(1, -1, -1) - f.fetch(-1, -1, -1)),
+ eighth * (f.fetch(2, 1, 1) - f.fetch(0, 1, 1) - f.fetch(2, -1, 1) + f.fetch(0, -1, 1) - f.fetch(2, 1, -1) + f.fetch(0, 1, -1) + f.fetch(2, -1, -1) - f.fetch(0, -1, -1)),
+ eighth * (f.fetch(1, 2, 1) - f.fetch(-1, 2, 1) - f.fetch(1, 0, 1) + f.fetch(-1, 0, 1) - f.fetch(1, 2, -1) + f.fetch(-1, 2, -1) + f.fetch(1, 0, -1) - f.fetch(-1, 0, -1)),
+ eighth * (f.fetch(2, 2, 1) - f.fetch(0, 2, 1) - f.fetch(2, 0, 1) + f.fetch(0, 0, 1) - f.fetch(2, 2, -1) + f.fetch(0, 2, -1) + f.fetch(2, 0, -1) - f.fetch(0, 0, -1)),
+ eighth * (f.fetch(1, 1, 2) - f.fetch(-1, 1, 2) - f.fetch(1, -1, 2) + f.fetch(-1, -1, 2) - f.fetch(1, 1, 0) + f.fetch(-1, 1, 0) + f.fetch(1, -1, 0) - f.fetch(-1, -1, 0)),
+ eighth * (f.fetch(2, 1, 2) - f.fetch(0, 1, 2) - f.fetch(2, -1, 2) + f.fetch(0, -1, 2) - f.fetch(2, 1, 0) + f.fetch(0, 1, 0) + f.fetch(2, -1, 0) - f.fetch(0, -1, 0)),
+ eighth * (f.fetch(1, 2, 2) - f.fetch(-1, 2, 2) - f.fetch(1, 0, 2) + f.fetch(-1, 0, 2) - f.fetch(1, 2, 0) + f.fetch(-1, 2, 0) + f.fetch(1, 0, 0) - f.fetch(-1, 0, 0)),
+ eighth * (f.fetch(2, 2, 2) - f.fetch(0, 2, 2) - f.fetch(2, 0, 2) + f.fetch(0, 0, 2) - f.fetch(2, 2, 0) + f.fetch(0, 2, 0) + f.fetch(2, 0, 0) - f.fetch(0, 0, 0))};
+
+ for (int i = 0; i < 64; ++i) { // C = A * X
+ C[i] = ValueT(0);
+#if 0
+ for (int j = 0; j < 64; j += 4) {
+ C[i] = fma(TricubicSampler_A[i][j], X[j], fma(TricubicSampler_A[i][j+1], X[j+1], fma(TricubicSampler_A[i][j+2], X[j+2], fma(TricubicSampler_A[i][j+3], X[j+3], C[i]))));
+ }
+#else
+ for (int j = 0; j < 64; j += 4) {
+ C[i] += TricubicSampler_A[i][j] * X[j] + TricubicSampler_A[i][j + 1] * X[j + 1] +
+ TricubicSampler_A[i][j + 2] * X[j + 2] + TricubicSampler_A[i][j + 3] * X[j + 3];
+ }
+#endif
+ }
+}
+
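+// Evaluates the tricubic polynomial sum over i,j,k of C[i + 4*j + 16*k] * x^i * y^j * z^k at the
+// fractional offset xyz, using Horner's rule in x while yPow and zPow accumulate the powers of y and z.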
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType TricubicSampler<TreeOrAccT>::sample(__global__ const Vec3T<RealT> &xyz, __global__ const ValueT (&C)[64])
+{
+ ValueT zPow(1), sum(0);
+ for (int k = 0, n = 0; k < 4; ++k) {
+ ValueT yPow(1);
+ for (int j = 0; j < 4; ++j, n += 4) {
+#if 0
+ sum = fma( yPow, zPow * fma(xyz[0], fma(xyz[0], fma(xyz[0], C[n + 3], C[n + 2]), C[n + 1]), C[n]), sum);
+#else
+ sum += yPow * zPow * (C[n] + xyz[0] * (C[n + 1] + xyz[0] * (C[n + 2] + xyz[0] * C[n + 3])));
+#endif
+ yPow *= xyz[1];
+ }
+ zPow *= xyz[2];
+ }
+ return sum;
+}
+
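+// Cached variant: the 64 coefficients in mC are recomputed by stencil() only when a query
+// moves to a different voxel than the one recorded in mPos, so repeated samples inside the
+// same voxel reuse the cached coefficients (see cache() below).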
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 3, true>
+#if !defined(__KERNEL_METAL__)
+ : public TricubicSampler<TreeOrAccT>
+#endif
+{
+#if defined(__KERNEL_METAL__)
+ TricubicSampler<TreeOrAccT> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
+ using BaseT = TricubicSampler<TreeOrAccT>;
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+
+ mutable CoordT mPos;
+ mutable ValueT mC[64];
+
+ template<typename RealT, template<typename...> class Vec3T>
+ __hostdev__ void cache(__global__ Vec3T<RealT>& xyz) const;
+
+public:
+ /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc)
+ : BaseT(acc)
+ , mPos(CoordT::max()) // start from an out-of-range position so the first query builds the stencil
+ {
+ }
+
+ /// @note xyz is in index space
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+ /// @brief Return the value at coordinate @a ijk in index space
+ __hostdev__ ValueT operator()(__global__ const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);}
+
+}; // SampleFromVoxels<TreeOrAccT, 3, true>
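+
+// Minimal usage sketch (hypothetical names; assumes a NanoVDB float grid "grid" and its read
+// accessor are already set up on the calling side):
+//   auto acc = grid->getAccessor();
+//   SampleFromVoxels<decltype(acc), 3, true> sampler(acc);
+//   auto v = sampler(Vec3f(1.5f, 2.25f, 3.75f)); // xyz in index space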
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, true>::operator()(Vec3T<RealT> xyz) const
+{
+ this->cache(xyz);
+ return BaseT::sample(xyz, mC);
+}
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 3, true>::cache(__global__ Vec3T<RealT>& xyz) const
+{
+ CoordT ijk = Floor<CoordT>(xyz);
+ if (ijk != mPos) {
+ mPos = ijk;
+ BaseT::stencil(ijk, mC);
+ }
+}
+
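+// Stateless variant: recomputes the full 64-coefficient stencil on every call, which costs more
+// for coherent queries but keeps operator() free of mutable per-sampler state.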
+template<typename TreeOrAccT>
+class SampleFromVoxels<TreeOrAccT, 3, false>
+#if !defined(__KERNEL_METAL__)
+ : public TricubicSampler<TreeOrAccT>
+#endif
+{
+#if defined(__KERNEL_METAL__)
+ TricubicSampler<TreeOrAccT> _base;
+#define BASE(v) _base.v
+#else
+#define BASE(v) BaseT::v
+#endif
+ using BaseT = TricubicSampler<TreeOrAccT>;
+ using ValueT = typename TreeOrAccT::ValueType;
+ using CoordT = typename TreeOrAccT::CoordType;
+
+public:
+ /// @brief Construction from a Tree or ReadAccessor
+ __hostdev__ SampleFromVoxels(__local__ const TreeOrAccT& acc)
+ : BaseT(acc)
+ {
+ }
+
+ /// @note xyz is in index space
+ template<typename RealT, template<typename...> class Vec3T>
+ inline __hostdev__ ValueT operator()(Vec3T<RealT> xyz) const;
+
+ __hostdev__ ValueT operator()(__global__ const CoordT &ijk) const {return BaseT::mAcc.getValue(ijk);}
+
+}; // SampleFromVoxels<TreeOrAccT, 3, false>
+
+template<typename TreeOrAccT>
+template<typename RealT, template<typename...> class Vec3T>
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, false>::operator()(Vec3T<RealT> xyz) const
+{
+ ValueT C[64];
+ CoordT ijk = Floor<CoordT>(xyz);
+ BaseT::stencil(ijk, C);
+ return BaseT::sample(xyz, C);
+}
+
+} // namespace nanovdb
+
+#endif // NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED