Merge branch 'bf-blender' into main

This commit is contained in:
Jaume Bellet 2023-12-16 11:10:41 +01:00
commit 5104519898
815 changed files with 81706 additions and 47635 deletions

View File

@ -760,8 +760,8 @@ endif()
# Unit testing
option(WITH_GTESTS "Enable GTest unit testing" OFF)
option(WITH_OPENGL_RENDER_TESTS "Enable OpenGL render related unit testing (Experimental)" OFF)
option(WITH_OPENGL_DRAW_TESTS "Enable OpenGL UI drawing related unit testing (Experimental)" OFF)
option(WITH_GPU_RENDER_TESTS "Enable GPU render related unit testing (EEVEE, Workbench and Grease Pencil)" OFF)
option(WITH_GPU_DRAW_TESTS "Enable GPU drawing related unit testing (GPU backends and draw manager)" OFF)
option(WITH_COMPOSITOR_REALTIME_TESTS "Enable regression testing for realtime compositor" OFF)
if(UNIX AND NOT (APPLE OR HAIKU))
option(WITH_UI_TESTS "\

View File

@ -17,6 +17,7 @@ ExternalProject_Add(external_ocloc
PREFIX ${BUILD_DIR}/ocloc
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/ocloc ${DEFAULT_CMAKE_FLAGS} ${OCLOC_EXTRA_ARGS}
INSTALL_DIR ${LIBDIR}/ocloc
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/ocloc/src/external_ocloc/ < ${PATCH_DIR}/ocloc.diff
)
add_dependencies(

View File

@ -14,6 +14,7 @@ if(NOT APPLE)
${OIDN_EXTRA_ARGS}
-DOIDN_DEVICE_SYCL=ON
-DOIDN_DEVICE_SYCL_AOT=OFF
-DOIDN_DEVICE_HIP=ON
-DLEVEL_ZERO_ROOT=${LIBDIR}/level-zero/lib
)
endif()

View File

@ -47,12 +47,6 @@ set(OPENVDB_EXTRA_ARGS
)
set(OPENVDB_PATCH ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb.diff)
if(APPLE)
set(OPENVDB_PATCH
${OPENVDB_PATCH} &&
${PATCH_CMD} -p 1 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb_metal.diff
)
endif()
ExternalProject_Add(openvdb
URL file://${PACKAGE_DIR}/${OPENVDB_FILE}

View File

@ -0,0 +1,14 @@
diff --git a/shared/offline_compiler/source/ocloc_fatbinary.cpp b/shared/offline_compiler/source/ocloc_fatbinary.cpp
index 98a1c0e..4d9b5b0 100644
--- a/shared/offline_compiler/source/ocloc_fatbinary.cpp
+++ b/shared/offline_compiler/source/ocloc_fatbinary.cpp
@@ -286,7 +286,9 @@ int buildFatBinaryForTarget(int retVal, const std::vector<std::string> &argsCopy
productConfig = ProductConfigHelper::parseMajorMinorRevisionValue(argHelper->productConfigHelper->getProductConfigFromDeviceName(product));
}
- fatbinary.appendFileEntry(pointerSize + "." + productConfig, pCompiler->getPackedDeviceBinaryOutput());
+ // Storing binaries under the hardware prefix instead of the full architecture version number,
+ // as they would otherwise be ignored if they do not fully match that of the execution device.
+ fatbinary.appendFileEntry(pointerSize + "." + NEO::hardwarePrefix[argHelper->productConfigHelper->getProductFamilyFromDeviceName(productConfig)], pCompiler->getPackedDeviceBinaryOutput());
return retVal;
}

File diff suppressed because it is too large Load Diff

View File

@ -19,7 +19,7 @@ buildbot:
optix:
version: '7.3.0'
ocloc:
version: '101.4723'
version: '101.4723p0'
cmake:
default:
version: any

View File

@ -17,8 +17,8 @@ if NOT "%1" == "" (
shift /1
) else if "%1" == "with_tests" (
set TESTS_CMAKE_ARGS=%TESTS_CMAKE_ARGS% -DWITH_GTESTS=On
) else if "%1" == "with_opengl_tests" (
set TESTS_CMAKE_ARGS=%TESTS_CMAKE_ARGS% -DWITH_OPENGL_DRAW_TESTS=On -DWITH_OPENGL_RENDER_TESTS=On
) else if "%1" == "with_gpu_tests" (
set TESTS_CMAKE_ARGS=%TESTS_CMAKE_ARGS% -DWITH_GPU_DRAW_TESTS=On -DWITH_GPU_RENDER_TESTS=On
) else if "%1" == "full" (
set TARGET=Full
set BUILD_CMAKE_ARGS=%BUILD_CMAKE_ARGS% ^

View File

@ -39,7 +39,7 @@ echo - doc_py ^(Generate sphinx python api docs^)
echo.
echo Experimental options
echo - with_opengl_tests ^(enable both the render and draw opengl test suites^)
echo - with_gpu_tests ^(enable both the render and draw gpu test suites including EEVEE, Workbench, Grease Pencil, draw manager and GPU backends^)
echo - clang ^(enable building with clang^)
echo - asan ^(enable asan when building with clang^)
echo - ninja ^(enable building with ninja instead of msbuild^)

View File

@ -7,4 +7,5 @@ GPL-2.0-or-later GPL-license.txt https://spdx.org/licenses/GP
GPL-3.0-or-later GPL3-license.txt https://spdx.org/licenses/GPL-3.0-or-later.html
LGPL-2.1-or-later LGPL2.1-license.txt https://spdx.org/licenses/LGPL-2.1-or-later.html
MIT MIT-license.txt https://spdx.org/licenses/MIT.html
MPL-2.0 MPL-2.0.txt https://spdx.org/licenses/MPL-2.0.html
Zlib Zlib-license.txt https://spdx.org/licenses/Zlib.html

View File

@ -191,13 +191,6 @@ if(CXX_HAS_AVX2)
add_definitions(-DWITH_KERNEL_AVX2)
endif()
# LLVM and OSL need to build without RTTI
if(WIN32 AND MSVC)
set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang"))
set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
endif()
# Definitions and Includes
add_definitions(

View File

@ -2272,6 +2272,7 @@ class CYCLES_RENDER_PT_simplify_viewport(CyclesButtonsPanel, Panel):
col.prop(rd, "simplify_child_particles", text="Child Particles")
col.prop(cscene, "texture_limit", text="Texture Limit")
col.prop(rd, "simplify_volumes", text="Volume Resolution")
col.prop(rd, "use_simplify_normals", text="Normals")
class CYCLES_RENDER_PT_simplify_render(CyclesButtonsPanel, Panel):

View File

@ -326,13 +326,13 @@ static void attr_create_generic(Scene *scene,
}
else {
for (const int i : looptris.index_range()) {
const MLoopTri &tri = looptris[i];
const MLoopTri &lt = looptris[i];
data[i * 3 + 0] = make_uchar4(
src[tri.tri[0]][0], src[tri.tri[0]][1], src[tri.tri[0]][2], src[tri.tri[0]][3]);
src[lt.tri[0]][0], src[lt.tri[0]][1], src[lt.tri[0]][2], src[lt.tri[0]][3]);
data[i * 3 + 1] = make_uchar4(
src[tri.tri[1]][0], src[tri.tri[1]][1], src[tri.tri[1]][2], src[tri.tri[1]][3]);
src[lt.tri[1]][0], src[lt.tri[1]][1], src[lt.tri[1]][2], src[lt.tri[1]][3]);
data[i * 3 + 2] = make_uchar4(
src[tri.tri[2]][0], src[tri.tri[2]][1], src[tri.tri[2]][2], src[tri.tri[2]][3]);
src[lt.tri[2]][0], src[lt.tri[2]][1], src[lt.tri[2]][2], src[lt.tri[2]][3]);
}
}
return true;
@ -376,10 +376,10 @@ static void attr_create_generic(Scene *scene,
}
else {
for (const int i : looptris.index_range()) {
const MLoopTri &tri = looptris[i];
data[i * 3 + 0] = Converter::convert(src[tri.tri[0]]);
data[i * 3 + 1] = Converter::convert(src[tri.tri[1]]);
data[i * 3 + 2] = Converter::convert(src[tri.tri[2]]);
const MLoopTri &lt = looptris[i];
data[i * 3 + 0] = Converter::convert(src[lt.tri[0]]);
data[i * 3 + 1] = Converter::convert(src[lt.tri[1]]);
data[i * 3 + 2] = Converter::convert(src[lt.tri[2]]);
}
}
break;
@ -469,10 +469,10 @@ static void attr_create_uv_map(Scene *scene,
uv_name.c_str(), ATTR_DOMAIN_CORNER);
float2 *fdata = uv_attr->data_float2();
for (const int i : looptris.index_range()) {
const MLoopTri &tri = looptris[i];
fdata[i * 3 + 0] = make_float2(b_uv_map[tri.tri[0]][0], b_uv_map[tri.tri[0]][1]);
fdata[i * 3 + 1] = make_float2(b_uv_map[tri.tri[1]][0], b_uv_map[tri.tri[1]][1]);
fdata[i * 3 + 2] = make_float2(b_uv_map[tri.tri[2]][0], b_uv_map[tri.tri[2]][1]);
const MLoopTri &lt = looptris[i];
fdata[i * 3 + 0] = make_float2(b_uv_map[lt.tri[0]][0], b_uv_map[lt.tri[0]][1]);
fdata[i * 3 + 1] = make_float2(b_uv_map[lt.tri[1]][0], b_uv_map[lt.tri[1]][1]);
fdata[i * 3 + 2] = make_float2(b_uv_map[lt.tri[2]][0], b_uv_map[lt.tri[2]][1]);
}
}
@ -915,10 +915,10 @@ static void create_mesh(Scene *scene,
const blender::Span<MLoopTri> looptris = b_mesh.looptris();
for (const int i : looptris.index_range()) {
const MLoopTri &tri = looptris[i];
triangles[i * 3 + 0] = corner_verts[tri.tri[0]];
triangles[i * 3 + 1] = corner_verts[tri.tri[1]];
triangles[i * 3 + 2] = corner_verts[tri.tri[2]];
const MLoopTri &lt = looptris[i];
triangles[i * 3 + 0] = corner_verts[lt.tri[0]];
triangles[i * 3 + 1] = corner_verts[lt.tri[1]];
triangles[i * 3 + 2] = corner_verts[lt.tri[2]];
}
if (!material_indices.is_empty()) {
@ -944,9 +944,9 @@ static void create_mesh(Scene *scene,
if (use_loop_normals && !corner_normals.is_empty()) {
for (const int i : looptris.index_range()) {
const MLoopTri &tri = looptris[i];
const MLoopTri &lt = looptris[i];
for (int i = 0; i < 3; i++) {
const int corner = tri.tri[i];
const int corner = lt.tri[i];
const int vert = corner_verts[corner];
const float *normal = corner_normals[corner];
N[vert] = make_float3(normal[0], normal[1], normal[2]);

View File

@ -313,7 +313,7 @@ void CPUDevice::get_cpu_kernel_thread_globals(
kernel_thread_globals.clear();
void *osl_memory = get_cpu_osl_memory();
for (int i = 0; i < info.cpu_threads; i++) {
kernel_thread_globals.emplace_back(kernel_globals, osl_memory, profiler);
kernel_thread_globals.emplace_back(kernel_globals, osl_memory, profiler, i);
}
}

View File

@ -60,7 +60,7 @@ class CPUKernels {
int x,
int y,
float threshold,
bool reset,
int reset,
int offset,
int stride)>;

View File

@ -12,14 +12,16 @@ CCL_NAMESPACE_BEGIN
CPUKernelThreadGlobals::CPUKernelThreadGlobals(const KernelGlobalsCPU &kernel_globals,
void *osl_globals_memory,
Profiler &cpu_profiler)
Profiler &cpu_profiler,
const int thread_index)
: KernelGlobalsCPU(kernel_globals), cpu_profiler_(cpu_profiler)
{
clear_runtime_pointers();
#ifdef WITH_OSL
OSLGlobals::thread_init(this, static_cast<OSLGlobals *>(osl_globals_memory));
OSLGlobals::thread_init(this, static_cast<OSLGlobals *>(osl_globals_memory), thread_index);
#else
(void)thread_index;
(void)osl_globals_memory;
#endif

View File

@ -23,7 +23,8 @@ class CPUKernelThreadGlobals : public KernelGlobalsCPU {
* without OSL support. This would avoid the need for those unnamed pointers and casts. */
CPUKernelThreadGlobals(const KernelGlobalsCPU &kernel_globals,
void *osl_globals_memory,
Profiler &cpu_profiler);
Profiler &cpu_profiler,
const int thread_index);
~CPUKernelThreadGlobals();

View File

@ -10,6 +10,8 @@
# include "device/device.h"
# include "device/hip/device_impl.h"
# include "integrator/denoiser_oidn_gpu.h"
# include "util/string.h"
# include "util/windows.h"
#endif /* WITH_HIP */
@ -158,6 +160,11 @@ void device_hip_info(vector<DeviceInfo> &devices)
info.has_light_tree = true;
info.has_mnee = true;
info.denoisers = 0;
# if defined(WITH_OPENIMAGEDENOISE)
if (OIDNDenoiserGPU::is_device_supported(info)) {
info.denoisers |= DENOISER_OPENIMAGEDENOISE;
}
# endif
info.has_gpu_queue = true;
/* Check if the device has P2P access to any other device in the system. */

View File

@ -73,7 +73,7 @@ class MetalDevice : public Device {
/* Bindless Textures */
bool is_texture(const TextureInfo &tex);
device_vector<TextureInfo> texture_info;
bool need_texture_info;
bool need_texture_info = false;
id<MTLArgumentEncoder> mtlTextureArgEncoder = nil;
id<MTLArgumentEncoder> mtlBufferArgEncoder = nil;
id<MTLBuffer> buffer_bindings_1d = nil;

View File

@ -40,7 +40,7 @@ class OneapiDeviceQueue : public DeviceQueue {
virtual void copy_to_device(device_memory &mem) override;
virtual void copy_from_device(device_memory &mem) override;
virtual bool supports_local_atomic_sort() const
virtual bool supports_local_atomic_sort() const override
{
return true;
}

View File

@ -27,7 +27,6 @@ struct DeviceKernelArguments {
POINTER,
INT32,
FLOAT32,
BOOLEAN,
KERNEL_FILM_CONVERT,
};
@ -66,10 +65,6 @@ struct DeviceKernelArguments {
{
add(FLOAT32, value, sizeof(float));
}
void add(const bool *value)
{
add(BOOLEAN, value, 4);
}
void add(const Type type, const void *value, size_t size)
{
assert(count < MAX_ARGS);

View File

@ -27,7 +27,9 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoisePa
#endif
#ifdef WITH_OPENIMAGEDENOISE
if (params.type == DENOISER_OPENIMAGEDENOISE && path_trace_device->info.type == DEVICE_ONEAPI) {
if (params.type == DENOISER_OPENIMAGEDENOISE && path_trace_device->info.type != DEVICE_CPU &&
OIDNDenoiserGPU::is_device_supported(path_trace_device->info))
{
return make_unique<OIDNDenoiserGPU>(path_trace_device, params);
}
#endif

View File

@ -103,7 +103,7 @@ class DenoiserGPU : public Denoiser {
int denoised_offset;
int num_components;
bool use_compositing;
int use_compositing;
bool use_denoising_albedo;
};

View File

@ -30,16 +30,48 @@ CCL_NAMESPACE_BEGIN
bool OIDNDenoiserGPU::is_device_supported(const DeviceInfo &device)
{
/* Currently falls back to checking just the device type, can be improved. */
int device_type = OIDN_DEVICE_TYPE_DEFAULT;
switch (device.type) {
# ifdef OIDN_DEVICE_SYCL
/* Assume all devices with Cycles support are also supported by OIDN2. */
case DEVICE_ONEAPI:
return true;
device_type = OIDN_DEVICE_TYPE_SYCL;
break;
# endif
# ifdef OIDN_DEVICE_HIP
case DEVICE_HIP:
device_type = OIDN_DEVICE_TYPE_HIP;
break;
# endif
# ifdef OIDN_DEVICE_CUDA
case DEVICE_CUDA:
case DEVICE_OPTIX:
device_type = OIDN_DEVICE_TYPE_CUDA;
break;
# endif
case DEVICE_CPU:
/* This is the GPU denoiser - CPU devices shouldn't end up here. */
assert(0);
default:
return false;
}
/* Match GPUs by their PCI ID. */
const int num_devices = oidnGetNumPhysicalDevices();
for (int i = 0; i < num_devices; i++) {
if (oidnGetPhysicalDeviceInt(i, "type") == device_type) {
if (oidnGetPhysicalDeviceBool(i, "pciAddressSupported")) {
unsigned int pci_domain = oidnGetPhysicalDeviceInt(i, "pciDomain");
unsigned int pci_bus = oidnGetPhysicalDeviceInt(i, "pciBus");
unsigned int pci_device = oidnGetPhysicalDeviceInt(i, "pciDevice");
string pci_id = string_printf("%04x:%02x:%02x", pci_domain, pci_bus, pci_device);
if (device.id.find(pci_id) != string::npos) {
return true;
}
}
}
}
return false;
}
OIDNDenoiserGPU::OIDNDenoiserGPU(Device *path_trace_device, const DenoiseParams &params)
@ -78,6 +110,9 @@ uint OIDNDenoiserGPU::get_device_type_mask() const
uint device_mask = 0;
# ifdef OIDN_DEVICE_SYCL
device_mask |= DEVICE_MASK_ONEAPI;
# endif
# ifdef OIDN_DEVICE_HIP
device_mask |= DEVICE_MASK_HIP;
# endif
return device_mask;
}
@ -122,15 +157,27 @@ bool OIDNDenoiserGPU::denoise_create_if_needed(DenoiseContext &context)
1);
denoiser_queue_->init_execution();
break;
# endif
# if defined(OIDN_DEVICE_HIP) && defined(WITH_HIP)
case DEVICE_HIP: {
hipStream_t stream = nullptr;
oidn_device_ = oidnNewHIPDevice(&denoiser_device_->info.num, &stream, 1);
break;
}
# endif
default:
break;
}
if (!oidn_device_) {
denoiser_device_->set_error("Failed to create OIDN device");
return false;
}
if (denoiser_queue_) {
denoiser_queue_->init_execution();
}
oidnCommitDevice(oidn_device_);
oidn_filter_ = create_filter();

View File

@ -1055,6 +1055,7 @@ int PathTraceWorkGPU::adaptive_sampling_convergence_check_count_active(float thr
queue_->zero_to_device(num_active_pixels);
const int work_size = effective_buffer_params_.width * effective_buffer_params_.height;
const int reset_int = reset; /* No bool kernel arguments. */
DeviceKernelArguments args(&buffers_->buffer.device_pointer,
&effective_buffer_params_.full_x,
@ -1062,7 +1063,7 @@ int PathTraceWorkGPU::adaptive_sampling_convergence_check_count_active(float thr
&effective_buffer_params_.width,
&effective_buffer_params_.height,
&threshold,
&reset,
&reset_int,
&effective_buffer_params_.offset,
&effective_buffer_params_.stride,
&num_active_pixels.device_pointer);

View File

@ -327,6 +327,7 @@ set(SRC_KERNEL_UTIL_HEADERS
util/color.h
util/differential.h
util/lookup_table.h
util/nanovdb.h
util/profiling.h
)
@ -483,8 +484,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
if(WITH_NANOVDB)
set(cuda_flags ${cuda_flags}
-D WITH_NANOVDB
-I "${NANOVDB_INCLUDE_DIR}")
-D WITH_NANOVDB)
endif()
if(WITH_CYCLES_DEBUG)
@ -634,8 +634,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
if(WITH_NANOVDB)
set(hip_flags ${hip_flags}
-D WITH_NANOVDB
-I "${NANOVDB_INCLUDE_DIR}")
-D WITH_NANOVDB)
endif()
if(WITH_CYCLES_DEBUG)
@ -755,8 +754,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
if(WITH_NANOVDB)
set(cuda_flags ${cuda_flags}
-D WITH_NANOVDB
-I "${NANOVDB_INCLUDE_DIR}")
-D WITH_NANOVDB)
endif()
if(WITH_CYCLES_OSL)
set(cuda_flags ${cuda_flags}
@ -936,8 +934,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
if(WITH_NANOVDB)
list(APPEND sycl_compiler_flags
-DWITH_NANOVDB
-I"${NANOVDB_INCLUDE_DIR}")
-DWITH_NANOVDB)
endif()
if(WITH_CYCLES_EMBREE AND EMBREE_SYCL_SUPPORT)
@ -1256,16 +1253,3 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_SVM_HEADERS}" ${CYCLES
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_TYPES_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/util)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/util)
if(WITH_NANOVDB)
set(SRC_NANOVDB_HEADERS
nanovdb/NanoVDB.h
nanovdb/CNanoVDB.h
)
set(SRC_NANOVDB_UTIL_HEADERS
nanovdb/util/CSampleFromVoxels.h
nanovdb/util/SampleFromVoxels.h
)
delayed_install(${NANOVDB_INCLUDE_DIR} "${SRC_NANOVDB_HEADERS}" ${CYCLES_INSTALL_PATH}/source/nanovdb)
delayed_install(${NANOVDB_INCLUDE_DIR} "${SRC_NANOVDB_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/nanovdb/util)
endif()

View File

@ -49,6 +49,7 @@ typedef struct KernelGlobalsCPU {
OSLGlobals *osl = nullptr;
OSLShadingSystem *osl_ss = nullptr;
OSLThreadData *osl_tdata = nullptr;
int osl_thread_index = 0;
#endif
#ifdef __PATH_GUIDING__

View File

@ -5,9 +5,7 @@
#pragma once
#ifdef WITH_NANOVDB
# define NANOVDB_USE_INTRINSICS
# include <nanovdb/NanoVDB.h>
# include <nanovdb/util/SampleFromVoxels.h>
# include "kernel/util/nanovdb.h"
#endif
CCL_NAMESPACE_BEGIN
@ -685,46 +683,59 @@ template<typename TexT, typename OutT = float4> struct TextureInterpolator {
};
#ifdef WITH_NANOVDB
template<typename TexT, typename OutT = float4> struct NanoVDBInterpolator {
typedef typename nanovdb::NanoGrid<TexT>::AccessorType AccessorType;
template<typename TexT, typename OutT> struct NanoVDBInterpolator {
static ccl_always_inline float read(float r)
{
return r;
}
static ccl_always_inline float4 read(nanovdb::Vec3f r)
static ccl_always_inline float4 read(const packed_float3 r)
{
return make_float4(r[0], r[1], r[2], 1.0f);
return make_float4(r.x, r.y, r.z, 1.0f);
}
static ccl_always_inline OutT interp_3d_closest(const AccessorType &acc,
float x,
float y,
float z)
template<typename Acc>
static ccl_always_inline OutT interp_3d_closest(const Acc &acc, float x, float y, float z)
{
const nanovdb::Vec3f xyz(x, y, z);
return read(nanovdb::SampleFromVoxels<AccessorType, 0, false>(acc)(xyz));
const nanovdb::Coord coord((int32_t)floorf(x), (int32_t)floorf(y), (int32_t)floorf(z));
return read(acc.getValue(coord));
}
static ccl_always_inline OutT interp_3d_linear(const AccessorType &acc,
float x,
float y,
float z)
template<typename Acc>
static ccl_always_inline OutT interp_3d_linear(const Acc &acc, float x, float y, float z)
{
const nanovdb::Vec3f xyz(x - 0.5f, y - 0.5f, z - 0.5f);
return read(nanovdb::SampleFromVoxels<AccessorType, 1, false>(acc)(xyz));
int ix, iy, iz;
const float tx = frac(x - 0.5f, &ix);
const float ty = frac(y - 0.5f, &iy);
const float tz = frac(z - 0.5f, &iz);
return mix(mix(mix(read(acc.getValue(nanovdb::Coord(ix, iy, iz))),
read(acc.getValue(nanovdb::Coord(ix, iy, iz + 1))),
tz),
mix(read(acc.getValue(nanovdb::Coord(ix, iy + 1, iz + 1))),
read(acc.getValue(nanovdb::Coord(ix, iy + 1, iz))),
1.0f - tz),
ty),
mix(mix(read(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz))),
read(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz + 1))),
tz),
mix(read(acc.getValue(nanovdb::Coord(ix + 1, iy, iz + 1))),
read(acc.getValue(nanovdb::Coord(ix + 1, iy, iz))),
1.0f - tz),
1.0f - ty),
tx);
}
/* Tricubic b-spline interpolation. */
template<typename Acc>
# if defined(__GNUC__) || defined(__clang__)
static ccl_always_inline
# else
static ccl_never_inline
# endif
OutT
interp_3d_cubic(const AccessorType &acc, float x, float y, float z)
interp_3d_cubic(const Acc &acc, float x, float y, float z)
{
int ix, iy, iz;
int nix, niy, niz;
@ -779,15 +790,20 @@ template<typename TexT, typename OutT = float4> struct NanoVDBInterpolator {
using namespace nanovdb;
NanoGrid<TexT> *const grid = (NanoGrid<TexT> *)info.data;
AccessorType acc = grid->getAccessor();
switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
case INTERPOLATION_CLOSEST:
case INTERPOLATION_CLOSEST: {
ReadAccessor<TexT> acc(grid->tree().root());
return interp_3d_closest(acc, x, y, z);
case INTERPOLATION_LINEAR:
}
case INTERPOLATION_LINEAR: {
CachedReadAccessor<TexT> acc(grid->tree().root());
return interp_3d_linear(acc, x, y, z);
default:
}
default: {
CachedReadAccessor<TexT> acc(grid->tree().root());
return interp_3d_cubic(acc, x, y, z);
}
}
}
};
@ -880,7 +896,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
return make_float4(f, f, f, 1.0f);
}
case IMAGE_DATA_TYPE_NANOVDB_FLOAT3:
return NanoVDBInterpolator<nanovdb::Vec3f>::interp_3d(info, P.x, P.y, P.z, interp);
return NanoVDBInterpolator<packed_float3, float4>::interp_3d(info, P.x, P.y, P.z, interp);
case IMAGE_DATA_TYPE_NANOVDB_FPN: {
const float f = NanoVDBInterpolator<nanovdb::FpN, float>::interp_3d(
info, P.x, P.y, P.z, interp);

View File

@ -101,7 +101,7 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
int x,
int y,
float threshold,
bool reset,
int reset,
int offset,
int stride);

View File

@ -164,7 +164,7 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
int x,
int y,
float threshold,
bool reset,
int reset,
int offset,
int stride)
{

View File

@ -45,6 +45,7 @@ typedef unsigned long long uint64_t;
#define ccl_global
#define ccl_inline_constant __constant__
#define ccl_device_constant __constant__ __device__
#define ccl_static_constexpr static constexpr
#define ccl_constant const
#define ccl_gpu_shared __shared__
#define ccl_private

View File

@ -8,12 +8,17 @@ CCL_NAMESPACE_BEGIN
#if !defined __KERNEL_METAL__
# ifdef WITH_NANOVDB
# define NDEBUG /* Disable "assert" in device code */
# define NANOVDB_USE_INTRINSICS
# include "nanovdb/NanoVDB.h"
# include "nanovdb/util/SampleFromVoxels.h"
# include "kernel/util/nanovdb.h"
# endif
#endif
/* Split `x` into an integer part and a fractional remainder.
 *
 * Writes the integer part to `*ix` and returns `x - *ix`. The integer part is
 * `float_to_int(x)`, decremented by one whenever `x` is negative.
 *
 * NOTE(review): assuming `float_to_int` truncates toward zero, this acts as a floor for
 * non-integral negative `x`, but an exactly integral negative `x` gives `*ix == x - 1`
 * and a returned fraction of 1.0 - confirm callers tolerate that. */
ccl_device_inline float frac(float x, ccl_private int *ix)
{
int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
*ix = i;
return x - (float)i;
}
/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
ccl_device float cubic_w0(float a)
{
@ -126,63 +131,121 @@ kernel_tex_image_interp_tricubic(ccl_global const TextureInfo &info, float x, fl
}
#ifdef WITH_NANOVDB
template<typename T, typename S>
ccl_device typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_tricubic_nanovdb(
ccl_private S &s, float x, float y, float z)
template<typename OutT, typename Acc>
ccl_device OutT
kernel_tex_image_interp_trilinear_nanovdb(ccl_private Acc &acc, float x, float y, float z)
{
float px = floorf(x);
float py = floorf(y);
float pz = floorf(z);
float fx = x - px;
float fy = y - py;
float fz = z - pz;
int ix, iy, iz;
const float tx = frac(x - 0.5f, &ix);
const float ty = frac(y - 0.5f, &iy);
const float tz = frac(z - 0.5f, &iz);
float g0x = cubic_g0(fx);
float g1x = cubic_g1(fx);
float g0y = cubic_g0(fy);
float g1y = cubic_g1(fy);
float g0z = cubic_g0(fz);
float g1z = cubic_g1(fz);
return mix(mix(mix(OutT(acc.getValue(nanovdb::Coord(ix, iy, iz))),
OutT(acc.getValue(nanovdb::Coord(ix, iy, iz + 1))),
tz),
mix(OutT(acc.getValue(nanovdb::Coord(ix, iy + 1, iz + 1))),
OutT(acc.getValue(nanovdb::Coord(ix, iy + 1, iz))),
1.0f - tz),
ty),
mix(mix(OutT(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz))),
OutT(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz + 1))),
tz),
mix(OutT(acc.getValue(nanovdb::Coord(ix + 1, iy, iz + 1))),
OutT(acc.getValue(nanovdb::Coord(ix + 1, iy, iz))),
1.0f - tz),
1.0f - ty),
tx);
}
float x0 = px + cubic_h0(fx);
float x1 = px + cubic_h1(fx);
float y0 = py + cubic_h0(fy);
float y1 = py + cubic_h1(fy);
float z0 = pz + cubic_h0(fz);
float z1 = pz + cubic_h1(fz);
template<typename OutT, typename Acc>
ccl_device OutT
kernel_tex_image_interp_tricubic_nanovdb(ccl_private Acc &acc, float x, float y, float z)
{
int ix, iy, iz;
int nix, niy, niz;
int pix, piy, piz;
int nnix, nniy, nniz;
using namespace nanovdb;
/* A -0.5 offset is used to center the cubic samples around the sample point. */
const float tx = frac(x - 0.5f, &ix);
const float ty = frac(y - 0.5f, &iy);
const float tz = frac(z - 0.5f, &iz);
return g0z * (g0y * (g0x * s(Vec3f(x0, y0, z0)) + g1x * s(Vec3f(x1, y0, z0))) +
g1y * (g0x * s(Vec3f(x0, y1, z0)) + g1x * s(Vec3f(x1, y1, z0)))) +
g1z * (g0y * (g0x * s(Vec3f(x0, y0, z1)) + g1x * s(Vec3f(x1, y0, z1))) +
g1y * (g0x * s(Vec3f(x0, y1, z1)) + g1x * s(Vec3f(x1, y1, z1))));
pix = ix - 1;
piy = iy - 1;
piz = iz - 1;
nix = ix + 1;
niy = iy + 1;
niz = iz + 1;
nnix = ix + 2;
nniy = iy + 2;
nniz = iz + 2;
const int xc[4] = {pix, ix, nix, nnix};
const int yc[4] = {piy, iy, niy, nniy};
const int zc[4] = {piz, iz, niz, nniz};
float u[4], v[4], w[4];
/* Some helper macros to keep code size reasonable.
* Lets the compiler inline all the matrix multiplications.
*/
# define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
{ \
u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \
u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \
u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \
u[3] = (1.0f / 6.0f) * t * t * t; \
} \
(void)0
# define DATA(x, y, z) (OutT(acc.getValue(nanovdb::Coord(xc[x], yc[y], zc[z]))))
# define COL_TERM(col, row) \
(v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
u[3] * DATA(3, col, row)))
# define ROW_TERM(row) \
(w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
SET_CUBIC_SPLINE_WEIGHTS(u, tx);
SET_CUBIC_SPLINE_WEIGHTS(v, ty);
SET_CUBIC_SPLINE_WEIGHTS(w, tz);
/* Actual interpolation. */
return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
# undef COL_TERM
# undef ROW_TERM
# undef DATA
# undef SET_CUBIC_SPLINE_WEIGHTS
}
# if defined(__KERNEL_METAL__)
template<typename T>
__attribute__((noinline)) typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_nanovdb(
template<typename OutT, typename T>
__attribute__((noinline)) OutT kernel_tex_image_interp_nanovdb(
ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
# else
template<typename T>
ccl_device_noinline typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_nanovdb(
template<typename OutT, typename T>
ccl_device_noinline OutT kernel_tex_image_interp_nanovdb(
ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
# endif
{
using namespace nanovdb;
ccl_global NanoGrid<T> *const grid = (ccl_global NanoGrid<T> *)info.data;
typedef typename nanovdb::NanoGrid<T>::AccessorType AccessorType;
AccessorType acc = grid->getAccessor();
switch (interpolation) {
case INTERPOLATION_CLOSEST:
return SampleFromVoxels<AccessorType, 0, false>(acc)(Vec3f(x, y, z));
case INTERPOLATION_LINEAR:
return SampleFromVoxels<AccessorType, 1, false>(acc)(Vec3f(x - 0.5f, y - 0.5f, z - 0.5f));
default:
SampleFromVoxels<AccessorType, 1, false> s(acc);
return kernel_tex_image_interp_tricubic_nanovdb<T>(s, x - 0.5f, y - 0.5f, z - 0.5f);
case INTERPOLATION_CLOSEST: {
ReadAccessor<T> acc(grid->tree().root());
const nanovdb::Coord coord((int32_t)floorf(x), (int32_t)floorf(y), (int32_t)floorf(z));
return OutT(acc.getValue(coord));
}
case INTERPOLATION_LINEAR: {
CachedReadAccessor<T> acc(grid->tree().root());
return kernel_tex_image_interp_trilinear_nanovdb<OutT>(acc, x, y, z);
}
default: {
CachedReadAccessor<T> acc(grid->tree().root());
return kernel_tex_image_interp_tricubic_nanovdb<OutT>(acc, x, y, z);
}
}
}
#endif
@ -240,20 +303,20 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
#ifdef WITH_NANOVDB
if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) {
float f = kernel_tex_image_interp_nanovdb<float>(info, x, y, z, interpolation);
float f = kernel_tex_image_interp_nanovdb<float, float>(info, x, y, z, interpolation);
return make_float4(f, f, f, 1.0f);
}
if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
nanovdb::Vec3f f = kernel_tex_image_interp_nanovdb<nanovdb::Vec3f>(
float3 f = kernel_tex_image_interp_nanovdb<float3, packed_float3>(
info, x, y, z, interpolation);
return make_float4(f[0], f[1], f[2], 1.0f);
return make_float4(f.x, f.y, f.z, 1.0f);
}
if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FPN) {
float f = kernel_tex_image_interp_nanovdb<nanovdb::FpN>(info, x, y, z, interpolation);
float f = kernel_tex_image_interp_nanovdb<float, nanovdb::FpN>(info, x, y, z, interpolation);
return make_float4(f, f, f, 1.0f);
}
if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FP16) {
float f = kernel_tex_image_interp_nanovdb<nanovdb::Fp16>(info, x, y, z, interpolation);
float f = kernel_tex_image_interp_nanovdb<float, nanovdb::Fp16>(info, x, y, z, interpolation);
return make_float4(f, f, f, 1.0f);
}
#endif

View File

@ -668,7 +668,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
int sw,
int sh,
float threshold,
bool reset,
int reset,
int offset,
int stride,
ccl_global uint *num_active_pixels)
@ -1104,7 +1104,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
int pass_denoised,
int pass_sample_count,
int num_components,
bool use_compositing)
int use_compositing)
{
const int work_index = ccl_gpu_global_id_x();
const int y = work_index / width;

View File

@ -38,6 +38,7 @@ typedef unsigned long long uint64_t;
#define ccl_global
#define ccl_inline_constant __constant__
#define ccl_device_constant __constant__ __device__
#define ccl_static_constexpr static constexpr
#define ccl_constant const
#define ccl_gpu_shared __shared__
#define ccl_private

View File

@ -47,6 +47,7 @@ using namespace metal::raytracing;
#define ccl_global device
#define ccl_inline_constant static constant constexpr
#define ccl_device_constant constant
#define ccl_static_constexpr static constant constexpr
#define ccl_constant constant
#define ccl_gpu_shared threadgroup
#define ccl_private thread

View File

@ -5,10 +5,7 @@
// clang-format off
#ifdef WITH_NANOVDB
# define NDEBUG /* Disable "assert" in device code */
# define NANOVDB_USE_INTRINSICS
# include "nanovdb/NanoVDB.h"
# include "nanovdb/util/SampleFromVoxels.h"
# include "kernel/util/nanovdb.h"
#endif
/* Open the Metal kernel context class

View File

@ -40,7 +40,8 @@
#define ccl_device_inline inline
#define ccl_noinline __attribute__((noinline))
#define ccl_inline_constant const constexpr
#define ccl_static_constant const
#define ccl_device_constant static constexpr
#define ccl_static_constexpr static constexpr
#define ccl_device_forceinline __attribute__((always_inline))
#define ccl_device_noinline ccl_device ccl_noinline
#define ccl_device_noinline_cpu ccl_device

View File

@ -34,8 +34,7 @@ typedef struct ccl_vdb_double_t {
} ccl_vdb_double_t;
# define double ccl_vdb_double_t
# include <nanovdb/NanoVDB.h>
# include <nanovdb/util/SampleFromVoxels.h>
# include "kernel/util/nanovdb.h"
# undef double
#endif

View File

@ -202,48 +202,64 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals, int id, float x, float
}
#ifdef WITH_NANOVDB
template<typename T> struct NanoVDBInterpolator {
template<typename TexT, typename OutT> struct NanoVDBInterpolator {
typedef typename nanovdb::NanoGrid<T>::AccessorType AccessorType;
static ccl_always_inline float4 read(float r)
static ccl_always_inline float read(float r)
{
return make_float4(r, r, r, 1.0f);
return r;
}
static ccl_always_inline float4 read(nanovdb::Vec3f r)
static ccl_always_inline float4 read(const packed_float3 r)
{
return make_float4(r[0], r[1], r[2], 1.0f);
return make_float4(r.x, r.y, r.z, 1.0f);
}
static ccl_always_inline float4 interp_3d_closest(const AccessorType &acc,
float x,
float y,
float z)
template<typename Acc>
static ccl_always_inline OutT interp_3d_closest(const Acc &acc, float x, float y, float z)
{
const nanovdb::Vec3f xyz(x, y, z);
return read(nanovdb::SampleFromVoxels<AccessorType, 0, false>(acc)(xyz));
const nanovdb::Coord coord(int32_t(rintf(x)), int32_t(rintf(y)), int32_t(rintf(z)));
return read(acc.getValue(coord));
}
static ccl_always_inline float4 interp_3d_linear(const AccessorType &acc,
float x,
float y,
float z)
template<typename Acc>
static ccl_always_inline OutT interp_3d_linear(const Acc &acc, float x, float y, float z)
{
const nanovdb::Vec3f xyz(x - 0.5f, y - 0.5f, z - 0.5f);
return read(nanovdb::SampleFromVoxels<AccessorType, 1, false>(acc)(xyz));
int ix, iy, iz;
const float tx = svm_image_texture_frac(x - 0.5f, &ix);
const float ty = svm_image_texture_frac(y - 0.5f, &iy);
const float tz = svm_image_texture_frac(z - 0.5f, &iz);
return mix(mix(mix(read(acc.getValue(nanovdb::Coord(ix, iy, iz))),
read(acc.getValue(nanovdb::Coord(ix, iy, iz + 1))),
tz),
mix(read(acc.getValue(nanovdb::Coord(ix, iy + 1, iz + 1))),
read(acc.getValue(nanovdb::Coord(ix, iy + 1, iz))),
1.0f - tz),
ty),
mix(mix(read(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz))),
read(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz + 1))),
tz),
mix(read(acc.getValue(nanovdb::Coord(ix + 1, iy, iz + 1))),
read(acc.getValue(nanovdb::Coord(ix + 1, iy, iz))),
1.0f - tz),
1.0f - ty),
tx);
}
static float4 interp_3d_cubic(const AccessorType &acc, float x, float y, float z)
/* Tricubic b-spline interpolation. */
template<typename Acc>
static ccl_always_inline OutT interp_3d_cubic(const Acc &acc, float x, float y, float z)
{
int ix, iy, iz;
int nix, niy, niz;
int pix, piy, piz;
int nnix, nniy, nniz;
/* Tri-cubic b-spline interpolation. */
/* A -0.5 offset is used to center the cubic samples around the sample point. */
const float tx = svm_image_texture_frac(x - 0.5f, &ix);
const float ty = svm_image_texture_frac(y - 0.5f, &iy);
const float tz = svm_image_texture_frac(z - 0.5f, &iz);
pix = ix - 1;
piy = iy - 1;
piz = iz - 1;
@ -259,8 +275,8 @@ template<typename T> struct NanoVDBInterpolator {
const int zc[4] = {piz, iz, niz, nniz};
float u[4], v[4], w[4];
/* Some helper macro to keep code reasonable size,
* let compiler to inline all the matrix multiplications.
/* Some helper macros to keep code size reasonable.
* Lets the compiler inline all the matrix multiplications.
*/
# define DATA(x, y, z) (read(acc.getValue(nanovdb::Coord(xc[x], yc[y], zc[z]))))
# define COL_TERM(col, row) \
@ -281,21 +297,26 @@ template<typename T> struct NanoVDBInterpolator {
# undef DATA
}
static ccl_always_inline float4
static ccl_always_inline OutT
interp_3d(const TextureInfo &info, float x, float y, float z, int interp)
{
using namespace nanovdb;
NanoGrid<T> *const grid = (NanoGrid<T> *)info.data;
AccessorType acc