From bdf5649f36caa7892e75b12394a8f419d897a4ec Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 30 May 2023 11:12:05 +0200 Subject: [PATCH 1/2] Cycles: Remove redundant MetalRT workaround & add two useful DebugFlags This patch removes a workaround for an issue that is now understood to be undefined behaviour (and fixed by #108176). It also adds two useful debug flags that we would like to be available in Blender 3.6. Pull Request: https://projects.blender.org/blender/blender/pulls/108322 --- intern/cycles/device/metal/device_impl.mm | 4 ++- intern/cycles/device/metal/kernel.mm | 37 ++++++----------------- intern/cycles/util/debug.cpp | 6 ++++ intern/cycles/util/debug.h | 6 ++++ 4 files changed, 25 insertions(+), 28 deletions(-) diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm index c0c5f2e93d2..8b2949fb02f 100644 --- a/intern/cycles/device/metal/device_impl.mm +++ b/intern/cycles/device/metal/device_impl.mm @@ -345,7 +345,9 @@ string MetalDevice::preprocess_source(MetalPipelineType pso_type, case METAL_GPU_APPLE: global_defines += "#define __KERNEL_METAL_APPLE__\n"; # ifdef WITH_NANOVDB - global_defines += "#define WITH_NANOVDB\n"; + if (DebugFlags().metal.use_nanovdb) { + global_defines += "#define WITH_NANOVDB\n"; + } # endif break; } diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index 9177cff5598..18fc35d556e 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -677,7 +677,7 @@ void MetalKernelPipeline::compile() __block bool compilation_finished = false; __block string error_str; - if (loading_existing_archive) { + if (loading_existing_archive || !DebugFlags().metal.use_async_pso_creation) { /* Use the blocking variant of newComputePipelineStateWithDescriptor if an archive exists on * disk. It should load almost instantaneously, and will fail gracefully when loading a * corrupt archive (unlike the async variant). */ @@ -690,29 +690,6 @@ void MetalKernelPipeline::compile() error_str = err ? err : "nil"; } else { - /* TODO / MetalRT workaround: - * Workaround for a crash when addComputePipelineFunctionsWithDescriptor is called *after* - * newComputePipelineStateWithDescriptor with linked functions (i.e. with MetalRT enabled). - * Ideally we would like to call newComputePipelineStateWithDescriptor (async) first so we - * can bail out if needed, but we can stop the crash by flipping the order when there are - * linked functions. However when addComputePipelineFunctionsWithDescriptor is called first - * it will block while it builds the pipeline, offering no way of bailing out. */ - auto addComputePipelineFunctionsWithDescriptor = [&]() { - if (creating_new_archive && ShaderCache::running) { - NSError *error; - if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor - error:&error]) - { - NSString *errStr = [error localizedDescription]; - metal_printf("Failed to add PSO to archive:\n%s\n", - errStr ? [errStr UTF8String] : "nil"); - } - } - }; - if (linked_functions) { - addComputePipelineFunctionsWithDescriptor(); - } - /* Use the async variant of newComputePipelineStateWithDescriptor if no archive exists on * disk. This allows us to respond to app shutdown. */ [mtlDevice @@ -740,10 +717,16 @@ void MetalKernelPipeline::compile() while (ShaderCache::running && !compilation_finished) { std::this_thread::sleep_for(std::chrono::milliseconds(5)); } + } - /* Add pipeline into the new archive (unless we did it earlier). */ - if (pipeline && !linked_functions) { - addComputePipelineFunctionsWithDescriptor(); + if (creating_new_archive && pipeline) { + /* Add pipeline into the new archive. */ + NSError *error; + if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor + error:&error]) + { + NSString *errStr = [error localizedDescription]; + metal_printf("Failed to add PSO to archive:\n%s\n", errStr ? [errStr UTF8String] : "nil"); } } diff --git a/intern/cycles/util/debug.cpp b/intern/cycles/util/debug.cpp index 6425cafeb41..aa484bcbf06 100644 --- a/intern/cycles/util/debug.cpp +++ b/intern/cycles/util/debug.cpp @@ -72,6 +72,12 @@ void DebugFlags::Metal::reset() if (auto str = getenv("CYCLES_METAL_LOCAL_ATOMIC_SORT")) use_local_atomic_sort = (atoi(str) != 0); + + if (auto str = getenv("CYCLES_METAL_NANOVDB")) + use_nanovdb = (atoi(str) != 0); + + if (auto str = getenv("CYCLES_METAL_ASYNC_PSO_CREATION")) + use_async_pso_creation = (atoi(str) != 0); } DebugFlags::OptiX::OptiX() diff --git a/intern/cycles/util/debug.h b/intern/cycles/util/debug.h index 9efbdceaefe..4af3db1e54e 100644 --- a/intern/cycles/util/debug.h +++ b/intern/cycles/util/debug.h @@ -100,6 +100,12 @@ class DebugFlags { /* Whether local atomic sorting is enabled or not. */ bool use_local_atomic_sort = true; + + /* Whether nanovdb is enabled or not. */ + bool use_nanovdb = true; + + /* Whether async PSO creation is enabled or not. */ + bool use_async_pso_creation = true; }; /* Get instance of debug flags registry. */ From 50ba227740bc5f1bdcd9e5b2e87b3d2cafa7f08a Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Sat, 27 May 2023 14:19:19 +0200 Subject: [PATCH 2/2] Fix #108316: CUDA error rendering Attic scene The light tree dependent on the first threshold to evaluate to 1 when picking up an emitter. Pull Request: https://projects.blender.org/blender/blender/pulls/108323 --- intern/cycles/kernel/light/tree.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/intern/cycles/kernel/light/tree.h b/intern/cycles/kernel/light/tree.h index 8a8089520c8..32c5788db31 100644 --- a/intern/cycles/kernel/light/tree.h +++ b/intern/cycles/kernel/light/tree.h @@ -509,6 +509,19 @@ ccl_device void sample_resevoir(const int current_index, return; } total_weight += current_weight; + + /* When `-ffast-math` is used it is possible that the threshold is almost 1 but not quite. + * For this case we check the first assignment explicitly (instead of relying on the threshold to + * be 1, giving it certain probability). */ + if (selected_index == -1) { + selected_index = current_index; + selected_weight = current_weight; + /* The threshold is expected to be 1 in this case with strict mathematics, so no need to divide + * the rand. In fact, division in such case could lead the rand to exceed 1 because of division + * by something smaller than 1. */ + return; + } + float thresh = current_weight / total_weight; if (rand <= thresh) { selected_index = current_index; @@ -518,8 +531,10 @@ ccl_device void sample_resevoir(const int current_index, else { rand = (rand - thresh) / (1.0f - thresh); } - kernel_assert(rand >= 0.0f && rand <= 1.0f); - return; + + /* Ensure the `rand` is always within 0..1 range, which could be violated above when + * `-ffast-math` is used. */ + rand = saturatef(rand); } /* Pick an emitter from a leaf node using resevoir sampling, keep two reservoirs for upper and