diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp index d494b40f71d..68dec7f0af2 100644 --- a/intern/cycles/device/cpu/device_impl.cpp +++ b/intern/cycles/device/cpu/device_impl.cpp @@ -68,7 +68,8 @@ CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_ { /* Pick any kernel, all of them are supposed to have same level of microarchitecture * optimization. */ - VLOG(1) << "Using " << kernels.integrator_init_from_camera.get_uarch_name() << " CPU kernels."; + VLOG(1) << "Using " << get_cpu_kernels().integrator_init_from_camera.get_uarch_name() + << " CPU kernels."; if (info.cpu_threads == 0) { info.cpu_threads = TaskScheduler::num_threads(); @@ -296,11 +297,6 @@ void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) Device::build_bvh(bvh, progress, refit); } -const CPUKernels *CPUDevice::get_cpu_kernels() const -{ - return &kernels; -} - void CPUDevice::get_cpu_kernel_thread_globals( vector &kernel_thread_globals) { diff --git a/intern/cycles/device/cpu/device_impl.h b/intern/cycles/device/cpu/device_impl.h index 553728ccc3b..90d217bb624 100644 --- a/intern/cycles/device/cpu/device_impl.h +++ b/intern/cycles/device/cpu/device_impl.h @@ -57,8 +57,6 @@ class CPUDevice : public Device { RTCDevice embree_device; #endif - CPUKernels kernels; - CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_); ~CPUDevice(); @@ -90,7 +88,6 @@ class CPUDevice : public Device { void build_bvh(BVH *bvh, Progress &progress, bool refit) override; - virtual const CPUKernels *get_cpu_kernels() const override; virtual void get_cpu_kernel_thread_globals( vector &kernel_thread_globals) override; virtual void *get_cpu_osl_memory() override; diff --git a/intern/cycles/device/cpu/kernel.cpp b/intern/cycles/device/cpu/kernel.cpp index 3b253c094fd..91c472d41e8 100644 --- a/intern/cycles/device/cpu/kernel.cpp +++ b/intern/cycles/device/cpu/kernel.cpp @@ -26,6 +26,9 @@ CCL_NAMESPACE_BEGIN 
KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name) #define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name)) +#define REGISTER_KERNEL_FILM_CONVERT(name) \ + film_convert_##name(KERNEL_FUNCTIONS(film_convert_##name)), \ + film_convert_half_rgba_##name(KERNEL_FUNCTIONS(film_convert_half_rgba_##name)) CPUKernels::CPUKernels() : /* Integrator. */ @@ -50,11 +53,25 @@ CPUKernels::CPUKernels() REGISTER_KERNEL(adaptive_sampling_filter_x), REGISTER_KERNEL(adaptive_sampling_filter_y), /* Cryptomatte. */ - REGISTER_KERNEL(cryptomatte_postprocess) + REGISTER_KERNEL(cryptomatte_postprocess), + /* Film Convert. */ + REGISTER_KERNEL_FILM_CONVERT(depth), + REGISTER_KERNEL_FILM_CONVERT(mist), + REGISTER_KERNEL_FILM_CONVERT(sample_count), + REGISTER_KERNEL_FILM_CONVERT(float), + REGISTER_KERNEL_FILM_CONVERT(light_path), + REGISTER_KERNEL_FILM_CONVERT(float3), + REGISTER_KERNEL_FILM_CONVERT(motion), + REGISTER_KERNEL_FILM_CONVERT(cryptomatte), + REGISTER_KERNEL_FILM_CONVERT(shadow_catcher), + REGISTER_KERNEL_FILM_CONVERT(shadow_catcher_matte_with_shadow), + REGISTER_KERNEL_FILM_CONVERT(combined), + REGISTER_KERNEL_FILM_CONVERT(float4) { } #undef REGISTER_KERNEL +#undef REGISTER_KERNEL_FILM_CONVERT #undef KERNEL_FUNCTIONS CCL_NAMESPACE_END diff --git a/intern/cycles/device/cpu/kernel.h b/intern/cycles/device/cpu/kernel.h index 5beeaf148a1..406bd07ab3d 100644 --- a/intern/cycles/device/cpu/kernel.h +++ b/intern/cycles/device/cpu/kernel.h @@ -17,11 +17,13 @@ #pragma once #include "device/cpu/kernel_function.h" +#include "util/half.h" #include "util/types.h" CCL_NAMESPACE_BEGIN struct KernelGlobalsCPU; +struct KernelFilmConvert; struct IntegratorStateCPU; struct TileInfo; @@ -102,6 +104,41 @@ class CPUKernels { CryptomattePostprocessFunction cryptomatte_postprocess; + /* Film Convert. 
*/ + using FilmConvertFunction = CPUKernelFunction; + using FilmConvertHalfRGBAFunction = + CPUKernelFunction; + +#define KERNEL_FILM_CONVERT_FUNCTION(name) \ + FilmConvertFunction film_convert_##name; \ + FilmConvertHalfRGBAFunction film_convert_half_rgba_##name; + + KERNEL_FILM_CONVERT_FUNCTION(depth) + KERNEL_FILM_CONVERT_FUNCTION(mist) + KERNEL_FILM_CONVERT_FUNCTION(sample_count) + KERNEL_FILM_CONVERT_FUNCTION(float) + + KERNEL_FILM_CONVERT_FUNCTION(light_path) + KERNEL_FILM_CONVERT_FUNCTION(float3) + + KERNEL_FILM_CONVERT_FUNCTION(motion) + KERNEL_FILM_CONVERT_FUNCTION(cryptomatte) + KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher) + KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow) + KERNEL_FILM_CONVERT_FUNCTION(combined) + KERNEL_FILM_CONVERT_FUNCTION(float4) + +#undef KERNEL_FILM_CONVERT_FUNCTION + CPUKernels(); }; diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 3756fc251c0..bfbcdb20d5e 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -23,6 +23,7 @@ #include "device/queue.h" #include "device/cpu/device.h" +#include "device/cpu/kernel.h" #include "device/cuda/device.h" #include "device/dummy/device.h" #include "device/hip/device.h" @@ -361,10 +362,11 @@ unique_ptr Device::gpu_queue_create() return nullptr; } -const CPUKernels *Device::get_cpu_kernels() const +const CPUKernels &Device::get_cpu_kernels() { - LOG(FATAL) << "Device does not support CPU kernels."; - return nullptr; + /* Initialize CPU kernels once and reuse. */ + static CPUKernels kernels; + return kernels; } void Device::get_cpu_kernel_thread_globals( diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index fdd78b3aa4c..a7d47f23d54 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -178,7 +178,7 @@ class Device { * These may not be used on GPU or multi-devices. */ /* Get CPU kernel functions for native instruction set. 
*/ - virtual const CPUKernels *get_cpu_kernels() const; + static const CPUKernels &get_cpu_kernels(); /* Get kernel globals to pass to kernels. */ virtual void get_cpu_kernel_thread_globals( vector & /*kernel_thread_globals*/); diff --git a/intern/cycles/integrator/pass_accessor_cpu.cpp b/intern/cycles/integrator/pass_accessor_cpu.cpp index 820da757be0..77ca332d142 100644 --- a/intern/cycles/integrator/pass_accessor_cpu.cpp +++ b/intern/cycles/integrator/pass_accessor_cpu.cpp @@ -14,9 +14,12 @@ * limitations under the License. */ +#include "device/device.h" + #include "integrator/pass_accessor_cpu.h" #include "session/buffers.h" + #include "util/log.h" #include "util/tbb.h" @@ -33,70 +36,16 @@ CCL_NAMESPACE_BEGIN * Kernel processing. */ -template -inline void PassAccessorCPU::run_get_pass_kernel_processor(const RenderBuffers *render_buffers, - const BufferParams &buffer_params, - const Destination &destination, - const Processor &processor) const -{ - KernelFilmConvert kfilm_convert; - init_kernel_film_convert(&kfilm_convert, buffer_params, destination); - - if (destination.pixels) { - /* NOTE: No overlays are applied since they are not used for final renders. - * Can be supported via some sort of specialization to avoid code duplication. */ - - run_get_pass_kernel_processor_float( - &kfilm_convert, render_buffers, buffer_params, destination, processor); - } - - if (destination.pixels_half_rgba) { - /* TODO(sergey): Consider adding specialization to avoid per-pixel overlay check. 
*/ - - if (destination.num_components == 1) { - run_get_pass_kernel_processor_half_rgba(&kfilm_convert, - render_buffers, - buffer_params, - destination, - [&processor](const KernelFilmConvert *kfilm_convert, - ccl_global const float *buffer, - float *pixel_rgba) { - float pixel; - processor(kfilm_convert, buffer, &pixel); - - pixel_rgba[0] = pixel; - pixel_rgba[1] = pixel; - pixel_rgba[2] = pixel; - pixel_rgba[3] = 1.0f; - }); - } - else if (destination.num_components == 3) { - run_get_pass_kernel_processor_half_rgba(&kfilm_convert, - render_buffers, - buffer_params, - destination, - [&processor](const KernelFilmConvert *kfilm_convert, - ccl_global const float *buffer, - float *pixel_rgba) { - processor(kfilm_convert, buffer, pixel_rgba); - pixel_rgba[3] = 1.0f; - }); - } - else if (destination.num_components == 4) { - run_get_pass_kernel_processor_half_rgba( - &kfilm_convert, render_buffers, buffer_params, destination, processor); - } - } -} - -template inline void PassAccessorCPU::run_get_pass_kernel_processor_float( const KernelFilmConvert *kfilm_convert, const RenderBuffers *render_buffers, const BufferParams &buffer_params, const Destination &destination, - const Processor &processor) const + const CPUKernels::FilmConvertFunction func) const { + /* NOTE: No overlays are applied since they are not used for final renders. + * Can be supported via some sort of specialization to avoid code duplication. 
*/ + DCHECK_EQ(destination.stride, 0) << "Custom stride for float destination is not implemented."; const int64_t pass_stride = buffer_params.pass_stride; @@ -112,21 +61,16 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_float( const float *buffer = window_data + y * buffer_row_stride; float *pixel = destination.pixels + (y * buffer_params.width + destination.offset) * pixel_stride; - - for (int64_t x = 0; x < buffer_params.window_width; - ++x, buffer += pass_stride, pixel += pixel_stride) { - processor(kfilm_convert, buffer, pixel); - } + func(kfilm_convert, buffer, pixel, buffer_params.window_width, pass_stride, pixel_stride); }); } -template inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba( const KernelFilmConvert *kfilm_convert, const RenderBuffers *render_buffers, const BufferParams &buffer_params, const Destination &destination, - const Processor &processor) const + const CPUKernels::FilmConvertHalfRGBAFunction func) const { const int64_t pass_stride = buffer_params.pass_stride; const int64_t buffer_row_stride = buffer_params.stride * buffer_params.pass_stride; @@ -141,16 +85,7 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba( tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) { const float *buffer = window_data + y * buffer_row_stride; half4 *pixel = dst_start + y * destination_stride; - for (int64_t x = 0; x < buffer_params.window_width; ++x, buffer += pass_stride, ++pixel) { - - float pixel_rgba[4]; - processor(kfilm_convert, buffer, pixel_rgba); - - film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba); - - *pixel = float4_to_half4_display( - make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3])); - } + func(kfilm_convert, buffer, pixel, buffer_params.window_width, pass_stride); }); } @@ -163,8 +98,25 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba( const BufferParams &buffer_params, \ const Destination &destination) const \ { 
\ - run_get_pass_kernel_processor( \ - render_buffers, buffer_params, destination, film_get_pass_pixel_##pass); \ + const CPUKernels &kernels = Device::get_cpu_kernels(); \ + KernelFilmConvert kfilm_convert; \ + init_kernel_film_convert(&kfilm_convert, buffer_params, destination); \ +\ + if (destination.pixels) { \ + run_get_pass_kernel_processor_float(&kfilm_convert, \ + render_buffers, \ + buffer_params, \ + destination, \ + kernels.film_convert_##pass); \ + } \ +\ + if (destination.pixels_half_rgba) { \ + run_get_pass_kernel_processor_half_rgba(&kfilm_convert, \ + render_buffers, \ + buffer_params, \ + destination, \ + kernels.film_convert_half_rgba_##pass); \ + } \ } /* Float (scalar) passes. */ diff --git a/intern/cycles/integrator/pass_accessor_cpu.h b/intern/cycles/integrator/pass_accessor_cpu.h index 0313dc5bb0d..9ed38ab256e 100644 --- a/intern/cycles/integrator/pass_accessor_cpu.h +++ b/intern/cycles/integrator/pass_accessor_cpu.h @@ -16,6 +16,8 @@ #pragma once +#include "device/cpu/kernel.h" + #include "integrator/pass_accessor.h" CCL_NAMESPACE_BEGIN @@ -28,25 +30,19 @@ class PassAccessorCPU : public PassAccessor { using PassAccessor::PassAccessor; protected: - template - inline void run_get_pass_kernel_processor(const RenderBuffers *render_buffers, - const BufferParams &buffer_params, - const Destination &destination, - const Processor &processor) const; + inline void run_get_pass_kernel_processor_float( + const KernelFilmConvert *kfilm_convert, + const RenderBuffers *render_buffers, + const BufferParams &buffer_params, + const Destination &destination, + const CPUKernels::FilmConvertFunction func) const; - template - inline void run_get_pass_kernel_processor_float(const KernelFilmConvert *kfilm_convert, - const RenderBuffers *render_buffers, - const BufferParams &buffer_params, - const Destination &destination, - const Processor &processor) const; - - template - inline void run_get_pass_kernel_processor_half_rgba(const KernelFilmConvert *kfilm_convert, 
- const RenderBuffers *render_buffers, - const BufferParams &buffer_params, - const Destination &destination, - const Processor &processor) const; + inline void run_get_pass_kernel_processor_half_rgba( + const KernelFilmConvert *kfilm_convert, + const RenderBuffers *render_buffers, + const BufferParams &buffer_params, + const Destination &destination, + const CPUKernels::FilmConvertHalfRGBAFunction func) const; #define DECLARE_PASS_ACCESSOR(pass) \ virtual void get_pass_##pass(const RenderBuffers *render_buffers, \ diff --git a/intern/cycles/integrator/path_trace_work_cpu.cpp b/intern/cycles/integrator/path_trace_work_cpu.cpp index 541a7eca02f..36ce2be9f6d 100644 --- a/intern/cycles/integrator/path_trace_work_cpu.cpp +++ b/intern/cycles/integrator/path_trace_work_cpu.cpp @@ -58,7 +58,7 @@ PathTraceWorkCPU::PathTraceWorkCPU(Device *device, DeviceScene *device_scene, bool *cancel_requested_flag) : PathTraceWork(device, film, device_scene, cancel_requested_flag), - kernels_(*(device->get_cpu_kernels())) + kernels_(Device::get_cpu_kernels()) { DCHECK_EQ(device->info.type, DEVICE_CPU); } diff --git a/intern/cycles/integrator/shader_eval.cpp b/intern/cycles/integrator/shader_eval.cpp index 42cbf87f254..9ec530c81df 100644 --- a/intern/cycles/integrator/shader_eval.cpp +++ b/intern/cycles/integrator/shader_eval.cpp @@ -96,7 +96,7 @@ bool ShaderEval::eval_cpu(Device *device, device->get_cpu_kernel_thread_globals(kernel_thread_globals); /* Find required kernel function. */ - const CPUKernels &kernels = *(device->get_cpu_kernels()); + const CPUKernels &kernels = Device::get_cpu_kernels(); /* Simple parallel_for over all work items. 
*/ KernelShaderEvalInput *input_data = input.data(); diff --git a/intern/cycles/kernel/device/cpu/kernel.h b/intern/cycles/kernel/device/cpu/kernel.h index c49d7ca445a..6af8094b1ea 100644 --- a/intern/cycles/kernel/device/cpu/kernel.h +++ b/intern/cycles/kernel/device/cpu/kernel.h @@ -18,6 +18,7 @@ /* CPU Kernel Interface */ +#include "util/half.h" #include "util/types.h" #include "kernel/types.h" diff --git a/intern/cycles/kernel/device/cpu/kernel_arch.h b/intern/cycles/kernel/device/cpu/kernel_arch.h index 432ac5e15a9..2f9a3f7c59d 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch.h @@ -52,6 +52,37 @@ KERNEL_INTEGRATOR_SHADE_FUNCTION(megakernel); #undef KERNEL_INTEGRATOR_INIT_FUNCTION #undef KERNEL_INTEGRATOR_SHADE_FUNCTION +#define KERNEL_FILM_CONVERT_FUNCTION(name) \ + void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \ + const float *buffer, \ + float *pixel, \ + const int width, \ + const int buffer_stride, \ + const int pixel_stride); \ + void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \ + const KernelFilmConvert *kfilm_convert, \ + const float *buffer, \ + half4 *pixel, \ + const int width, \ + const int buffer_stride); + +KERNEL_FILM_CONVERT_FUNCTION(depth) +KERNEL_FILM_CONVERT_FUNCTION(mist) +KERNEL_FILM_CONVERT_FUNCTION(sample_count) +KERNEL_FILM_CONVERT_FUNCTION(float) + +KERNEL_FILM_CONVERT_FUNCTION(light_path) +KERNEL_FILM_CONVERT_FUNCTION(float3) + +KERNEL_FILM_CONVERT_FUNCTION(motion) +KERNEL_FILM_CONVERT_FUNCTION(cryptomatte) +KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher) +KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow) +KERNEL_FILM_CONVERT_FUNCTION(combined) +KERNEL_FILM_CONVERT_FUNCTION(float4) + +#undef KERNEL_FILM_CONVERT_FUNCTION + /* -------------------------------------------------------------------- * Shader evaluation. 
*/ diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h index 6df5d7787fc..1ea5002e300 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h @@ -47,8 +47,8 @@ # include "kernel/integrator/megakernel.h" # include "kernel/film/adaptive_sampling.h" -# include "kernel/film/read.h" # include "kernel/film/id_passes.h" +# include "kernel/film/read.h" # include "kernel/bake/bake.h" @@ -232,6 +232,85 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU * #endif } +/* -------------------------------------------------------------------- + * Film Convert. + */ + +#ifdef KERNEL_STUB + +# define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \ + void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \ + const float *buffer, \ + float *pixel, \ + const int width, \ + const int buffer_stride, \ + const int pixel_stride) \ + { \ + STUB_ASSERT(KERNEL_ARCH, film_convert_##name); \ + } \ + void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \ + const KernelFilmConvert *kfilm_convert, \ + const float *buffer, \ + half4 *pixel, \ + const int width, \ + const int buffer_stride) \ + { \ + STUB_ASSERT(KERNEL_ARCH, film_convert_##name); \ + } + +#else + +# define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \ + void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \ + const float *buffer, \ + float *pixel, \ + const int width, \ + const int buffer_stride, \ + const int pixel_stride) \ + { \ + for (int i = 0; i < width; i++, buffer += buffer_stride, pixel += pixel_stride) { \ + film_get_pass_pixel_##name(kfilm_convert, buffer, pixel); \ + } \ + } \ + void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \ + const KernelFilmConvert *kfilm_convert, \ + const float *buffer, \ + half4 *pixel, \ + const int width, \ + const int buffer_stride) \ + { 
\ + for (int i = 0; i < width; i++, buffer += buffer_stride, pixel++) { \ + float pixel_rgba[4] = {0.0f, 0.0f, 0.0f, 1.0f}; \ + film_get_pass_pixel_##name(kfilm_convert, buffer, pixel_rgba); \ + if (is_float) { \ + pixel_rgba[1] = pixel_rgba[0]; \ + pixel_rgba[2] = pixel_rgba[0]; \ + } \ + film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba); \ + *pixel = float4_to_half4_display( \ + make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3])); \ + } \ + } + +#endif + +KERNEL_FILM_CONVERT_FUNCTION(depth, true) +KERNEL_FILM_CONVERT_FUNCTION(mist, true) +KERNEL_FILM_CONVERT_FUNCTION(sample_count, true) +KERNEL_FILM_CONVERT_FUNCTION(float, true) + +KERNEL_FILM_CONVERT_FUNCTION(light_path, false) +KERNEL_FILM_CONVERT_FUNCTION(float3, false) + +KERNEL_FILM_CONVERT_FUNCTION(motion, false) +KERNEL_FILM_CONVERT_FUNCTION(cryptomatte, false) +KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher, false) +KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow, false) +KERNEL_FILM_CONVERT_FUNCTION(combined, false) +KERNEL_FILM_CONVERT_FUNCTION(float4, false) + +#undef KERNEL_FILM_CONVERT_FUNCTION + #undef KERNEL_INVOKE #undef DEFINE_INTEGRATOR_KERNEL #undef DEFINE_INTEGRATOR_SHADE_KERNEL diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h index 2cac18ed889..5522b46205b 100644 --- a/intern/cycles/kernel/integrator/intersect_closest.h +++ b/intern/cycles/kernel/integrator/intersect_closest.h @@ -31,7 +31,6 @@ CCL_NAMESPACE_BEGIN -template ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg, IntegratorState state, const int shader_flags) @@ -86,36 +85,75 @@ ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg, return false; } -/* Note that current_kernel is a template value since making this a variable - * leads to poor performance with CUDA atomics. 
*/ -template -ccl_device_forceinline void integrator_intersect_shader_next_kernel( - KernelGlobals kg, - IntegratorState state, - ccl_private const Intersection *ccl_restrict isect, - const int shader, - const int shader_flags) +#ifdef __SHADOW_CATCHER__ +/* Split path if a shadow catcher was hit. */ +ccl_device_forceinline void integrator_split_shadow_catcher( + KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect) { - /* Note on scheduling. - * - * When there is no shadow catcher split the scheduling is simple: schedule surface shading with - * or without raytrace support, depending on the shader used. - * - * When there is a shadow catcher split the general idea is to have the following configuration: - * - * - Schedule surface shading kernel (with corresponding raytrace support) for the ray which - * will trace shadow catcher object. - * - * - When no alpha-over of approximate shadow catcher is needed, schedule surface shading for - * the matte ray. - * - * - Otherwise schedule background shading kernel, so that we have a background to alpha-over - * on. The background kernel will then schedule surface shading for the matte ray. + /* Test if we hit a shadow catcher object, and potentially split the path to continue tracing two + * paths from here. */ + const int object_flags = intersection_get_object_flags(kg, isect); + if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, object_flags)) { + return; + } + + /* Mark state as having done a shadow catcher split so that it stops contributing to + * the shadow catcher matte pass, but keeps contributing to the combined pass. */ + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT; + + /* Copy current state to new state. */ + state = integrator_state_shadow_catcher_split(kg, state); + + /* Initialize new state. * * Note that the splitting leaves kernel and sorting counters as-is, so use INIT semantic for * the matte path. 
*/ - const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); + /* Mark current state so that it will only track contribution of shadow catcher objects ignoring + * non-catcher objects. */ + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_PASS; + + if (kernel_data.film.pass_background != PASS_UNUSED && !kernel_data.background.transparent) { + /* If using background pass, schedule background shading kernel so that we have a background + * to alpha-over on. The background kernel will then continue the path afterwards. */ + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND; + INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + return; + } + + if (!integrator_state_volume_stack_is_empty(kg, state)) { + /* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher + * objects from it, and then continue shading volume and shadow catcher surface after. */ + INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + return; + } + + /* Continue with shading shadow catcher surface. */ + const int shader = intersection_get_shader(kg, isect); + const int flags = kernel_tex_fetch(__shaders, shader).flags; + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); + + if (use_raytrace_kernel) { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + } + else { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + } +} + +/* Schedule next kernel to be executed after updating volume stack for shadow catcher. */ +template +ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_volume( + KernelGlobals kg, IntegratorState state) +{ + /* Continue with shading shadow catcher surface. Same as integrator_split_shadow_catcher, but + * using NEXT instead of INIT. 
*/ + Intersection isect ccl_optional_struct_init; + integrator_state_read_isect(kg, state, &isect); + + const int shader = intersection_get_shader(kg, &isect); + const int flags = kernel_tex_fetch(__shaders, shader).flags; + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED( @@ -124,23 +162,132 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel( else { INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } +} -#ifdef __SHADOW_CATCHER__ - const int object_flags = intersection_get_object_flags(kg, isect); - if (kernel_shadow_catcher_split(kg, state, object_flags)) { - if (kernel_data.film.pass_background != PASS_UNUSED && !kernel_data.background.transparent) { - INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND; +/* Schedule next kernel to be executed after executing background shader for shadow catcher. */ +template +ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_background( + KernelGlobals kg, IntegratorState state) +{ + /* Same logic as integrator_split_shadow_catcher, but using NEXT instead of INIT. */ + if (!integrator_state_volume_stack_is_empty(kg, state)) { + /* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher + * objects from it, and then continue shading volume and shadow catcher surface after. */ + INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + return; + } - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); - } - else if (use_raytrace_kernel) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + /* Continue with shading shadow catcher surface. */ + integrator_intersect_next_kernel_after_shadow_catcher_volume(kg, state); +} +#endif + +/* Schedule next kernel to be executed after intersect closest. 
+ * + * Note that current_kernel is a template value since making this a variable + * leads to poor performance with CUDA atomics. */ +template +ccl_device_forceinline void integrator_intersect_next_kernel( + KernelGlobals kg, + IntegratorState state, + ccl_private const Intersection *ccl_restrict isect, + const bool hit) +{ + /* Continue with volume kernel if we are inside a volume, regardless if we hit anything. */ +#ifdef __VOLUME__ + if (!integrator_state_volume_stack_is_empty(kg, state)) { + const bool hit_surface = hit && !(isect->type & PRIMITIVE_LAMP); + const int shader = (hit_surface) ? intersection_get_shader(kg, isect) : SHADER_NONE; + const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0; + + if (!integrator_intersect_terminate(kg, state, flags)) { + INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); } else { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + INTEGRATOR_PATH_TERMINATE(current_kernel); } + return; } #endif + + if (hit) { + /* Hit a surface, continue with light or surface kernel. */ + if (isect->type & PRIMITIVE_LAMP) { + INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + } + else { + /* Hit a surface, continue with surface kernel unless terminated. */ + const int shader = intersection_get_shader(kg, isect); + const int flags = kernel_tex_fetch(__shaders, shader).flags; + + if (!integrator_intersect_terminate(kg, state, flags)) { + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); + if (use_raytrace_kernel) { + INTEGRATOR_PATH_NEXT_SORTED( + current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + } + else { + INTEGRATOR_PATH_NEXT_SORTED( + current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + } + +#ifdef __SHADOW_CATCHER__ + /* Handle shadow catcher. 
*/ + integrator_split_shadow_catcher(kg, state, isect); +#endif + } + else { + INTEGRATOR_PATH_TERMINATE(current_kernel); + } + } + } + else { + /* Nothing hit, continue with background kernel. */ + INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + } +} + +/* Schedule next kernel to be executed after shade volume. + * + * The logic here matches integrator_intersect_next_kernel, except that + * volume shading and termination testing have already been done. */ +template +ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( + KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect) +{ + if (isect->prim != PRIM_NONE) { + /* Hit a surface, continue with light or surface kernel. */ + if (isect->type & PRIMITIVE_LAMP) { + INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + return; + } + else { + /* Hit a surface, continue with surface kernel unless terminated. */ + const int shader = intersection_get_shader(kg, isect); + const int flags = kernel_tex_fetch(__shaders, shader).flags; + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); + + if (use_raytrace_kernel) { + INTEGRATOR_PATH_NEXT_SORTED( + current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + } + else { + INTEGRATOR_PATH_NEXT_SORTED( + current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + } + +#ifdef __SHADOW_CATCHER__ + /* Handle shadow catcher. */ + integrator_split_shadow_catcher(kg, state, isect); +#endif + return; + } + } + else { + /* Nothing hit, continue with background kernel. */ + INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + return; + } } ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState state) @@ -192,56 +339,9 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState s /* Write intersection result into global integrator state memory. 
*/ integrator_state_write_isect(kg, state, &isect); -#ifdef __VOLUME__ - if (!integrator_state_volume_stack_is_empty(kg, state)) { - const bool hit_surface = hit && !(isect.type & PRIMITIVE_LAMP); - const int shader = (hit_surface) ? intersection_get_shader(kg, &isect) : SHADER_NONE; - const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0; - - if (!integrator_intersect_terminate( - kg, state, flags)) { - /* Continue with volume kernel if we are inside a volume, regardless - * if we hit anything. */ - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, - DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); - } - else { - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); - } - return; - } -#endif - - if (hit) { - /* Hit a surface, continue with light or surface kernel. */ - if (isect.type & PRIMITIVE_LAMP) { - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, - DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); - return; - } - else { - /* Hit a surface, continue with surface kernel unless terminated. */ - const int shader = intersection_get_shader(kg, &isect); - const int flags = kernel_tex_fetch(__shaders, shader).flags; - - if (!integrator_intersect_terminate( - kg, state, flags)) { - integrator_intersect_shader_next_kernel( - kg, state, &isect, shader, flags); - return; - } - else { - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); - return; - } - } - } - else { - /* Nothing hit, continue with background kernel. */ - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, - DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); - return; - } + /* Set up next kernel to be executed. 
*/ + integrator_intersect_next_kernel( + kg, state, &isect, hit); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/intersect_volume_stack.h b/intern/cycles/kernel/integrator/intersect_volume_stack.h index dd0587db9d8..9fa5ff63ad2 100644 --- a/intern/cycles/kernel/integrator/intersect_volume_stack.h +++ b/intern/cycles/kernel/integrator/intersect_volume_stack.h @@ -42,10 +42,13 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, /* Store to avoid global fetches on every intersection step. */ const uint volume_stack_size = kernel_data.volume_stack_size; + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + const uint32_t visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, PATH_RAY_ALL_VISIBILITY); + #ifdef __VOLUME_RECORD_ALL__ Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1]; uint num_hits = scene_intersect_volume_all( - kg, &volume_ray, hits, 2 * volume_stack_size, PATH_RAY_ALL_VISIBILITY); + kg, &volume_ray, hits, 2 * volume_stack_size, visibility); if (num_hits > 0) { Intersection *isect = hits; @@ -60,7 +63,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, Intersection isect; int step = 0; while (step < 2 * volume_stack_size && - scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) { + scene_intersect_volume(kg, &volume_ray, &isect, visibility)) { shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect); volume_stack_enter_exit(kg, state, stack_sd); @@ -74,7 +77,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, #endif } -ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorState state) +ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState state) { PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK); @@ -89,14 +92,20 @@ ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorSt volume_ray.D = make_float3(0.0f, 0.0f, 1.0f); volume_ray.t = 
FLT_MAX; - const uint visibility = (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_ALL_VISIBILITY); int stack_index = 0, enclosed_index = 0; - /* Write background shader. */ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + const uint32_t visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, PATH_RAY_CAMERA); + + /* Initialize volume stack with background volume. For shadow catcher the + * background volume is always assumed to be CG. */ if (kernel_data.background.volume_shader != SHADER_NONE) { - const VolumeStack new_entry = {OBJECT_NONE, kernel_data.background.volume_shader}; - integrator_state_write_volume_stack(state, stack_index, new_entry); - stack_index++; + if (!(path_flag & PATH_RAY_SHADOW_CATCHER_PASS)) { + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, stack_index, object) = OBJECT_NONE; + INTEGRATOR_STATE_ARRAY_WRITE( + state, volume_stack, stack_index, shader) = kernel_data.background.volume_shader; + stack_index++; + } } /* Store to avoid global fetches on every intersection step. */ @@ -202,9 +211,22 @@ ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorSt /* Write terminator. */ const VolumeStack new_entry = {OBJECT_NONE, SHADER_NONE}; integrator_state_write_volume_stack(state, stack_index, new_entry); +} - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, - DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); +ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorState state) +{ + integrator_volume_stack_init(kg, state); + + if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_PASS) { + /* Volume stack re-init for shadow catcher, continue with shading of hit. */ + integrator_intersect_next_kernel_after_shadow_catcher_volume< + DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK>(kg, state); + } + else { + /* Volume stack init for camera rays, continue with intersection of camera ray. 
*/ + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shade_background.h b/intern/cycles/kernel/integrator/shade_background.h index 71a590749bd..24482e85b05 100644 --- a/intern/cycles/kernel/integrator/shade_background.h +++ b/intern/cycles/kernel/integrator/shade_background.h @@ -192,23 +192,11 @@ ccl_device void integrator_shade_background(KernelGlobals kg, #ifdef __SHADOW_CATCHER__ if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) { + /* Special case for shadow catcher where we want to fill the background pass + * behind the shadow catcher but also continue tracing the path. */ INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND; - - const int isect_prim = INTEGRATOR_STATE(state, isect, prim); - const int isect_type = INTEGRATOR_STATE(state, isect, type); - const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim, isect_type); - const int shader_flags = kernel_tex_fetch(__shaders, shader).flags; - - if (shader_flags & SD_HAS_RAYTRACE) { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND, - DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, - shader); - } - else { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND, - DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, - shader); - } + integrator_intersect_next_kernel_after_shadow_catcher_background< + DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND>(kg, state); return; } #endif diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index 05959bef220..412be289ebe 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -1023,25 +1023,9 @@ ccl_device void integrator_shade_volume(KernelGlobals kg, } else { /* Continue to background, light or surface. 
*/ - if (isect.prim == PRIM_NONE) { - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, - DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); - return; - } - else if (isect.type & PRIMITIVE_LAMP) { - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, - DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); - return; - } - else { - /* Hit a surface, continue with surface kernel unless terminated. */ - const int shader = intersection_get_shader(kg, &isect); - const int flags = kernel_tex_fetch(__shaders, shader).flags; - - integrator_intersect_shader_next_kernel( - kg, state, &isect, shader, flags); - return; - } + integrator_intersect_next_kernel_after_volume( + kg, state, &isect); + return; } #endif /* __VOLUME__ */ } diff --git a/intern/cycles/kernel/integrator/shadow_catcher.h b/intern/cycles/kernel/integrator/shadow_catcher.h index 7beae235dbc..ac55678c9cb 100644 --- a/intern/cycles/kernel/integrator/shadow_catcher.h +++ b/intern/cycles/kernel/integrator/shadow_catcher.h @@ -76,33 +76,6 @@ ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg, return (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND) != 0; } -/* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths - * after this function. */ -ccl_device_inline bool kernel_shadow_catcher_split(KernelGlobals kg, - IntegratorState state, - const int object_flags) -{ -#ifdef __SHADOW_CATCHER__ - - if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, object_flags)) { - return false; - } - - /* The split is to be done. Mark the current state as such, so that it stops contributing to the - * shadow catcher matte pass, but keeps contributing to the combined pass. */ - INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT; - - /* Split new state from the current one. This new state will only track contribution of shadow - * catcher objects ignoring non-catcher objects. 
*/ - integrator_state_shadow_catcher_split(kg, state); - - return true; -#else - (void)object_flags; - return false; -#endif -} - #ifdef __SHADOW_CATCHER__ ccl_device_forceinline bool kernel_shadow_catcher_is_matte_path(const uint32_t path_flag) diff --git a/intern/cycles/kernel/integrator/state.h b/intern/cycles/kernel/integrator/state.h index 86dac0a65cf..ed2a0be3068 100644 --- a/intern/cycles/kernel/integrator/state.h +++ b/intern/cycles/kernel/integrator/state.h @@ -173,10 +173,10 @@ typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState; /* Array access on GPU with Structure-of-Arrays. */ -typedef const int IntegratorState; -typedef const int ConstIntegratorState; -typedef const int IntegratorShadowState; -typedef const int ConstIntegratorShadowState; +typedef int IntegratorState; +typedef int ConstIntegratorState; +typedef int IntegratorShadowState; +typedef int ConstIntegratorShadowState; # define INTEGRATOR_STATE_NULL -1 diff --git a/intern/cycles/kernel/integrator/state_util.h b/intern/cycles/kernel/integrator/state_util.h index dafe06e7009..99dae83233c 100644 --- a/intern/cycles/kernel/integrator/state_util.h +++ b/intern/cycles/kernel/integrator/state_util.h @@ -326,8 +326,8 @@ ccl_device_inline void integrator_shadow_state_move(KernelGlobals kg, /* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths * after this function. */ -ccl_device_inline void integrator_state_shadow_catcher_split(KernelGlobals kg, - IntegratorState state) +ccl_device_inline IntegratorState integrator_state_shadow_catcher_split(KernelGlobals kg, + IntegratorState state) { #if defined(__KERNEL_GPU__) ConstIntegratorState to_state = atomic_fetch_and_add_uint32( @@ -337,14 +337,14 @@ ccl_device_inline void integrator_state_shadow_catcher_split(KernelGlobals kg, #else IntegratorStateCPU *ccl_restrict to_state = state + 1; - /* Only copy the required subset, since shadow intersections are big and irrelevant here. 
*/ + /* Only copy the required subset for performance. */ to_state->path = state->path; to_state->ray = state->ray; to_state->isect = state->isect; integrator_state_copy_volume_stack(kg, to_state, state); #endif - INTEGRATOR_STATE_WRITE(to_state, path, flag) |= PATH_RAY_SHADOW_CATCHER_PASS; + return to_state; } #ifdef __KERNEL_CPU__ diff --git a/source/blender/editors/interface/interface_icons.c b/source/blender/editors/interface/interface_icons.c index 5784af90834..c1dd4fcb4e4 100644 --- a/source/blender/editors/interface/interface_icons.c +++ b/source/blender/editors/interface/interface_icons.c @@ -1503,7 +1503,8 @@ static void icon_draw_rect(float x, int draw_w = w; int draw_h = h; int draw_x = x; - int draw_y = y; + /* We need to round y, to avoid the icon jittering in some cases. */ + int draw_y = round_fl_to_int(y); /* sanity check */ if (w <= 0 || h <= 0 || w > 2000 || h > 2000) { diff --git a/source/blender/editors/interface/interface_widgets.c b/source/blender/editors/interface/interface_widgets.c index 4b11ed61657..7d1b7b80ccd 100644 --- a/source/blender/editors/interface/interface_widgets.c +++ b/source/blender/editors/interface/interface_widgets.c @@ -1407,8 +1407,8 @@ static void widget_draw_icon( /* force positions to integers, for zoom levels near 1. draws icons crisp. */ if (aspect > 0.95f && aspect < 1.05f) { - xs = (int)(xs + 0.1f); - ys = (int)(ys + 0.1f); + xs = roundf(xs); + ys = roundf(ys); } /* Get theme color. */