From 4d4f8bbfe460bcba9486c9f4b1868c1feb58c2c0 Mon Sep 17 00:00:00 2001
From: Xavier Hallade
Date: Wed, 31 Jan 2024 17:25:34 +0100
Subject: [PATCH] Cycles: set num_sort_partition_elements to 8192 for oneAPI

The default value of 65536 wasn't optimal on Intel GPUs; switching to
8192 gives a 0 to 15% performance improvement depending on the scene.
---
 intern/cycles/device/oneapi/queue.cpp | 5 +++++
 intern/cycles/device/oneapi/queue.h   | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/intern/cycles/device/oneapi/queue.cpp b/intern/cycles/device/oneapi/queue.cpp
index b9392f3c116..885e6ac10e7 100644
--- a/intern/cycles/device/oneapi/queue.cpp
+++ b/intern/cycles/device/oneapi/queue.cpp
@@ -52,6 +52,11 @@ int OneapiDeviceQueue::num_concurrent_busy_states(const size_t /*state_size*/) c
   return 4 * max(8 * max_num_threads, 65536);
 }
 
+int OneapiDeviceQueue::num_sort_partition_elements() const
+{
+  return 8192;
+}
+
 void OneapiDeviceQueue::init_execution()
 {
   oneapi_device_->load_texture_info();
diff --git a/intern/cycles/device/oneapi/queue.h b/intern/cycles/device/oneapi/queue.h
index c7fb832210c..d2f4eaa8851 100644
--- a/intern/cycles/device/oneapi/queue.h
+++ b/intern/cycles/device/oneapi/queue.h
@@ -28,6 +28,8 @@ class OneapiDeviceQueue : public DeviceQueue {
 
   virtual int num_concurrent_busy_states(const size_t state_size) const override;
 
+  virtual int num_sort_partition_elements() const override;
+
   virtual void init_execution() override;
 
   virtual bool enqueue(DeviceKernel kernel,