Cycles: set num_sort_partition_elements to 8192 for oneAPI

The default value of 65536 wasn't optimal on Intel GPUs; switching to
8192 gives a 0 to 15% performance improvement, depending on the scene.
This commit is contained in:
Xavier Hallade 2024-01-31 17:25:34 +01:00
parent f9a0e825c6
commit 4d4f8bbfe4
2 changed files with 7 additions and 0 deletions

View File

@ -52,6 +52,11 @@ int OneapiDeviceQueue::num_concurrent_busy_states(const size_t /*state_size*/) c
return 4 * max(8 * max_num_threads, 65536);
}
/* Sort partition element count reported by the oneAPI device queue.
 *
 * The default value of 65536 wasn't optimal on Intel GPUs; 8192 gave a
 * 0 to 15% performance improvement depending on the scene. */
int OneapiDeviceQueue::num_sort_partition_elements() const
{
  constexpr int sort_partition_elements = 8192;
  return sort_partition_elements;
}
void OneapiDeviceQueue::init_execution()
{
oneapi_device_->load_texture_info();

View File

@ -28,6 +28,8 @@ class OneapiDeviceQueue : public DeviceQueue {
virtual int num_concurrent_busy_states(const size_t state_size) const override;
virtual int num_sort_partition_elements() const override;
virtual void init_execution() override;
virtual bool enqueue(DeviceKernel kernel,