Cycles: Metal support for OpenImageDenoise
This is supported on Apple Silicon GPUs and macOS 13.0+.

Co-authored-by: Stefan Werner <stefan.werner@intel.com>
Co-authored-by: Attila Afra <attila.t.afra@intel.com>
Pull Request: https://projects.blender.org/blender/blender/pulls/116124
This commit is contained in:
parent
691584da1b
commit
31d55e87f9
|
@ -9,7 +9,12 @@ set(OIDN_EXTRA_ARGS
|
|||
-DOIDN_FILTER_RTLIGHTMAP=OFF
|
||||
-DPython_EXECUTABLE=${PYTHON_BINARY}
|
||||
)
|
||||
if(NOT APPLE)
|
||||
if(APPLE)
|
||||
set(OIDN_EXTRA_ARGS
|
||||
${OIDN_EXTRA_ARGS}
|
||||
-DOIDN_DEVICE_METAL=ON
|
||||
)
|
||||
else()
|
||||
set(OIDN_EXTRA_ARGS
|
||||
${OIDN_EXTRA_ARGS}
|
||||
-DOIDN_DEVICE_SYCL=ON
|
||||
|
|
|
@ -257,6 +257,12 @@ class Device {
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Returns native buffer handle for device pointer. */
|
||||
virtual void *get_native_buffer(device_ptr /*ptr*/)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/* Guiding */
|
||||
|
||||
/* Returns path guiding device handle. */
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
# include "device/metal/device.h"
|
||||
# include "device/metal/device_impl.h"
|
||||
# include "integrator/denoiser_oidn_gpu.h"
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -55,6 +56,11 @@ void device_metal_info(vector<DeviceInfo> &devices)
|
|||
info.display_device = true;
|
||||
info.denoisers = DENOISER_NONE;
|
||||
info.id = id;
|
||||
# if defined(WITH_OPENIMAGEDENOISE)
|
||||
if (OIDNDenoiserGPU::is_device_supported(info)) {
|
||||
info.denoisers |= DENOISER_OPENIMAGEDENOISE;
|
||||
}
|
||||
# endif
|
||||
|
||||
MetalGPUVendor vendor = MetalInfo::get_device_vendor(device);
|
||||
|
||||
|
|
|
@ -134,6 +134,8 @@ class MetalDevice : public Device {
|
|||
|
||||
virtual bool should_use_graphics_interop() override;
|
||||
|
||||
virtual void *get_native_buffer(device_ptr ptr) override;
|
||||
|
||||
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
|
||||
|
||||
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
|
||||
|
|
|
@ -1389,6 +1389,11 @@ bool MetalDevice::should_use_graphics_interop()
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Returns the `mtlBuffer` of the MetalMem record that `ptr` refers to, as an
 * opaque handle.
 *
 * `ptr` is reinterpreted as a `MetalMem *`. A zero device pointer has no such
 * record, so nullptr is returned for it instead of dereferencing null; this
 * matches the base `Device::get_native_buffer()` result. */
void *MetalDevice::get_native_buffer(device_ptr ptr)
{
  if (!ptr) {
    return nullptr;
  }

  MetalMem *mmem = (MetalMem *)ptr;
  return mmem->mtlBuffer;
}
|
||||
|
||||
void MetalDevice::flush_delayed_free_list()
|
||||
{
|
||||
/* free any Metal buffers that may have been freed by host while a command
|
||||
|
|
|
@ -40,6 +40,8 @@ class MetalDeviceQueue : public DeviceQueue {
|
|||
virtual void copy_to_device(device_memory &mem) override;
|
||||
virtual void copy_from_device(device_memory &mem) override;
|
||||
|
||||
virtual void *native_queue() override;
|
||||
|
||||
protected:
|
||||
void setup_capture();
|
||||
void update_capture(DeviceKernel kernel);
|
||||
|
|
|
@ -979,6 +979,11 @@ void MetalDeviceQueue::close_blit_encoder()
|
|||
}
|
||||
}
|
||||
|
||||
void *MetalDeviceQueue::native_queue()
|
||||
{
|
||||
return mtlCommandQueue_;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
||||
|
|
|
@ -157,6 +157,11 @@ class DeviceQueue {
|
|||
/* Device this queue has been created for. */
|
||||
Device *device;
|
||||
|
||||
virtual void *native_queue()
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
protected:
|
||||
/* Hide construction so that allocation via `Device` API is enforced. */
|
||||
explicit DeviceQueue(Device *device);
|
||||
|
|
|
@ -324,9 +324,9 @@ void DenoiserGPU::denoise_color_read(const DenoiseContext &context, const Denois
|
|||
denoiser_queue_.get(), pass_access_info, 1.0f, context.num_samples);
|
||||
|
||||
PassAccessor::Destination destination(pass_access_info.type);
|
||||
destination.d_pixels = context.render_buffers->buffer.device_pointer +
|
||||
pass.denoised_offset * sizeof(float);
|
||||
destination.d_pixels = context.render_buffers->buffer.device_pointer;
|
||||
destination.num_components = 3;
|
||||
destination.pixel_offset = pass.denoised_offset;
|
||||
destination.pixel_stride = context.buffer_params.pass_stride;
|
||||
|
||||
BufferParams buffer_params = context.buffer_params;
|
||||
|
|
|
@ -47,6 +47,20 @@ bool OIDNDenoiserGPU::is_device_supported(const DeviceInfo &device)
|
|||
case DEVICE_OPTIX:
|
||||
device_type = OIDN_DEVICE_TYPE_CUDA;
|
||||
break;
|
||||
# endif
|
||||
# ifdef OIDN_DEVICE_METAL
|
||||
case DEVICE_METAL: {
|
||||
int num_devices = oidnGetNumPhysicalDevices();
|
||||
for (int i = 0; i < num_devices; i++) {
|
||||
if (oidnGetPhysicalDeviceUInt(i, "type") == OIDN_DEVICE_TYPE_METAL) {
|
||||
const char *name = oidnGetPhysicalDeviceString(i, "name");
|
||||
if (device.id.find(name) != std::string::npos) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
case DEVICE_CPU:
|
||||
/* This is the GPU denoiser - CPU devices shouldn't end up here. */
|
||||
|
@ -111,6 +125,9 @@ uint OIDNDenoiserGPU::get_device_type_mask() const
|
|||
# ifdef OIDN_DEVICE_SYCL
|
||||
device_mask |= DEVICE_MASK_ONEAPI;
|
||||
# endif
|
||||
# ifdef OIDN_DEVICE_METAL
|
||||
device_mask |= DEVICE_MASK_METAL;
|
||||
# endif
|
||||
# ifdef OIDN_DEVICE_CUDA
|
||||
device_mask |= DEVICE_MASK_CUDA;
|
||||
device_mask |= DEVICE_MASK_OPTIX;
|
||||
|
@ -162,6 +179,13 @@ bool OIDNDenoiserGPU::denoise_create_if_needed(DenoiseContext &context)
|
|||
1);
|
||||
break;
|
||||
# endif
|
||||
# if defined(OIDN_DEVICE_METAL) && defined(WITH_METAL)
|
||||
case DEVICE_METAL: {
|
||||
denoiser_queue_->init_execution();
|
||||
const MTLCommandQueue_id queue = (const MTLCommandQueue_id)denoiser_queue_->native_queue();
|
||||
oidn_device_ = oidnNewMetalDevice(&queue, 1);
|
||||
} break;
|
||||
# endif
|
||||
# if defined(OIDN_DEVICE_CUDA) && defined(WITH_CUDA)
|
||||
case DEVICE_CUDA:
|
||||
case DEVICE_OPTIX: {
|
||||
|
@ -262,24 +286,24 @@ bool OIDNDenoiserGPU::denoise_run(const DenoiseContext &context, const DenoisePa
|
|||
/* Color pass. */
|
||||
const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);
|
||||
|
||||
oidnSetSharedFilterImage(oidn_filter_,
|
||||
"color",
|
||||
(void *)context.render_buffers->buffer.device_pointer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
pass.denoised_offset * sizeof(float),
|
||||
pass_stride_in_bytes,
|
||||
pass_stride_in_bytes * context.buffer_params.stride);
|
||||
oidnSetSharedFilterImage(oidn_filter_,
|
||||
"output",
|
||||
(void *)context.render_buffers->buffer.device_pointer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
pass.denoised_offset * sizeof(float),
|
||||
pass_stride_in_bytes,
|
||||
pass_stride_in_bytes * context.buffer_params.stride);
|
||||
set_filter_pass(oidn_filter_,
|
||||
"color",
|
||||
context.render_buffers->buffer.device_pointer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
pass.denoised_offset * sizeof(float),
|
||||
pass_stride_in_bytes,
|
||||
pass_stride_in_bytes * context.buffer_params.stride);
|
||||
set_filter_pass(oidn_filter_,
|
||||
"output",
|
||||
context.render_buffers->buffer.device_pointer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
pass.denoised_offset * sizeof(float),
|
||||
pass_stride_in_bytes,
|
||||
pass_stride_in_bytes * context.buffer_params.stride);
|
||||
|
||||
/* Optional albedo and color passes. */
|
||||
if (context.num_input_passes > 1) {
|
||||
|
@ -289,95 +313,95 @@ bool OIDNDenoiserGPU::denoise_run(const DenoiseContext &context, const DenoisePa
|
|||
|
||||
if (context.use_pass_albedo) {
|
||||
if (params_.prefilter == DENOISER_PREFILTER_NONE) {
|
||||
oidnSetSharedFilterImage(oidn_filter_,
|
||||
"albedo",
|
||||
(void *)d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_albedo * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
set_filter_pass(oidn_filter_,
|
||||
"albedo",
|
||||
d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_albedo * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
}
|
||||
else {
|
||||
oidnSetSharedFilterImage(albedo_filter_,
|
||||
"color",
|
||||
(void *)d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_albedo * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
oidnSetSharedFilterImage(albedo_filter_,
|
||||
"output",
|
||||
(void *)d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_albedo * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
set_filter_pass(albedo_filter_,
|
||||
"color",
|
||||
d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_albedo * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
set_filter_pass(albedo_filter_,
|
||||
"output",
|
||||
d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_albedo * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
oidnCommitFilter(albedo_filter_);
|
||||
oidnExecuteFilterAsync(albedo_filter_);
|
||||
|
||||
oidnSetSharedFilterImage(oidn_filter_,
|
||||
"albedo",
|
||||
(void *)d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_albedo * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
set_filter_pass(oidn_filter_,
|
||||
"albedo",
|
||||
d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_albedo * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
if (context.use_pass_normal) {
|
||||
if (params_.prefilter == DENOISER_PREFILTER_NONE) {
|
||||
oidnSetSharedFilterImage(oidn_filter_,
|
||||
"normal",
|
||||
(void *)d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_normal * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
set_filter_pass(oidn_filter_,
|
||||
"normal",
|
||||
d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_normal * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
}
|
||||
else {
|
||||
oidnSetSharedFilterImage(normal_filter_,
|
||||
"color",
|
||||
(void *)d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_normal * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
set_filter_pass(normal_filter_,
|
||||
"color",
|
||||
d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_normal * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
|
||||
oidnSetSharedFilterImage(normal_filter_,
|
||||
"output",
|
||||
(void *)d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_normal * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
set_filter_pass(normal_filter_,
|
||||
"output",
|
||||
d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_normal * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
|
||||
oidnCommitFilter(normal_filter_);
|
||||
oidnExecuteFilterAsync(normal_filter_);
|
||||
|
||||
oidnSetSharedFilterImage(oidn_filter_,
|
||||
"normal",
|
||||
(void *)d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_normal * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
set_filter_pass(oidn_filter_,
|
||||
"normal",
|
||||
d_guiding_buffer,
|
||||
OIDN_FORMAT_FLOAT3,
|
||||
context.buffer_params.width,
|
||||
context.buffer_params.height,
|
||||
context.guiding_params.pass_normal * sizeof(float),
|
||||
pixel_stride_in_bytes,
|
||||
row_stride_in_bytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -409,6 +433,48 @@ bool OIDNDenoiserGPU::denoise_run(const DenoiseContext &context, const DenoisePa
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Attach the image called `name` on `filter` to the device memory behind `ptr`.
 *
 * `format` is an OIDNFormat value passed as a plain int. The image dimensions,
 * byte offset and pixel/row strides are forwarded to OIDN unchanged.
 *
 * When built with Metal support and the denoiser device is a Metal device, the
 * native buffer of `ptr` is wrapped in an OIDN buffer and that buffer is bound
 * to the filter. In every other case `ptr` itself is bound as a shared image
 * pointer. */
void OIDNDenoiserGPU::set_filter_pass(OIDNFilter filter,
                                      const char *name,
                                      device_ptr ptr,
                                      int format,
                                      int width,
                                      int height,
                                      size_t offset_in_bytes,
                                      size_t pixel_stride_in_bytes,
                                      size_t row_stride_in_bytes)
{
  const OIDNFormat oidn_format = static_cast<OIDNFormat>(format);

# if defined(OIDN_DEVICE_METAL) && defined(WITH_METAL)
  if (denoiser_device_->info.type == DEVICE_METAL) {
    void *native_buffer = denoiser_device_->get_native_buffer(ptr);
    OIDNBuffer shared_buffer = oidnNewSharedBufferFromMetal(oidn_device_, native_buffer);

    oidnSetFilterImage(filter,
                       name,
                       shared_buffer,
                       oidn_format,
                       width,
                       height,
                       offset_in_bytes,
                       pixel_stride_in_bytes,
                       row_stride_in_bytes);

    /* Release the local handle once it has been handed to the filter. */
    oidnReleaseBuffer(shared_buffer);
    return;
  }
# endif

  oidnSetSharedFilterImage(filter,
                           name,
                           (void *)ptr,
                           oidn_format,
                           width,
                           height,
                           offset_in_bytes,
                           pixel_stride_in_bytes,
                           row_stride_in_bytes);
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
|
|
@ -51,6 +51,16 @@ class OIDNDenoiserGPU : public DenoiserGPU {
|
|||
|
||||
OIDNFilter create_filter();
|
||||
|
||||
void set_filter_pass(OIDNFilter filter,
|
||||
const char *name,
|
||||
device_ptr ptr,
|
||||
int format,
|
||||
int width,
|
||||
int height,
|
||||
size_t offset_in_bytes,
|
||||
size_t pixel_stride_in_bytes,
|
||||
size_t row_stride_in_bytes);
|
||||
|
||||
OIDNDevice oidn_device_ = nullptr;
|
||||
OIDNFilter oidn_filter_ = nullptr;
|
||||
OIDNFilter albedo_filter_ = nullptr;
|
||||
|
|
|
@ -69,6 +69,10 @@ class PassAccessor {
|
|||
* Allows to get pixels of render buffer into a partial slice of the destination. */
|
||||
int offset = 0;
|
||||
|
||||
/* Offset in floats from the beginning of pixels storage.
|
||||
* Is ignored for half4 destination. */
|
||||
int pixel_offset = 0;
|
||||
|
||||
/* Number of floats per pixel. When zero is the same as `num_components`.
|
||||
*
|
||||
* NOTE: Is ignored for half4 destination, as the half4 pixels are always 4-component
|
||||
|
|
|
@ -47,7 +47,7 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
|
|||
|
||||
parallel_for(0, buffer_params.window_height, [&](int64_t y) {
|
||||
const float *buffer = window_data + y * buffer_row_stride;
|
||||
float *pixel = destination.pixels +
|
||||
float *pixel = destination.pixels + destination.pixel_offset +
|
||||
(y * buffer_params.width + destination.offset) * pixel_stride;
|
||||
func(kfilm_convert, buffer, pixel, buffer_params.window_width, pass_stride, pixel_stride);
|
||||
});
|
||||
|
|
|
@ -48,6 +48,7 @@ void PassAccessorGPU::run_film_convert_kernels(DeviceKernel kernel,
|
|||
&buffer_params.window_width,
|
||||
&offset,
|
||||
&buffer_params.stride,
|
||||
&destination.pixel_offset,
|
||||
&destination.offset,
|
||||
&destination_stride);
|
||||
|
||||
|
|
|
@ -809,6 +809,7 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
|
|||
int width, \
|
||||
int offset, \
|
||||
int stride, \
|
||||
int channel_offset, \
|
||||
int rgba_offset, \
|
||||
int rgba_stride) \
|
||||
{ \
|
||||
|
@ -824,7 +825,7 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
|
|||
ccl_global const float *buffer = render_buffer + offset + \
|
||||
buffer_pixel_index * kfilm_convert.pass_stride; \
|
||||
\
|
||||
ccl_global float *pixel = pixels + \
|
||||
ccl_global float *pixel = pixels + channel_offset + \
|
||||
(render_pixel_index + rgba_offset) * kfilm_convert.pixel_stride; \
|
||||
\
|
||||
FILM_GET_PASS_PIXEL_F32(variant, input_channel_count); \
|
||||
|
|
Loading…
Reference in New Issue