Fix T78881: Cycles OpenImageDenoise not using albedo and normal correctly

Properly normalize the albedo and normal buffers by the number of samples now. Also expose an option to not use albedo and normal as denoiser inputs, just like for OptiX.
2020-07-13 16:45:15 +02:00 · 2020-07-13 16:45:15 +02:00 · 6e74a8b69f
parent 2b5e21fe00
commit 6e74a8b69f
6 changed files with 118 additions and 50 deletions
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@ -212,7 +212,7 @@ def enum_denoiser(self, context):
    items += enum_openimagedenoise_denoiser(self, context)
    return items

-enum_denoising_optix_input_passes = (
+enum_denoising_input_passes = (
    ('RGB', "Color", "Use only color as input", 1),
    ('RGB_ALBEDO', "Color + Albedo", "Use color and albedo data as input", 2),
    ('RGB_ALBEDO_NORMAL', "Color + Albedo + Normal", "Use color, albedo and normal data as input", 3),
@ -1451,11 +1451,18 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):

    denoising_optix_input_passes: EnumProperty(
        name="Input Passes",
-        description="Passes handed over to the OptiX denoiser (this can have different effects on the denoised image)",
-        items=enum_denoising_optix_input_passes,
+        description="Passes used by the denoiser to distinguish noise from shader and geometry detail",
+        items=enum_denoising_input_passes,
        default='RGB_ALBEDO',
    )

+    denoising_openimagedenoise_input_passes: EnumProperty(
+        name="Input Passes",
+        description="Passes used by the denoiser to distinguish noise from shader and geometry detail",
+        items=enum_denoising_input_passes,
+        default='RGB_ALBEDO_NORMAL',
+    )
+
    use_pass_crypto_object: BoolProperty(
        name="Cryptomatte Object",
        description="Render cryptomatte object pass, for isolating objects in compositing",
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@ -1008,6 +1008,7 @@ class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
            col.prop(cycles_view_layer, "denoising_optix_input_passes")
            return
        elif denoiser == 'OPENIMAGEDENOISE':
+            col.prop(cycles_view_layer, "denoising_openimagedenoise_input_passes")
            return

        col.prop(cycles_view_layer, "denoising_radius", text="Radius")
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@ -954,7 +954,13 @@ DenoiseParams BlenderSync::get_denoise_params(BL::Scene &b_scene,
      denoising.strength = get_float(clayer, "denoising_strength");
      denoising.feature_strength = get_float(clayer, "denoising_feature_strength");
      denoising.relative_pca = get_boolean(clayer, "denoising_relative_pca");
-      denoising.optix_input_passes = get_enum(clayer, "denoising_optix_input_passes");
+
+      denoising.input_passes = (DenoiserInput)get_enum(
+          clayer,
+          (denoising.type == DENOISER_OPTIX) ? "denoising_optix_input_passes" :
+                                               "denoising_openimagedenoise_input_passes",
+          DENOISER_INPUT_NUM,
+          DENOISER_INPUT_RGB_ALBEDO_NORMAL);

      denoising.store_passes = get_boolean(clayer, "denoising_store_passes");
    }
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@ -951,12 +951,13 @@ class CPUDevice : public Device {

  void denoise_openimagedenoise_buffer(DeviceTask &task,
                                       float *buffer,
-                                       size_t offset,
-                                       size_t stride,
-                                       size_t x,
-                                       size_t y,
-                                       size_t w,
-                                       size_t h)
+                                       const size_t offset,
+                                       const size_t stride,
+                                       const size_t x,
+                                       const size_t y,
+                                       const size_t w,
+                                       const size_t h,
+                                       const float scale)
  {
 #ifdef WITH_OPENIMAGEDENOISE
    assert(openimagedenoise_supported());
@ -982,31 +983,65 @@ class CPUDevice : public Device {
    }

    /* Set images with appropriate stride for our interleaved pass storage. */
-    const struct {
+    struct {
      const char *name;
-      int offset;
-    } passes[] = {{"color", task.pass_denoising_data + DENOISING_PASS_COLOR},
-                  {"normal", task.pass_denoising_data + DENOISING_PASS_NORMAL},
-                  {"albedo", task.pass_denoising_data + DENOISING_PASS_ALBEDO},
-                  {"output", 0},
+      const int offset;
+      const bool scale;
+      const bool use;
+      array<float> scaled_buffer;
+    } passes[] = {{"color", task.pass_denoising_data + DENOISING_PASS_COLOR, false, true},
+                  {"albedo",
+                   task.pass_denoising_data + DENOISING_PASS_ALBEDO,
+                   true,
+                   task.denoising.input_passes >= DENOISER_INPUT_RGB_ALBEDO},
+                  {"normal",
+                   task.pass_denoising_data + DENOISING_PASS_NORMAL,
+                   true,
+                   task.denoising.input_passes >= DENOISER_INPUT_RGB_ALBEDO_NORMAL},
+                  {"output", 0, false, true},
                  { NULL,
                    0 }};

    for (int i = 0; passes[i].name; i++) {
+      if (!passes[i].use) {
+        continue;
+      }
+
      const int64_t pixel_offset = offset + x + y * stride;
-      const int64_t buffer_offset = (pixel_offset * task.pass_stride + passes[i].offset) *
-                                    sizeof(float);
-      const int64_t pixel_stride = task.pass_stride * sizeof(float);
+      const int64_t buffer_offset = (pixel_offset * task.pass_stride + passes[i].offset);
+      const int64_t pixel_stride = task.pass_stride;
      const int64_t row_stride = stride * pixel_stride;

-      oidn_filter.setImage(passes[i].name,
-                           (char *)buffer + buffer_offset,
-                           oidn::Format::Float3,
-                           w,
-                           h,
-                           0,
-                           pixel_stride,
-                           row_stride);
+      if (passes[i].scale && scale != 1.0f) {
+        /* Normalize albedo and normal passes as they are scaled by the number of samples.
+         * For the color passes OIDN will perform autoexposure making it unnecessary. */
+        array<float> &scaled_buffer = passes[i].scaled_buffer;
+        scaled_buffer.resize(w * h * 3);
+
+        for (int y = 0; y < h; y++) {
+          const float *pass_row = buffer + buffer_offset + y * row_stride;
+          float *scaled_row = scaled_buffer.data() + y * w * 3;
+
+          for (int x = 0; x < w; x++) {
+            scaled_row[x * 3 + 0] = pass_row[x * pixel_stride + 0] * scale;
+            scaled_row[x * 3 + 1] = pass_row[x * pixel_stride + 1] * scale;
+            scaled_row[x * 3 + 2] = pass_row[x * pixel_stride + 2] * scale;
+          }
+        }
+
+        oidn_filter.setImage(
+            passes[i].name, scaled_buffer.data(), oidn::Format::Float3, w, h, 0, 0, 0);
+      }
+      else {
+        oidn_filter.setImage(passes[i].name,
+                             buffer + buffer_offset,
+                             oidn::Format::Float3,
+                             w,
+                             h,
+                             0,
+                             pixel_stride * sizeof(float),
+                             row_stride * sizeof(float));
+      }
    }

    /* Execute filter. */
@ -1021,6 +1056,7 @@ class CPUDevice : public Device {
    (void)y;
    (void)w;
    (void)h;
+    (void)scale;
 #endif
  }

@ -1037,7 +1073,8 @@ class CPUDevice : public Device {
                                      rtile.x,
                                      rtile.y,
                                      rtile.w,
-                                      rtile.h);
+                                      rtile.h,
+                                      1.0f / rtile.sample);

      /* todo: it may be possible to avoid this copy, but we have to ensure that
       * when other code copies data from the device it doesn't overwrite the
@ -1047,6 +1084,9 @@ class CPUDevice : public Device {
    else {
      /* Per-tile denoising. */
      rtile.sample = rtile.start_sample + rtile.num_samples;
+      const float scale = 1.0f / rtile.sample;
+      const float invscale = rtile.sample;
+      const size_t pass_stride = task.pass_stride;

      /* Map neighboring tiles into one buffer for denoising. */
      RenderTileNeighbors neighbors(rtile);
@ -1075,22 +1115,24 @@ class CPUDevice : public Device {
        const int ymax = min(ntile.y + ntile.h, rect.w);

        const size_t tile_offset = ntile.offset + xmin + ymin * ntile.stride;
-        const float *tile_buffer = (float *)ntile.buffer + tile_offset * task.pass_stride;
+        const float *tile_buffer = (float *)ntile.buffer + tile_offset * pass_stride;

        const size_t merged_stride = rect_size.x;
        const size_t merged_offset = (xmin - rect.x) + (ymin - rect.y) * merged_stride;
-        float *merged_buffer = merged.data() + merged_offset * task.pass_stride;
+        float *merged_buffer = merged.data() + merged_offset * pass_stride;

        for (int y = ymin; y < ymax; y++) {
-          memcpy(merged_buffer, tile_buffer, sizeof(float) * task.pass_stride * (xmax - xmin));
-          tile_buffer += ntile.stride * task.pass_stride;
-          merged_buffer += merged_stride * task.pass_stride;
+          for (int x = 0; x < pass_stride * (xmax - xmin); x++) {
+            merged_buffer[x] = tile_buffer[x] * scale;
+          }
+          tile_buffer += ntile.stride * pass_stride;
+          merged_buffer += merged_stride * pass_stride;
        }
      }

      /* Denoise */
      denoise_openimagedenoise_buffer(
-          task, merged.data(), 0, rect_size.x, 0, 0, rect_size.x, rect_size.y);
+          task, merged.data(), 0, rect_size.x, 0, 0, rect_size.x, rect_size.y, 1.0f);

      /* Copy back result from merged buffer. */
      RenderTile &ntile = neighbors.target;
@ -1101,16 +1143,20 @@ class CPUDevice : public Device {
        const int ymax = min(ntile.y + ntile.h, rect.w);

        const size_t tile_offset = ntile.offset + xmin + ymin * ntile.stride;
-        float *tile_buffer = (float *)ntile.buffer + tile_offset * task.pass_stride;
+        float *tile_buffer = (float *)ntile.buffer + tile_offset * pass_stride;

        const size_t merged_stride = rect_size.x;
        const size_t merged_offset = (xmin - rect.x) + (ymin - rect.y) * merged_stride;
-        const float *merged_buffer = merged.data() + merged_offset * task.pass_stride;
+        const float *merged_buffer = merged.data() + merged_offset * pass_stride;

        for (int y = ymin; y < ymax; y++) {
-          memcpy(tile_buffer, merged_buffer, sizeof(float) * task.pass_stride * (xmax - xmin));
-          tile_buffer += ntile.stride * task.pass_stride;
-          merged_buffer += merged_stride * task.pass_stride;
+          for (int x = 0; x < pass_stride * (xmax - xmin); x += pass_stride) {
+            tile_buffer[x + 0] = merged_buffer[x + 0] * invscale;
+            tile_buffer[x + 1] = merged_buffer[x + 1] * invscale;
+            tile_buffer[x + 2] = merged_buffer[x + 2] * invscale;
+          }
+          tile_buffer += ntile.stride * pass_stride;
+          merged_buffer += merged_stride * pass_stride;
        }
      }

--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@ -877,7 +877,7 @@ class OptiXDevice : public CUDADevice {

 #  if OPTIX_DENOISER_NO_PIXEL_STRIDE
      device_only_memory<float> input_rgb(this, "denoiser input rgb");
-      input_rgb.alloc_to_device(rect_size.x * rect_size.y * 3 * task.denoising.optix_input_passes);
+      input_rgb.alloc_to_device(rect_size.x * rect_size.y * 3 * task.denoising.input_passes);

      void *input_args[] = {&input_rgb.device_pointer,
                            &input_ptr,
@ -886,7 +886,7 @@ class OptiXDevice : public CUDADevice {
                            &input_stride,
                            &task.pass_stride,
                            const_cast<int *>(pass_offset),
-                            &task.denoising.optix_input_passes,
+                            &task.denoising.input_passes,
                            &rtile.sample};
      launch_filter_kernel(
          "kernel_cuda_filter_convert_to_rgb", rect_size.x, rect_size.y, input_args);
@ -897,7 +897,7 @@ class OptiXDevice : public CUDADevice {
 #  endif

      const bool recreate_denoiser = (denoiser == NULL) ||
-                                     (task.denoising.optix_input_passes != denoiser_input_passes);
+                                     (task.denoising.input_passes != denoiser_input_passes);
      if (recreate_denoiser) {
        // Destroy existing handle before creating new one
        if (denoiser != NULL) {
@ -906,9 +906,9 @@ class OptiXDevice : public CUDADevice {

        // Create OptiX denoiser handle on demand when it is first used
        OptixDenoiserOptions denoiser_options;
-        assert(task.denoising.optix_input_passes >= 1 && task.denoising.optix_input_passes <= 3);
+        assert(task.denoising.input_passes >= 1 && task.denoising.input_passes <= 3);
        denoiser_options.inputKind = static_cast<OptixDenoiserInputKind>(
-            OPTIX_DENOISER_INPUT_RGB + (task.denoising.optix_input_passes - 1));
+            OPTIX_DENOISER_INPUT_RGB + (task.denoising.input_passes - 1));
 #  if OPTIX_ABI_VERSION < 28
        denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3;
 #  endif
@ -917,7 +917,7 @@ class OptiXDevice : public CUDADevice {
            optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0));

        // OptiX denoiser handle was created with the requested number of input passes
-        denoiser_input_passes = task.denoising.optix_input_passes;
+        denoiser_input_passes = task.denoising.input_passes;
      }

      OptixDenoiserSizes sizes = {};
@ -992,7 +992,7 @@ class OptiXDevice : public CUDADevice {
                                                 denoiser_state.device_pointer,
                                                 scratch_offset,
                                                 input_layers,
-                                                 task.denoising.optix_input_passes,
+                                                 task.denoising.input_passes,
                                                 overlap_offset.x,
                                                 overlap_offset.y,
                                                 output_layers,
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@ -42,6 +42,14 @@ enum DenoiserType {
  DENOISER_ALL = ~0,
 };

+enum DenoiserInput {
+  DENOISER_INPUT_RGB = 1,
+  DENOISER_INPUT_RGB_ALBEDO = 2,
+  DENOISER_INPUT_RGB_ALBEDO_NORMAL = 3,
+
+  DENOISER_INPUT_NUM,
+};
+
 typedef int DenoiserTypeMask;

 class DenoiseParams {
@ -73,10 +81,10 @@ class DenoiseParams {
  /* Clamp the input to the range of +-1e8. Should be enough for any legitimate data. */
  bool clamp_input;

-  /** Optix Denoiser **/
+  /** OIDN/Optix Denoiser **/

-  /* Passes handed over to the OptiX denoiser (default to color + albedo). */
-  int optix_input_passes;
+  /* Passes handed over to the OIDN/OptiX denoiser (default to color + albedo + normal). */
+  DenoiserInput input_passes;

  DenoiseParams()
  {
@ -92,7 +100,7 @@ class DenoiseParams {
    neighbor_frames = 2;
    clamp_input = true;

-    optix_input_passes = 2;
+    input_passes = DENOISER_INPUT_RGB_ALBEDO_NORMAL;

    start_sample = 0;
  }