tornavis/source/blender/imbuf/intern/util_gpu.cc

/* SPDX-FileCopyrightText: 2001-2002 NaN Holding BV. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup imbuf
 */

#include "imbuf.h"

#include "BLI_utildefines.h"
#include "MEM_guardedalloc.h"

#include "BKE_global.h"

#include "GPU_capabilities.h"
#include "GPU_state.h"
#include "GPU_texture.h"

#include "IMB_colormanagement.h"
#include "IMB_imbuf.h"
#include "IMB_imbuf_types.h"

/* gpu ibuf utils */

/**
 * Check whether the pixels of `ibuf` may be uploaded as a single channel texture.
 *
 * Buffers with more than 8 planes never qualify. For the others the decision depends on the
 * color-space of the buffer that is going to be uploaded.
 */
static bool imb_is_grayscale_texture_format_compatible(const ImBuf *ibuf)
{
  if (ibuf->planes > 8) {
    return false;
  }

  const bool only_byte_data = ibuf->byte_buffer.data && !ibuf->float_buffer.data;
  if (only_byte_data) {
    /* Gray-scale byte buffers with an sRGB or scene linear color-space utilize float buffers
     * under the hood and can therefore be optimized.
     *
     * TODO: Support the remaining gray-scale byte buffers.
     * The challenge is that Blender always stores byte images as RGBA. */
    return IMB_colormanagement_space_is_srgb(ibuf->byte_buffer.colorspace) ||
           IMB_colormanagement_space_is_scene_linear(ibuf->byte_buffer.colorspace);
  }

  /* Only #IMBuf's with color-space that do not modify the chrominance of the texture data relative
   * to the scene color space can be uploaded as single channel textures. */
  return IMB_colormanagement_space_is_data(ibuf->float_buffer.colorspace) ||
         IMB_colormanagement_space_is_srgb(ibuf->float_buffer.colorspace) ||
         IMB_colormanagement_space_is_scene_linear(ibuf->float_buffer.colorspace);
}

static void imb_gpu_get_format(const ImBuf *ibuf,
                               bool high_bitdepth,
                               bool use_grayscale,
                               eGPUTextureFormat *r_texture_format)
{
  const bool float_rect = (ibuf->float_buffer.data != nullptr);
  const bool is_grayscale = use_grayscale && imb_is_grayscale_texture_format_compatible(ibuf);

  if (float_rect) {
    /* Float. */
    const bool use_high_bitdepth = (!(ibuf->flags & IB_halffloat) && high_bitdepth);
    *r_texture_format = is_grayscale ? (use_high_bitdepth ? GPU_R32F : GPU_R16F) :
                                       (use_high_bitdepth ? GPU_RGBA32F : GPU_RGBA16F);
  }
  else {
    if (IMB_colormanagement_space_is_data(ibuf->byte_buffer.colorspace) ||
        IMB_colormanagement_space_is_scene_linear(ibuf->byte_buffer.colorspace))
    {
      /* Non-color data or scene linear, just store buffer as is. */
      *r_texture_format = (is_grayscale) ? GPU_R8 : GPU_RGBA8;
    }
    else if (IMB_colormanagement_space_is_srgb(ibuf->byte_buffer.colorspace)) {
      /* sRGB, store as byte texture that the GPU can decode directly. */
      *r_texture_format = (is_grayscale) ? GPU_R16F : GPU_SRGB8_A8;
    }
    else {
      /* Other colorspace, store as half float texture to avoid precision loss. */
      *r_texture_format = (is_grayscale) ? GPU_R16F : GPU_RGBA16F;
    }
  }
}

/** Swizzle mask for `ibuf`: "rrra" for gray-scale compatible buffers, "rgba" otherwise. */
static const char *imb_gpu_get_swizzle(const ImBuf *ibuf)
{
  if (imb_is_grayscale_texture_format_compatible(ibuf)) {
    return "rrra";
  }
  return "rgba";
}

/**
 * Pick the compressed texture format matching the DDS `fourcc` code of `ibuf`.
 *
 * \param r_texture_format: Receives the format, left untouched when false is returned.
 * \return false if no suitable format was found.
 */
static bool IMB_gpu_get_compressed_format(const ImBuf *ibuf, eGPUTextureFormat *r_texture_format)
{
  /* For DDS we only support data, scene linear and sRGB. Converting to
   * different colorspace would break the compression. */
  const bool store_as_is = IMB_colormanagement_space_is_data(ibuf->byte_buffer.colorspace) ||
                           IMB_colormanagement_space_is_scene_linear(
                               ibuf->byte_buffer.colorspace);
  const auto fourcc = ibuf->dds_data.fourcc;

  if (fourcc == FOURCC_DXT1) {
    *r_texture_format = store_as_is ? GPU_RGBA8_DXT1 : GPU_SRGB8_A8_DXT1;
    return true;
  }
  if (fourcc == FOURCC_DXT3) {
    *r_texture_format = store_as_is ? GPU_RGBA8_DXT3 : GPU_SRGB8_A8_DXT3;
    return true;
  }
  if (fourcc == FOURCC_DXT5) {
    *r_texture_format = store_as_is ? GPU_RGBA8_DXT5 : GPU_SRGB8_A8_DXT5;
    return true;
  }

  return false;
}

/**
 * Apply colormanagement and scale buffer if needed.
 * `*r_freedata` is set to true if the returned buffer need to be manually freed.
 */
static void *imb_gpu_get_data(const ImBuf *ibuf,
                              const bool do_rescale,
                              const int rescale_size[2],
                              const bool store_premultiplied,
                              bool *r_freedata,
                              eGPUDataFormat *out_data_format)
{
  bool is_float_rect = (ibuf->float_buffer.data != nullptr);
  const bool is_grayscale = imb_is_grayscale_texture_format_compatible(ibuf);
  void *data_rect = (is_float_rect) ? (void *)ibuf->float_buffer.data :
                                      (void *)ibuf->byte_buffer.data;
  bool freedata = false;

  if (is_float_rect) {
    /* Float image is already in scene linear colorspace or non-color data by
     * convention, no colorspace conversion needed. But we do require 4 channels
     * currently. */
    if (ibuf->channels != 4 || !store_premultiplied) {
      data_rect = MEM_mallocN(sizeof(float[4]) * ibuf->x * ibuf->y, __func__);
      *r_freedata = freedata = true;

      if (data_rect == nullptr) {
        return nullptr;
      }

      IMB_colormanagement_imbuf_to_float_texture(
          (float *)data_rect, 0, 0, ibuf->x, ibuf->y, ibuf, store_premultiplied);
    }
  }
  else {
    /* Byte image is in original colorspace from the file, and may need conversion.
     *
     * We must also convert to premultiplied for correct texture interpolation
     * and consistency with float images. */
    if (IMB_colormanagement_space_is_data(ibuf->byte_buffer.colorspace)) {
      /* Non-color data, just store buffer as is. */
    }
    else if (IMB_colormanagement_space_is_srgb(ibuf->byte_buffer.colorspace) ||
             IMB_colormanagement_space_is_scene_linear(ibuf->byte_buffer.colorspace))
    {
      /* sRGB or scene linear, store as byte texture that the GPU can decode directly. */
      data_rect = MEM_mallocN(
          (is_grayscale ? sizeof(float[4]) : sizeof(uchar[4])) * ibuf->x * ibuf->y, __func__);
      *r_freedata = freedata = true;

      if (data_rect == nullptr) {
        return nullptr;
      }

      /* Texture storage of images is defined by the alpha mode of the image. The
       * downside of this is that there can be artifacts near alpha edges. However,
       * this allows us to use sRGB texture formats and preserves color values in
       * zero alpha areas, and appears generally closer to what game engines that we
       * want to be compatible with do. */
      if (is_grayscale) {
        /* Convert to byte buffer to then pack as half floats reducing the buffer size by half. */
        IMB_colormanagement_imbuf_to_float_texture(
            (float *)data_rect, 0, 0, ibuf->x, ibuf->y, ibuf, store_premultiplied);
        is_float_rect = true;
      }
      else {
        IMB_colormanagement_imbuf_to_byte_texture(
            (uchar *)data_rect, 0, 0, ibuf->x, ibuf->y, ibuf, store_premultiplied);
      }
    }
    else {
      /* Other colorspace, store as float texture to avoid precision loss. */
      data_rect = MEM_mallocN(sizeof(float[4]) * ibuf->x * ibuf->y, __func__);
      *r_freedata = freedata = true;
      is_float_rect = true;

      if (data_rect == nullptr) {
        return nullptr;
      }

      /* Texture storage of images is defined by the alpha mode of the image. The
       * downside of this is that there can be artifacts near alpha edges. However,
       * this allows us to use sRGB texture formats and preserves color values in
       * zero alpha areas, and appears generally closer to what game engines that we
       * want to be compatible with do. */
      IMB_colormanagement_imbuf_to_float_texture(
          (float *)data_rect, 0, 0, ibuf->x, ibuf->y, ibuf, store_premultiplied);
    }
  }

  if (do_rescale) {
    uint8_t *rect = (is_float_rect) ? nullptr : (uint8_t *)data_rect;
    float *rect_float = (is_float_rect) ? (float *)data_rect : nullptr;

    ImBuf *scale_ibuf = IMB_allocFromBuffer(rect, rect_float, ibuf->x, ibuf->y, 4);
    IMB_scaleImBuf(scale_ibuf, UNPACK2(rescale_size));

    if (freedata) {
      MEM_freeN(data_rect);
    }

    data_rect = (is_float_rect) ? (void *)scale_ibuf->float_buffer.data :
                                  (void *)scale_ibuf->byte_buffer.data;
    *r_freedata = freedata = true;
    /* Steal the rescaled buffer to avoid double free. */
    (void)IMB_steal_byte_buffer(scale_ibuf);
    (void)IMB_steal_float_buffer(scale_ibuf);
    IMB_freeImBuf(scale_ibuf);
  }

  /* Pack first channel data manually at the start of the buffer. */
  if (is_grayscale) {
    void *src_rect = data_rect;

    if (freedata == false) {
      data_rect = MEM_mallocN((is_float_rect ? sizeof(float) : sizeof(uchar)) * ibuf->x * ibuf->y,
                              __func__);
      *r_freedata = freedata = true;
    }

    if (data_rect == nullptr) {
      return nullptr;
    }

    int buffer_size = do_rescale ? rescale_size[0] * rescale_size[1] : ibuf->x * ibuf->y;
    if (is_float_rect) {
      for (uint64_t i = 0; i < buffer_size; i++) {
        ((float *)data_rect)[i] = ((float *)src_rect)[i * 4];
      }
    }
    else {
      for (uint64_t i = 0; i < buffer_size; i++) {
        ((uchar *)data_rect)[i] = ((uchar *)src_rect)[i * 4];
      }
    }
  }

  *out_data_format = (is_float_rect) ? GPU_DATA_FLOAT : GPU_DATA_UBYTE;
  return data_rect;
}

GPUTexture *IMB_touch_gpu_texture(const char *name,
                                  ImBuf *ibuf,
                                  int w,
                                  int h,
                                  int layers,
                                  bool use_high_bitdepth,
                                  bool use_grayscale)
{
  eGPUTextureFormat tex_format;
  imb_gpu_get_format(ibuf, use_high_bitdepth, use_grayscale, &tex_format);

  GPUTexture *tex;
  if (layers > 0) {
    tex = GPU_texture_create_2d_array(
        name, w, h, layers, 9999, tex_format, GPU_TEXTURE_USAGE_SHADER_READ, nullptr);
  }
  else {
    tex = GPU_texture_create_2d(
        name, w, h, 9999, tex_format, GPU_TEXTURE_USAGE_SHADER_READ, nullptr);
  }

  GPU_texture_swizzle_set(tex, imb_gpu_get_swizzle(ibuf));
  GPU_texture_anisotropic_filter(tex, true);
  return tex;
}

void IMB_update_gpu_texture_sub(GPUTexture *tex,
                                ImBuf *ibuf,
                                int x,
                                int y,
                                int z,
                                int w,
                                int h,
                                bool use_high_bitdepth,
                                bool use_grayscale,
                                bool use_premult)
{
  const bool do_rescale = (ibuf->x != w || ibuf->y != h);
  const int size[2] = {w, h};

  eGPUTextureFormat tex_format;
  imb_gpu_get_format(ibuf, use_high_bitdepth, use_grayscale, &tex_format);

  bool freebuf = false;

  eGPUDataFormat data_format;
  void *data = imb_gpu_get_data(ibuf, do_rescale, size, use_premult, &freebuf, &data_format);

  /* Update Texture. */
  GPU_texture_update_sub(tex, data_format, data, x, y, z, w, h, 1);

  if (freebuf) {
    MEM_freeN(data);
  }
}

/**
 * Create a GPU texture from `ibuf` and upload its pixels.
 *
 * The texture size is limited with #GPU_texture_size_with_limit, the pixels are rescaled when
 * the limited size differs from the image size. DDS images are uploaded compressed when
 * possible, otherwise they fall back to the uncompressed path.
 *
 * \param use_high_bitdepth: Forwarded to the texture format selection.
 * \param use_premult: Forwarded to the color-management conversion.
 * \return The new texture, nullptr if the texture could not be created or its pixels could not
 * be prepared.
 */
GPUTexture *IMB_create_gpu_texture(const char *name,
                                   ImBuf *ibuf,
                                   bool use_high_bitdepth,
                                   bool use_premult)
{
  GPUTexture *tex = nullptr;
  int size[2] = {GPU_texture_size_with_limit(ibuf->x), GPU_texture_size_with_limit(ibuf->y)};
  bool do_rescale = (ibuf->x != size[0]) || (ibuf->y != size[1]);

  /* Correct the smaller size to maintain the original aspect ratio of the image.
   * Clamp to one pixel, the integer truncation gives zero for extreme aspect ratios. */
  if (do_rescale && ibuf->x != ibuf->y) {
    if (size[0] > size[1]) {
      size[1] = max_ii(1, int(ibuf->y * (float(size[0]) / ibuf->x)));
    }
    else {
      size[0] = max_ii(1, int(ibuf->x * (float(size[1]) / ibuf->y)));
    }
  }

  if (ibuf->ftype == IMB_FTYPE_DDS) {
    eGPUTextureFormat compressed_format;
    if (!IMB_gpu_get_compressed_format(ibuf, &compressed_format)) {
      fprintf(stderr, "Unable to find a suitable DXT compression,");
    }
    else if (do_rescale) {
      fprintf(stderr, "Unable to load DXT image resolution,");
    }
    else if (!is_power_of_2_i(ibuf->x) || !is_power_of_2_i(ibuf->y)) {
      fprintf(stderr, "Unable to load non-power-of-two DXT image resolution,");
    }
    else {
      tex = GPU_texture_create_compressed_2d(name,
                                             ibuf->x,
                                             ibuf->y,
                                             ibuf->dds_data.nummipmaps,
                                             compressed_format,
                                             GPU_TEXTURE_USAGE_GENERAL,
                                             ibuf->dds_data.data);

      if (tex != nullptr) {
        return tex;
      }

      fprintf(stderr, "S3TC support not found,");
    }
    /* Fallback to uncompressed texture. */
    fprintf(stderr, " falling back to uncompressed.\n");
  }

  eGPUTextureFormat tex_format;
  imb_gpu_get_format(ibuf, use_high_bitdepth, true, &tex_format);

  /* Create Texture. */
  tex = GPU_texture_create_2d(
      name, UNPACK2(size), 9999, tex_format, GPU_TEXTURE_USAGE_SHADER_READ, nullptr);
  if (tex == nullptr) {
    /* Retry once at half the resolution. */
    size[0] = max_ii(1, size[0] / 2);
    size[1] = max_ii(1, size[1] / 2);
    tex = GPU_texture_create_2d(
        name, UNPACK2(size), 9999, tex_format, GPU_TEXTURE_USAGE_SHADER_READ, nullptr);
    do_rescale = true;
  }
  BLI_assert(tex != nullptr);
  if (tex == nullptr) {
    /* The assert is compiled out of release builds, never touch a missing texture. */
    return nullptr;
  }

  bool freebuf = false;
  eGPUDataFormat data_format;
  void *data = imb_gpu_get_data(ibuf, do_rescale, size, use_premult, &freebuf, &data_format);
  if (data == nullptr) {
    /* No pixels to upload, do not hand out a texture with undefined content. */
    GPU_texture_free(tex);
    return nullptr;
  }
  GPU_texture_update(tex, data_format, data);

  GPU_texture_swizzle_set(tex, imb_gpu_get_swizzle(ibuf));
  GPU_texture_anisotropic_filter(tex, true);

  if (freebuf) {
    MEM_freeN(data);
  }

  return tex;
}

/**
 * Public wrapper returning the GPU texture format selected for `ibuf` by value.
 *
 * \param high_bitdepth, use_grayscale: Forwarded unchanged to the format selection.
 */
eGPUTextureFormat IMB_gpu_get_texture_format(const ImBuf *ibuf,
                                             bool high_bitdepth,
                                             bool use_grayscale)
{
  eGPUTextureFormat selected_format;
  imb_gpu_get_format(ibuf, high_bitdepth, use_grayscale, &selected_format);

  return selected_format;
}

/**
 * Clamp every value of the float buffer of `image_buffer` to the range [-65504, 65504] in-place.
 *
 * Does nothing when the image has no float buffer. A channel count of zero is treated as four
 * channels.
 */
void IMB_gpu_clamp_half_float(ImBuf *image_buffer)
{
  const float half_min = -65504;
  const float half_max = 65504;

  float *rect_float = image_buffer->float_buffer.data;
  if (rect_float == nullptr) {
    return;
  }

  /* Count the values in `size_t`, `x * y * channels` overflows `int` for large images. */
  const size_t channels = (image_buffer->channels == 0) ? 4 : size_t(image_buffer->channels);
  const size_t rect_float_len = size_t(image_buffer->x) * size_t(image_buffer->y) * channels;

  for (size_t i = 0; i < rect_float_len; i++) {
    rect_float[i] = clamp_f(rect_float[i], half_min, half_max);
  }
}