Vulkan: Workaround for Unsupported R8G8B8 Vertex Buffer Formats
On some platforms the `VK_FORMAT_R8G8B8_*` formats are not supported as vertex buffers. The obvious workaround for this is to use `VK_FORMAT_R8G8B8A8_*` instead. Using unsupported vertex formats would crash Blender, as it is not able to compile the graphics pipelines that use them. Known affected platforms are: NVIDIA mobile GPUs (Quadro M1000M) and AMD Polaris (open source drivers). This PR adds the initial workings for other unsupported vertex buffer formats we may need to fix in the future. `VKDevice.workarounds.vertex_formats` contains booleans indicating whether the workaround for a specific format should be turned on (`r8g8b8 = true`). `VertexFormatConverter` can be used to identify if conversions are needed and to perform the conversion. Pull Request: https://projects.blender.org/blender/blender/pulls/114572
This commit is contained in:
parent
474b6fa070
commit
f76ceddc98
|
@ -91,6 +91,7 @@ void VKBackend::detect_workarounds(VKDevice &device)
|
|||
workarounds.not_aligned_pixel_formats = true;
|
||||
workarounds.shader_output_layer = true;
|
||||
workarounds.shader_output_viewport_index = true;
|
||||
workarounds.vertex_formats.r8g8b8 = true;
|
||||
|
||||
device.workarounds_ = workarounds;
|
||||
return;
|
||||
|
@ -106,6 +107,12 @@ void VKBackend::detect_workarounds(VKDevice &device)
|
|||
workarounds.not_aligned_pixel_formats = true;
|
||||
}
|
||||
|
||||
VkFormatProperties format_properties = {};
|
||||
vkGetPhysicalDeviceFormatProperties(
|
||||
device.physical_device_get(), VK_FORMAT_R8G8B8_UNORM, &format_properties);
|
||||
workarounds.vertex_formats.r8g8b8 = (format_properties.bufferFeatures &
|
||||
VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) == 0;
|
||||
|
||||
device.workarounds_ = workarounds;
|
||||
}
|
||||
|
||||
|
|
|
@ -85,7 +85,11 @@ void VKBuffer::update(const void *data) const
|
|||
{
|
||||
BLI_assert_msg(is_mapped(), "Cannot update a non-mapped buffer.");
|
||||
memcpy(mapped_memory_, data, size_in_bytes_);
|
||||
flush();
|
||||
}
|
||||
|
||||
void VKBuffer::flush() const
|
||||
{
|
||||
const VKDevice &device = VKBackend::get().device_get();
|
||||
VmaAllocator allocator = device.mem_allocator_get();
|
||||
vmaFlushAllocation(allocator, allocation_, 0, max_ii(size_in_bytes(), 1));
|
||||
|
|
|
@ -35,6 +35,7 @@ class VKBuffer {
|
|||
bool create(int64_t size, GPUUsageType usage, VkBufferUsageFlags buffer_usage);
|
||||
void clear(VKContext &context, uint32_t clear_value);
|
||||
void update(const void *data) const;
|
||||
void flush() const;
|
||||
void read(void *data) const;
|
||||
bool free();
|
||||
|
||||
|
|
|
@ -7,6 +7,9 @@
|
|||
*/
|
||||
|
||||
#include "vk_data_conversion.hh"
|
||||
#include "vk_device.hh"
|
||||
|
||||
#include "gpu_vertex_format_private.h"
|
||||
|
||||
#include "BLI_color.hh"
|
||||
|
||||
|
@ -951,50 +954,188 @@ void convert_device_to_host(void *dst_buffer,
|
|||
/** \name Vertex Attributes
|
||||
* \{ */
|
||||
|
||||
static bool conversion_needed(const GPUVertAttr &vertex_attribute)
|
||||
static bool attribute_check(const GPUVertAttr attribute,
|
||||
GPUVertCompType comp_type,
|
||||
GPUVertFetchMode fetch_mode)
|
||||
{
|
||||
return (vertex_attribute.fetch_mode == GPU_FETCH_INT_TO_FLOAT &&
|
||||
ELEM(vertex_attribute.comp_type, GPU_COMP_I32, GPU_COMP_U32));
|
||||
return attribute.comp_type == comp_type && attribute.fetch_mode == fetch_mode;
|
||||
}
|
||||
|
||||
bool conversion_needed(const GPUVertFormat &vertex_format)
|
||||
static bool attribute_check(const GPUVertAttr attribute, GPUVertCompType comp_type, uint comp_len)
|
||||
{
|
||||
return attribute.comp_type == comp_type && attribute.comp_len == comp_len;
|
||||
}
|
||||
|
||||
void VertexFormatConverter::reset()
|
||||
{
|
||||
source_format_ = nullptr;
|
||||
device_format_ = nullptr;
|
||||
GPU_vertformat_clear(&converted_format_);
|
||||
|
||||
needs_conversion_ = false;
|
||||
}
|
||||
|
||||
bool VertexFormatConverter::is_initialized() const
|
||||
{
|
||||
return device_format_ != nullptr;
|
||||
}
|
||||
|
||||
void VertexFormatConverter::init(const GPUVertFormat *vertex_format,
|
||||
const VKWorkarounds &workarounds)
|
||||
{
|
||||
source_format_ = vertex_format;
|
||||
device_format_ = vertex_format;
|
||||
|
||||
update_conversion_flags(*source_format_, workarounds);
|
||||
if (needs_conversion_) {
|
||||
init_device_format(workarounds);
|
||||
}
|
||||
}
|
||||
|
||||
const GPUVertFormat &VertexFormatConverter::device_format_get() const
{
  /* Calling this before #init is a programming error. */
  BLI_assert(is_initialized());
  return *device_format_;
}
|
||||
|
||||
bool VertexFormatConverter::needs_conversion() const
|
||||
{
|
||||
BLI_assert(is_initialized());
|
||||
return needs_conversion_;
|
||||
}
|
||||
|
||||
void VertexFormatConverter::update_conversion_flags(const GPUVertFormat &vertex_format,
|
||||
const VKWorkarounds &workarounds)
|
||||
{
|
||||
needs_conversion_ = false;
|
||||
|
||||
for (int attr_index : IndexRange(vertex_format.attr_len)) {
|
||||
const GPUVertAttr &vert_attr = vertex_format.attrs[attr_index];
|
||||
if (conversion_needed(vert_attr)) {
|
||||
return true;
|
||||
update_conversion_flags(vert_attr, workarounds);
|
||||
}
|
||||
}
|
||||
|
||||
void VertexFormatConverter::update_conversion_flags(const GPUVertAttr &vertex_attribute,
|
||||
const VKWorkarounds &workarounds)
|
||||
{
|
||||
/* I32/U32 to F32 conversion doesn't exist in vulkan. */
|
||||
if (vertex_attribute.fetch_mode == GPU_FETCH_INT_TO_FLOAT &&
|
||||
ELEM(vertex_attribute.comp_type, GPU_COMP_I32, GPU_COMP_U32))
|
||||
{
|
||||
needs_conversion_ = true;
|
||||
}
|
||||
/* r8g8b8 formats will be stored as r8g8b8a8. */
|
||||
else if (workarounds.vertex_formats.r8g8b8 && attribute_check(vertex_attribute, GPU_COMP_U8, 3))
|
||||
{
|
||||
needs_conversion_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
void VertexFormatConverter::init_device_format(const VKWorkarounds &workarounds)
|
||||
{
|
||||
BLI_assert(needs_conversion_);
|
||||
GPU_vertformat_copy(&converted_format_, source_format_);
|
||||
bool needs_repack = false;
|
||||
|
||||
for (int attr_index : IndexRange(converted_format_.attr_len)) {
|
||||
GPUVertAttr &vert_attr = converted_format_.attrs[attr_index];
|
||||
make_device_compatible(vert_attr, workarounds, needs_repack);
|
||||
}
|
||||
|
||||
if (needs_repack) {
|
||||
VertexFormat_pack(&converted_format_);
|
||||
}
|
||||
device_format_ = &converted_format_;
|
||||
}
|
||||
|
||||
void VertexFormatConverter::make_device_compatible(GPUVertAttr &vertex_attribute,
                                                   const VKWorkarounds &workarounds,
                                                   bool &r_needs_repack) const
{
  const bool is_int_to_float_32 = vertex_attribute.fetch_mode == GPU_FETCH_INT_TO_FLOAT &&
                                  ELEM(vertex_attribute.comp_type, GPU_COMP_I32, GPU_COMP_U32);
  if (is_int_to_float_32) {
    /* Store as plain floats; the int -> float conversion is done on the host during upload. */
    vertex_attribute.fetch_mode = GPU_FETCH_FLOAT;
    vertex_attribute.comp_type = GPU_COMP_F32;
  }
  else if (workarounds.vertex_formats.r8g8b8 && attribute_check(vertex_attribute, GPU_COMP_U8, 3))
  {
    /* Widen r8g8b8 to r8g8b8a8; attribute size changes, so the format must be repacked. */
    vertex_attribute.comp_len = 4;
    vertex_attribute.size = 4;
    r_needs_repack = true;
  }
}
|
||||
|
||||
void VertexFormatConverter::convert(void *device_data,
|
||||
const void *source_data,
|
||||
const uint vertex_len) const
|
||||
{
|
||||
BLI_assert(needs_conversion_);
|
||||
if (source_data != device_data) {
|
||||
memcpy(device_data, source_data, device_format_->stride * vertex_len);
|
||||
}
|
||||
|
||||
const void *source_row_data = static_cast<const uint8_t *>(source_data);
|
||||
void *device_row_data = static_cast<uint8_t *>(device_data);
|
||||
for (int vertex_index : IndexRange(vertex_len)) {
|
||||
UNUSED_VARS(vertex_index);
|
||||
convert_row(device_row_data, source_row_data);
|
||||
source_row_data = static_cast<const uint8_t *>(source_row_data) + source_format_->stride;
|
||||
device_row_data = static_cast<uint8_t *>(device_row_data) + device_format_->stride;
|
||||
}
|
||||
}
|
||||
|
||||
void VertexFormatConverter::convert_row(void *device_row_data, const void *source_row_data) const
|
||||
{
|
||||
for (int attr_index : IndexRange(source_format_->attr_len)) {
|
||||
const GPUVertAttr &device_attribute = device_format_->attrs[attr_index];
|
||||
const GPUVertAttr &source_attribute = source_format_->attrs[attr_index];
|
||||
convert_attribute(device_row_data, source_row_data, device_attribute, source_attribute);
|
||||
}
|
||||
}
|
||||
|
||||
void VertexFormatConverter::convert_attribute(void *device_row_data,
|
||||
const void *source_row_data,
|
||||
const GPUVertAttr &device_attribute,
|
||||
const GPUVertAttr &source_attribute) const
|
||||
{
|
||||
const void *source_attr_data = static_cast<const uint8_t *>(source_row_data) +
|
||||
source_attribute.offset;
|
||||
void *device_attr_data = static_cast<uint8_t *>(device_row_data) + device_attribute.offset;
|
||||
if (source_attribute.comp_len == device_attribute.comp_len &&
|
||||
source_attribute.comp_type == device_attribute.comp_type &&
|
||||
source_attribute.fetch_mode == device_attribute.fetch_mode)
|
||||
{
|
||||
/* This check is done first to improve possible branch prediction. */
|
||||
}
|
||||
else if (attribute_check(source_attribute, GPU_COMP_I32, GPU_FETCH_INT_TO_FLOAT) &&
|
||||
attribute_check(device_attribute, GPU_COMP_F32, GPU_FETCH_FLOAT))
|
||||
{
|
||||
for (int component : IndexRange(source_attribute.comp_len)) {
|
||||
const int32_t *component_in = static_cast<const int32_t *>(source_attr_data) + component;
|
||||
float *component_out = static_cast<float *>(device_attr_data) + component;
|
||||
*component_out = float(*component_in);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void convert_in_place(void *data, const GPUVertFormat &vertex_format, const uint vertex_len)
|
||||
{
|
||||
BLI_assert(vertex_format.deinterleaved == false);
|
||||
for (int attr_index : IndexRange(vertex_format.attr_len)) {
|
||||
const GPUVertAttr &vert_attr = vertex_format.attrs[attr_index];
|
||||
if (!conversion_needed(vert_attr)) {
|
||||
continue;
|
||||
}
|
||||
void *row_data = static_cast<uint8_t *>(data) + vert_attr.offset;
|
||||
for (int vert_index = 0; vert_index < vertex_len; vert_index++) {
|
||||
if (vert_attr.comp_type == GPU_COMP_I32) {
|
||||
for (int component : IndexRange(vert_attr.comp_len)) {
|
||||
int32_t *component_in = static_cast<int32_t *>(row_data) + component;
|
||||
float *component_out = static_cast<float *>(row_data) + component;
|
||||
*component_out = float(*component_in);
|
||||
}
|
||||
}
|
||||
else if (vert_attr.comp_type == GPU_COMP_U32) {
|
||||
for (int component : IndexRange(vert_attr.comp_len)) {
|
||||
uint32_t *component_in = static_cast<uint32_t *>(row_data) + component;
|
||||
float *component_out = static_cast<float *>(row_data) + component;
|
||||
*component_out = float(*component_in);
|
||||
}
|
||||
}
|
||||
row_data = static_cast<uint8_t *>(row_data) + vertex_format.stride;
|
||||
else if (attribute_check(source_attribute, GPU_COMP_U32, GPU_FETCH_INT_TO_FLOAT) &&
|
||||
attribute_check(device_attribute, GPU_COMP_F32, GPU_FETCH_FLOAT))
|
||||
{
|
||||
for (int component : IndexRange(source_attribute.comp_len)) {
|
||||
const uint32_t *component_in = static_cast<const uint32_t *>(source_attr_data) + component;
|
||||
float *component_out = static_cast<float *>(device_attr_data) + component;
|
||||
*component_out = float(*component_in);
|
||||
}
|
||||
}
|
||||
else if (attribute_check(source_attribute, GPU_COMP_U8, 3) &&
|
||||
attribute_check(device_attribute, GPU_COMP_U8, 4))
|
||||
{
|
||||
const uchar3 *attr_in = static_cast<const uchar3 *>(source_attr_data);
|
||||
uchar4 *attr_out = static_cast<uchar4 *>(device_attr_data);
|
||||
*attr_out = uchar4(attr_in->x, attr_in->y, attr_in->z, 255);
|
||||
}
|
||||
else {
|
||||
BLI_assert_unreachable();
|
||||
}
|
||||
}
|
||||
|
||||
/* \} */
|
||||
|
|
|
@ -12,7 +12,10 @@
|
|||
|
||||
#include "gpu_texture_private.hh"
|
||||
|
||||
#include "vk_common.hh"
|
||||
|
||||
namespace blender::gpu {
|
||||
struct VKWorkarounds;
|
||||
|
||||
/**
|
||||
* Convert host buffer to device buffer.
|
||||
|
@ -76,6 +79,119 @@ bool conversion_needed(const GPUVertFormat &vertex_format);
|
|||
*/
|
||||
void convert_in_place(void *data, const GPUVertFormat &vertex_format, const uint vertex_len);
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/** \name Vertex Buffers
|
||||
* \{ */
|
||||
|
||||
/**
|
||||
* Utility to make vertex buffers device compatible.
|
||||
*
|
||||
* Some vertex formats used by vertex buffers cannot be handled by vulkan.
|
||||
* This could be that vulkan doesn't support it, or that the device itself doesn't support it.
|
||||
*
|
||||
* In this case the vertex buffer needs to be converted before it can be uploaded.
|
||||
* The approach is to do this during upload so we reduce the read/write actions during
|
||||
* transform/upload.
|
||||
*/
|
||||
struct VertexFormatConverter {
|
||||
private:
|
||||
/** The original format of the vertex buffer constructed by Blender. */
|
||||
const GPUVertFormat *source_format_ = nullptr;
|
||||
|
||||
/**
|
||||
* The format of the vertex buffer that is compatible by the device.
|
||||
*
|
||||
* This can be #source_format when no conversion is needed, or points to #conversion_format when
|
||||
* conversion is needed.
|
||||
*/
|
||||
const GPUVertFormat *device_format_ = nullptr;
|
||||
|
||||
bool needs_conversion_ = false;
|
||||
|
||||
/**
|
||||
* When conversion is needed, this is filled with a variant of source_format_ that is compatible
|
||||
* with Vulkan and the active workarounds passed by the #init method.
|
||||
*/
|
||||
GPUVertFormat converted_format_;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Has this instance already been initialized?
|
||||
*
|
||||
* Call #init to initialize the instance.
|
||||
*/
|
||||
bool is_initialized() const;
|
||||
|
||||
/**
|
||||
* Initialize the vertex format converter instance.
|
||||
*
|
||||
* Can be run on both initialized and uninitialized instances.
|
||||
* After calling this method:
|
||||
* - #is_initialized will return true.
|
||||
* - #device_format_get will return the GPUVertFormat that needs to be used to
|
||||
* setup the vertex attribute bindings.
|
||||
* - #convert can be called to convert source data to device data.
|
||||
*/
|
||||
void init(const GPUVertFormat *vertex_format, const VKWorkarounds &workarounds);
|
||||
|
||||
/**
|
||||
* Get the #GPUVertFormat that is compatible with the Vulkan and the active workarounds passed by
|
||||
* the #init function.
|
||||
*
|
||||
* Will assert when this isn't initialized.
|
||||
*/
|
||||
const GPUVertFormat &device_format_get() const;
|
||||
|
||||
/**
|
||||
* Can be called after init to check if conversion is needed.
|
||||
*/
|
||||
bool needs_conversion() const;
|
||||
|
||||
/**
|
||||
* Convert src_data to device data.
|
||||
*
|
||||
* Only call this after init and when needs_conversion returns true. Will assert if this isn't
|
||||
* the case. src_data and device_data can point to the same memory address and perform an inline
|
||||
* conversion.
|
||||
*
|
||||
* After this method completes the src_data is converted to device compatible data.
|
||||
*/
|
||||
void convert(void *device_data, const void *src_data, const uint vertex_len) const;
|
||||
|
||||
/**
|
||||
* Reset this instance by clearing internal data.
|
||||
*
|
||||
* After calling this #is_initialized() will be false.
|
||||
*/
|
||||
void reset();
|
||||
|
||||
private:
|
||||
/**
|
||||
* Update conversion flags happens at the start of initialization and updated the
|
||||
* #needs_conversion flag.
|
||||
*/
|
||||
void update_conversion_flags(const GPUVertFormat &vertex_format,
|
||||
const VKWorkarounds &workarounds);
|
||||
void update_conversion_flags(const GPUVertAttr &vertex_attribute,
|
||||
const VKWorkarounds &workarounds);
|
||||
|
||||
/**
|
||||
* Update the conversion_format to contain a device compatible version of the #source_format_.
|
||||
*/
|
||||
void init_device_format(const VKWorkarounds &workarounds);
|
||||
void make_device_compatible(GPUVertAttr &vertex_attribute,
|
||||
const VKWorkarounds &workarounds,
|
||||
bool &needs_repack) const;
|
||||
|
||||
void convert_row(void *device_row_data, const void *source_row_data) const;
|
||||
void convert_attribute(void *device_row_data,
|
||||
const void *source_row_data,
|
||||
const GPUVertAttr &device_attribute,
|
||||
const GPUVertAttr &source_attribute) const;
|
||||
};
|
||||
|
||||
/* \} */
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/** \name Floating point conversions
|
||||
* \{ */
|
||||
|
@ -243,4 +359,5 @@ uint32_t convert_float_formats(uint32_t value)
|
|||
}
|
||||
|
||||
/* \} */
|
||||
|
||||
}; // namespace blender::gpu
|
||||
|
|
|
@ -5,6 +5,9 @@
|
|||
#include "testing/testing.h"
|
||||
|
||||
#include "vk_data_conversion.hh"
|
||||
#include "vk_device.hh"
|
||||
|
||||
#include "gpu_vertex_format_private.h"
|
||||
|
||||
namespace blender::gpu::tests {
|
||||
static void test_f32_f16(uint32_t f32_in, uint32_t f16_expected)
|
||||
|
@ -99,4 +102,156 @@ TEST(VulkanDataConversion, infinity_lower)
|
|||
EXPECT_EQ(f16_inf, f16_inf_expected);
|
||||
}
|
||||
|
||||
TEST(VulkanDataConversion, vertex_format_i32_as_float)
{
  GPUVertFormat source_format;
  GPU_vertformat_clear(&source_format);
  GPU_vertformat_attr_add(&source_format, "pos", GPU_COMP_I32, 2, GPU_FETCH_INT_TO_FLOAT);
  VertexFormat_pack(&source_format);

  /* Same storage read as ints before conversion and as floats after. */
  union TestData {
    int2 pos_i;
    float2 pos_fl;
  };
  TestData test_data[4];
  for (int i : IndexRange(4)) {
    test_data[i].pos_i = int2(i, i + 1);
  }

  VKWorkarounds workarounds = {};
  VertexFormatConverter converter;
  converter.init(&source_format, workarounds);

  /* I32 + INT_TO_FLOAT always needs host-side conversion. */
  EXPECT_TRUE(converter.needs_conversion());

  /* Source and destination alias: in-place conversion. */
  converter.convert(&test_data, &test_data, 4);

  for (int i : IndexRange(4)) {
    EXPECT_EQ(test_data[i].pos_fl, float2(float(i), float(i + 1)));
  }
}
|
||||
|
||||
TEST(VulkanDataConversion, vertex_format_u32_as_float)
{
  GPUVertFormat source_format;
  GPU_vertformat_clear(&source_format);
  GPU_vertformat_attr_add(&source_format, "pos", GPU_COMP_U32, 3, GPU_FETCH_INT_TO_FLOAT);
  VertexFormat_pack(&source_format);

  /* Same storage read as uints before conversion and as floats after. */
  union TestData {
    uint3 pos_u;
    float3 pos_fl;
  };
  TestData test_data[4];
  for (int i : IndexRange(4)) {
    test_data[i].pos_u = uint3(i, i + 1, i + 2);
  }

  VKWorkarounds workarounds = {};
  VertexFormatConverter converter;
  converter.init(&source_format, workarounds);

  /* U32 + INT_TO_FLOAT always needs host-side conversion. */
  EXPECT_TRUE(converter.needs_conversion());

  /* Source and destination alias: in-place conversion. */
  converter.convert(&test_data, &test_data, 4);

  for (int i : IndexRange(4)) {
    EXPECT_EQ(test_data[i].pos_fl, float3(float(i), float(i + 1), float(i + 2)));
  }
}
|
||||
|
||||
TEST(VulkanDataConversion, vertex_format_r8g8b8)
{
  GPUVertFormat source_format;
  GPU_vertformat_clear(&source_format);
  GPU_vertformat_attr_add(&source_format, "color", GPU_COMP_U8, 3, GPU_FETCH_INT_TO_FLOAT_UNIT);
  VertexFormat_pack(&source_format);

  struct SourceData {
    uchar3 color;
    uint8_t _pad;
  };
  struct DeviceData {
    uchar4 color;
  };

  const uchar3 input_colors[4] = {
      uchar3(255, 0, 0), uchar3(255, 255, 255), uchar3(255, 0, 0), uchar3(255, 255, 255)};
  SourceData test_data_in[4];
  for (int i : IndexRange(4)) {
    test_data_in[i].color = input_colors[i];
  }

  /* Without the workaround no conversion should be required. */
  VKWorkarounds workarounds = {};
  VertexFormatConverter converter;
  converter.init(&source_format, workarounds);
  EXPECT_FALSE(converter.needs_conversion());

  /* Enable workaround for r8g8b8 vertex formats. */
  workarounds.vertex_formats.r8g8b8 = true;
  converter.init(&source_format, workarounds);
  EXPECT_TRUE(converter.needs_conversion());

  DeviceData test_data_out[4];
  converter.convert(test_data_out, test_data_in, 4);

  /* Every color must be widened with an opaque alpha channel. */
  for (int i : IndexRange(4)) {
    EXPECT_EQ(test_data_out[i].color,
              uchar4(input_colors[i].x, input_colors[i].y, input_colors[i].z, 255));
  }
}
|
||||
|
||||
TEST(VulkanDataConversion, vertex_format_multiple_attributes)
{
  GPUVertFormat source_format;
  GPU_vertformat_clear(&source_format);
  GPU_vertformat_attr_add(&source_format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
  GPU_vertformat_attr_add(&source_format, "color", GPU_COMP_U8, 3, GPU_FETCH_INT_TO_FLOAT_UNIT);
  GPU_vertformat_attr_add(&source_format, "flag", GPU_COMP_U32, 1, GPU_FETCH_INT);
  VertexFormat_pack(&source_format);

  struct SourceData {
    float3 pos;
    uchar3 color;
    uint8_t _pad;
    uint flag;
  };
  struct DeviceData {
    float3 pos;
    uchar4 color;
    uint flag;
  };

  const SourceData test_data_in[4] = {
      {float3(1.0, 2.0, 3.0), uchar3(255, 0, 0), 0, 0},
      {float3(4.0, 5.0, 6.0), uchar3(0, 255, 0), 0, 1},
      {float3(7.0, 8.0, 9.0), uchar3(0, 0, 255), 0, 2},
      {float3(10.0, 11.0, 12.0), uchar3(255, 255, 255), 0, 3},
  };

  /* Only the `color` attribute triggers the r8g8b8 workaround; `pos` and `flag` must pass
   * through untouched. */
  VKWorkarounds workarounds = {};
  workarounds.vertex_formats.r8g8b8 = true;
  VertexFormatConverter converter;
  converter.init(&source_format, workarounds);
  EXPECT_TRUE(converter.needs_conversion());

  DeviceData test_data_out[4];
  converter.convert(test_data_out, test_data_in, 4);

  const DeviceData expected_data[4] = {
      {float3(1.0, 2.0, 3.0), uchar4(255, 0, 0, 255), 0},
      {float3(4.0, 5.0, 6.0), uchar4(0, 255, 0, 255), 1},
      {float3(7.0, 8.0, 9.0), uchar4(0, 0, 255, 255), 2},
      {float3(10.0, 11.0, 12.0), uchar4(255, 255, 255, 255), 3},
  };
  for (int i : IndexRange(4)) {
    EXPECT_EQ(test_data_out[i].pos, expected_data[i].pos);
    EXPECT_EQ(test_data_out[i].color, expected_data[i].color);
    EXPECT_EQ(test_data_out[i].flag, expected_data[i].flag);
  }
}
|
||||
|
||||
} // namespace blender::gpu::tests
|
||||
|
|
|
@ -40,6 +40,14 @@ struct VKWorkarounds {
|
|||
* #VkPhysicalDeviceVulkan12Features::shaderOutputLayer enabled.
|
||||
*/
|
||||
bool shader_output_layer = false;
|
||||
|
||||
struct {
|
||||
/**
|
||||
* Is the workaround enabled for devices that don't support using VK_FORMAT_R8G8B8_* as vertex
|
||||
* buffer.
|
||||
*/
|
||||
bool r8g8b8 = false;
|
||||
} vertex_formats;
|
||||
};
|
||||
|
||||
class VKDevice : public NonCopyable {
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
*/
|
||||
|
||||
#include "vk_immediate.hh"
|
||||
#include "vk_backend.hh"
|
||||
#include "vk_data_conversion.hh"
|
||||
#include "vk_state_manager.hh"
|
||||
|
||||
|
@ -20,7 +21,10 @@ VKImmediate::~VKImmediate() {}
|
|||
uchar *VKImmediate::begin()
|
||||
{
|
||||
VKContext &context = *VKContext::get();
|
||||
const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_len);
|
||||
const VKWorkarounds &workarounds = VKBackend::get().device_get().workarounds_get();
|
||||
vertex_format_converter.init(&vertex_format, workarounds);
|
||||
const size_t bytes_needed = vertex_buffer_size(&vertex_format_converter.device_format_get(),
|
||||
vertex_len);
|
||||
const bool new_buffer_needed = !has_active_resource() || buffer_bytes_free() < bytes_needed;
|
||||
|
||||
std::unique_ptr<VKBuffer> &buffer = tracked_resource_for(context, new_buffer_needed);
|
||||
|
@ -37,14 +41,13 @@ void VKImmediate::end()
|
|||
return;
|
||||
}
|
||||
|
||||
if (conversion_needed(vertex_format)) {
|
||||
// Slow path
|
||||
if (vertex_format_converter.needs_conversion()) {
|
||||
/* Determine the start of the subbuffer. The `vertex_data` attribute changes when new vertices
|
||||
* are loaded.
|
||||
*/
|
||||
uchar *data = static_cast<uchar *>(active_resource()->mapped_memory_get()) +
|
||||
subbuffer_offset_get();
|
||||
convert_in_place(data, vertex_format, vertex_idx);
|
||||
vertex_format_converter.convert(data, data, vertex_idx);
|
||||
}
|
||||
|
||||
VKContext &context = *VKContext::get();
|
||||
|
@ -60,6 +63,7 @@ void VKImmediate::end()
|
|||
|
||||
buffer_offset_ += current_subbuffer_len_;
|
||||
current_subbuffer_len_ = 0;
|
||||
vertex_format_converter.reset();
|
||||
}
|
||||
|
||||
VkDeviceSize VKImmediate::subbuffer_offset_get()
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
#include "vk_buffer.hh"
|
||||
#include "vk_context.hh"
|
||||
#include "vk_data_conversion.hh"
|
||||
#include "vk_mem_alloc.h"
|
||||
#include "vk_resource_tracker.hh"
|
||||
#include "vk_vertex_attribute_object.hh"
|
||||
|
@ -32,6 +33,7 @@ class VKImmediate : public Immediate, VKResourceTracker<VKBuffer> {
|
|||
|
||||
VkDeviceSize buffer_offset_ = 0;
|
||||
VkDeviceSize current_subbuffer_len_ = 0;
|
||||
VertexFormatConverter vertex_format_converter;
|
||||
|
||||
public:
|
||||
VKImmediate();
|
||||
|
|
|
@ -129,15 +129,27 @@ void VKVertexAttributeObject::update_bindings(const VKContext &context, VKBatch
|
|||
for (int v = 0; v < GPU_BATCH_INST_VBO_MAX_LEN; v++) {
|
||||
VKVertexBuffer *vbo = batch.instance_buffer_get(v);
|
||||
if (vbo) {
|
||||
update_bindings(
|
||||
vbo->format, vbo, nullptr, vbo->vertex_len, interface, occupied_attributes, true);
|
||||
vbo->device_format_ensure();
|
||||
update_bindings(vbo->device_format_get(),
|
||||
vbo,
|
||||
nullptr,
|
||||
vbo->vertex_len,
|
||||
interface,
|
||||
occupied_attributes,
|
||||
true);
|
||||
}
|
||||
}
|
||||
for (int v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) {
|
||||
VKVertexBuffer *vbo = batch.vertex_buffer_get(v);
|
||||
if (vbo) {
|
||||
update_bindings(
|
||||
vbo->format, vbo, nullptr, vbo->vertex_len, interface, occupied_attributes, false);
|
||||
vbo->device_format_ensure();
|
||||
update_bindings(vbo->device_format_get(),
|
||||
vbo,
|
||||
nullptr,
|
||||
vbo->vertex_len,
|
||||
interface,
|
||||
occupied_attributes,
|
||||
false);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -241,7 +253,7 @@ void VKVertexAttributeObject::update_bindings(VKImmediate &immediate)
|
|||
VKBufferWithOffset immediate_buffer = {*immediate.active_resource(),
|
||||
immediate.subbuffer_offset_get()};
|
||||
|
||||
update_bindings(immediate.vertex_format,
|
||||
update_bindings(immediate.vertex_format_converter.device_format_get(),
|
||||
nullptr,
|
||||
&immediate_buffer,
|
||||
immediate.vertex_len,
|
||||
|
|
|
@ -126,22 +126,6 @@ void VKVertexBuffer::release_data()
|
|||
MEM_SAFE_FREE(data);
|
||||
}
|
||||
|
||||
static bool inplace_conversion_supported(const GPUUsageType &usage)
|
||||
{
|
||||
return ELEM(usage, GPU_USAGE_STATIC, GPU_USAGE_STREAM);
|
||||
}
|
||||
|
||||
void *VKVertexBuffer::convert() const
|
||||
{
|
||||
void *out_data = data;
|
||||
if (!inplace_conversion_supported(usage_)) {
|
||||
out_data = MEM_dupallocN(out_data);
|
||||
}
|
||||
BLI_assert(format.deinterleaved);
|
||||
convert_in_place(out_data, format, vertex_len);
|
||||
return out_data;
|
||||
}
|
||||
|
||||
void VKVertexBuffer::upload_data()
|
||||
{
|
||||
if (!buffer_.is_allocated()) {
|
||||
|
@ -152,13 +136,13 @@ void VKVertexBuffer::upload_data()
|
|||
}
|
||||
|
||||
if (flag & GPU_VERTBUF_DATA_DIRTY) {
|
||||
void *data_to_upload = data;
|
||||
if (conversion_needed(format)) {
|
||||
data_to_upload = convert();
|
||||
device_format_ensure();
|
||||
if (vertex_format_converter.needs_conversion()) {
|
||||
vertex_format_converter.convert(buffer_.mapped_memory_get(), data, vertex_len);
|
||||
buffer_.flush();
|
||||
}
|
||||
buffer_.update(data_to_upload);
|
||||
if (data_to_upload != data) {
|
||||
MEM_SAFE_FREE(data_to_upload);
|
||||
else {
|
||||
buffer_.update(data);
|
||||
}
|
||||
if (usage_ == GPU_USAGE_STATIC) {
|
||||
MEM_SAFE_FREE(data);
|
||||
|
@ -174,6 +158,19 @@ void VKVertexBuffer::duplicate_data(VertBuf * /*dst*/)
|
|||
NOT_YET_IMPLEMENTED
|
||||
}
|
||||
|
||||
void VKVertexBuffer::device_format_ensure()
|
||||
{
|
||||
if (!vertex_format_converter.is_initialized()) {
|
||||
const VKWorkarounds &workarounds = VKBackend::get().device_get().workarounds_get();
|
||||
vertex_format_converter.init(&format, workarounds);
|
||||
}
|
||||
}
|
||||
|
||||
/** Device-compatible vertex format; #device_format_ensure must have been called before. */
const GPUVertFormat &VKVertexBuffer::device_format_get() const
{
  return vertex_format_converter.device_format_get();
}
|
||||
|
||||
void VKVertexBuffer::allocate()
|
||||
{
|
||||
buffer_.create(size_alloc_get(),
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include "vk_bindable_resource.hh"
|
||||
#include "vk_buffer.hh"
|
||||
#include "vk_data_conversion.hh"
|
||||
|
||||
namespace blender::gpu {
|
||||
|
||||
|
@ -20,6 +21,8 @@ class VKVertexBuffer : public VertBuf, public VKBindableResource {
|
|||
/** When a vertex buffer is used as a UNIFORM_TEXEL_BUFFER the buffer requires a buffer view. */
|
||||
VkBufferView vk_buffer_view_ = VK_NULL_HANDLE;
|
||||
|
||||
VertexFormatConverter vertex_format_converter;
|
||||
|
||||
public:
|
||||
~VKVertexBuffer();
|
||||
|
||||
|
@ -43,6 +46,9 @@ class VKVertexBuffer : public VertBuf, public VKBindableResource {
|
|||
return vk_buffer_view_;
|
||||
}
|
||||
|
||||
void device_format_ensure();
|
||||
const GPUVertFormat &device_format_get() const;
|
||||
|
||||
protected:
|
||||
void acquire_data() override;
|
||||
void resize_data() override;
|
||||
|
@ -52,7 +58,6 @@ class VKVertexBuffer : public VertBuf, public VKBindableResource {
|
|||
|
||||
private:
|
||||
void allocate();
|
||||
void *convert() const;
|
||||
|
||||
/* VKTexture requires access to `buffer_` to convert a vertex buffer to a texture. */
|
||||
friend class VKTexture;
|
||||
|
|
Loading…
Reference in New Issue