diff --git a/source/blender/draw/engines/eevee_next/eevee_shadow.cc b/source/blender/draw/engines/eevee_next/eevee_shadow.cc
index 686195c76f2..66df398a22b 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shadow.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_shadow.cc
@@ -1328,6 +1328,8 @@ void ShadowModule::set_view(View &view)
 
   shadow_multi_view_.compute_procedural_bounds();
 
+  statistics_buf_.current().async_flush_to_host();
+
   /* Isolate shadow update into own command buffer.
    * If parameter buffer exceeds limits, then other work will not be impacted. */
   bool use_flush = (shadow_technique == ShadowTechnique::TILE_COPY) &&
diff --git a/source/blender/draw/intern/DRW_gpu_wrapper.hh b/source/blender/draw/intern/DRW_gpu_wrapper.hh
index 1290f9f5230..3bdbc6ae6bd 100644
--- a/source/blender/draw/intern/DRW_gpu_wrapper.hh
+++ b/source/blender/draw/intern/DRW_gpu_wrapper.hh
@@ -248,6 +248,11 @@ class StorageCommon : public DataBuffer<T, len, false>, NonMovable, NonCopyable
     GPU_storagebuf_clear_to_zero(ssbo_);
   }
 
+  void async_flush_to_host()
+  {
+    GPU_storagebuf_sync_to_host(ssbo_);
+  }
+
   void read()
   {
     GPU_storagebuf_read(ssbo_, this->data_);
diff --git a/source/blender/gpu/GPU_storage_buffer.h b/source/blender/gpu/GPU_storage_buffer.h
index 46cc5b030dc..5d7c72fb1cb 100644
--- a/source/blender/gpu/GPU_storage_buffer.h
+++ b/source/blender/gpu/GPU_storage_buffer.h
@@ -48,10 +48,24 @@ void GPU_storagebuf_clear_to_zero(GPUStorageBuf *ssbo);
  */
 void GPU_storagebuf_clear(GPUStorageBuf *ssbo, uint32_t clear_value);
 
+/**
+ * Explicitly sync updated storage buffer contents back to host within the GPU command stream.
+ * This ensures any changes made by the GPU are visible to the host.
+ * NOTE: This command is only valid for host-visible storage buffers.
+ */
+void GPU_storagebuf_sync_to_host(GPUStorageBuf *ssbo);
+
 /**
  * Read back content of the buffer to CPU for inspection.
  * Slow! Only use for inspection / debugging.
- * NOTE: Not synchronized. Use appropriate barrier before reading.
+ *
+ * NOTE: If GPU_storagebuf_sync_to_host is called, this command is synchronized against that call.
+ * If pending GPU updates to the storage buffer are not yet visible to the host, the command will
+ * stall until dependent GPU work has completed.
+ *
+ * Otherwise, this command is unsynchronized and will return current visible storage buffer
+ * contents immediately.
+ * Alternatively, use appropriate barrier or GPU_finish before reading.
 */
 void GPU_storagebuf_read(GPUStorageBuf *ssbo, void *data);
 
diff --git a/source/blender/gpu/intern/gpu_storage_buffer.cc b/source/blender/gpu/intern/gpu_storage_buffer.cc
index 0d1254592fe..b0d2e550117 100644
--- a/source/blender/gpu/intern/gpu_storage_buffer.cc
+++ b/source/blender/gpu/intern/gpu_storage_buffer.cc
@@ -106,6 +106,11 @@ void GPU_storagebuf_copy_sub_from_vertbuf(
   unwrap(ssbo)->copy_sub(unwrap(src), dst_offset, src_offset, copy_size);
 }
 
+void GPU_storagebuf_sync_to_host(GPUStorageBuf *ssbo)
+{
+  unwrap(ssbo)->async_flush_to_host();
+}
+
 void GPU_storagebuf_read(GPUStorageBuf *ssbo, void *data)
 {
   unwrap(ssbo)->read(data);
diff --git a/source/blender/gpu/intern/gpu_storage_buffer_private.hh b/source/blender/gpu/intern/gpu_storage_buffer_private.hh
index 39541944901..c099b67fba2 100644
--- a/source/blender/gpu/intern/gpu_storage_buffer_private.hh
+++ b/source/blender/gpu/intern/gpu_storage_buffer_private.hh
@@ -47,6 +47,7 @@ class StorageBuf {
   virtual void clear(uint32_t clear_value) = 0;
   virtual void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) = 0;
   virtual void read(void *data) = 0;
+  virtual void async_flush_to_host() = 0;
 };
 
 /* Syntactic sugar. */
diff --git a/source/blender/gpu/metal/mtl_storage_buffer.hh b/source/blender/gpu/metal/mtl_storage_buffer.hh
index 168838d86b2..af114c1e768 100644
--- a/source/blender/gpu/metal/mtl_storage_buffer.hh
+++ b/source/blender/gpu/metal/mtl_storage_buffer.hh
@@ -54,6 +54,10 @@ class MTLStorageBuf : public StorageBuf {
   /** Usage type. */
   GPUUsageType usage_;
 
+  /* Synchronization event for host reads. */
+  id<MTLSharedEvent> gpu_write_fence_ = nil;
+  uint64_t host_read_signal_value_ = 0;
+
  public:
   MTLStorageBuf(size_t size, GPUUsageType usage, const char *name);
   ~MTLStorageBuf();
@@ -68,6 +72,7 @@ class MTLStorageBuf : public StorageBuf {
   void clear(uint32_t clear_value) override;
   void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) override;
   void read(void *data) override;
+  void async_flush_to_host() override;
 
   void init();
 
diff --git a/source/blender/gpu/metal/mtl_storage_buffer.mm b/source/blender/gpu/metal/mtl_storage_buffer.mm
index 3796f292350..40006459f77 100644
--- a/source/blender/gpu/metal/mtl_storage_buffer.mm
+++ b/source/blender/gpu/metal/mtl_storage_buffer.mm
@@ -8,6 +8,7 @@
 
 #include "BLI_string.h"
 
+#include "GPU_state.h"
 #include "gpu_backend.hh"
 #include "gpu_context_private.hh"
 
@@ -19,6 +20,8 @@
 #include "mtl_uniform_buffer.hh"
 #include "mtl_vertex_buffer.hh"
 
+#include "PIL_time.h"
+
 namespace blender::gpu {
 
 /* -------------------------------------------------------------------- */
@@ -71,6 +74,11 @@ MTLStorageBuf::~MTLStorageBuf()
     has_data_ = false;
   }
 
+  if (gpu_write_fence_ != nil) {
+    [gpu_write_fence_ release];
+    gpu_write_fence_ = nil;
+  }
+
   /* Ensure SSBO is not bound to active CTX.
    * SSBO bindings are reset upon Context-switch so we do not need
    * to check deactivated context's. */
@@ -172,6 +180,7 @@ void MTLStorageBuf::update(const void *data)
                         toBuffer:dst_buf
                destinationOffset:0
                             size:size_in_bytes_];
+    staging_buf->free();
   }
   else {
     /* Upload data. */
@@ -323,6 +332,40 @@ void MTLStorageBuf::copy_sub(VertBuf *src_, uint dst_offset, uint src_offset, ui
                             size:copy_size];
 }
 
+void MTLStorageBuf::async_flush_to_host()
+{
+  bool device_only = (usage_ == GPU_USAGE_DEVICE_ONLY);
+  BLI_assert_msg(!device_only,
+                 "Storage buffers with usage GPU_USAGE_DEVICE_ONLY cannot have their data "
+                 "synchronized to the host.");
+  if (device_only) {
+    return;
+  }
+
+  MTLContext *ctx = MTLContext::get();
+  BLI_assert(ctx);
+
+  if (gpu_write_fence_ == nil) {
+    gpu_write_fence_ = [ctx->device newSharedEvent];
+  }
+
+  if (metal_buffer_ == nullptr) {
+    this->init();
+  }
+
+  /* For discrete memory systems, explicitly flush GPU-resident memory back to host. */
+  id<MTLBuffer> storage_buf_mtl = this->metal_buffer_->get_metal_buffer();
+  if (storage_buf_mtl.storageMode == MTLStorageModeManaged) {
+    id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
+    [blit_encoder synchronizeResource:storage_buf_mtl];
+  }
+
+  /* Encode event signal and flush command buffer to ensure GPU work is in the pipeline for future
+   * reads. */
+  ctx->main_command_buffer.encode_signal_event(gpu_write_fence_, ++host_read_signal_value_);
+  GPU_flush();
+}
+
 void MTLStorageBuf::read(void *data)
 {
   if (data == nullptr) {
@@ -333,19 +376,71 @@ void MTLStorageBuf::read(void *data)
     this->init();
   }
 
-  /* Managed buffers need to be explicitly flushed back to host. */
-  if (metal_buffer_->get_resource_options() & MTLResourceStorageModeManaged) {
+  /* Device-only storage buffers cannot be read directly and require staging. This path should only
+     be used for unit testing. */
+  bool device_only = (usage_ == GPU_USAGE_DEVICE_ONLY);
+  if (device_only) {
+    /** Read storage buffer contents via staging buffer. */
     /* Fetch active context. */
     MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
     BLI_assert(ctx);
 
-    /* Ensure GPU updates are flushed back to CPU. */
-    id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
-    [blit_encoder synchronizeResource:metal_buffer_->get_metal_buffer()];
-  }
+    /* Prepare staging buffer. */
+    gpu::MTLBuffer *staging_buf = MTLContext::get_global_memory_manager()->allocate(size_in_bytes_,
+                                                                                    true);
+    id<MTLBuffer> staging_buf_mtl = staging_buf->get_metal_buffer();
+    BLI_assert(staging_buf_mtl != nil);
 
-  /* Read data. NOTE: Unless explicitly synchronized with GPU work, results may not be ready. */
-  memcpy(data, metal_buffer_->get_host_ptr(), size_in_bytes_);
+    /* Ensure destination buffer. */
+    id<MTLBuffer> storage_buf_mtl = this->metal_buffer_->get_metal_buffer();
+    BLI_assert(storage_buf_mtl != nil);
+
+    id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
+    [blit_encoder copyFromBuffer:storage_buf_mtl
+                    sourceOffset:0
+                        toBuffer:staging_buf_mtl
+               destinationOffset:0
+                            size:size_in_bytes_];
+    if (staging_buf_mtl.storageMode == MTLStorageModeManaged) {
+      [blit_encoder synchronizeResource:staging_buf_mtl];
+    }
+
+    /* Device-only reads will always stall the GPU pipe. */
+    GPU_finish();
+    MTL_LOG_WARNING(
+        "Device-only storage buffer being read. This will stall the GPU pipeline. Ensure this "
+        "path is only used in testing.");
+
+    /* Read contents back to data. */
+    memcpy(data, staging_buf->get_host_ptr(), size_in_bytes_);
+    staging_buf->free();
+  }
+  else {
+    /** Direct storage buffer read. */
+    /* If we have a synchronization event from a prior memory sync, ensure memory is fully synced.
+     * Otherwise, assume read is asynchronous. */
+    if (gpu_write_fence_ != nil) {
+      /* Ensure the GPU updates are visible to the host before reading. */
+      while (gpu_write_fence_.signaledValue < host_read_signal_value_) {
+        PIL_sleep_ms(1);
+      }
+    }
+
+    /* Managed buffers need to be explicitly flushed back to host. */
+    if (metal_buffer_->get_resource_options() & MTLResourceStorageModeManaged) {
+      /* Fetch active context. */
+      MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
+      BLI_assert(ctx);
+
+      /* Ensure GPU updates are flushed back to CPU. */
+      id<MTLBlitCommandEncoder> blit_encoder =
+          ctx->main_command_buffer.ensure_begin_blit_encoder();
+      [blit_encoder synchronizeResource:metal_buffer_->get_metal_buffer()];
+    }
+
+    /* Read data. NOTE: Unless explicitly synchronized with GPU work, results may not be ready. */
+    memcpy(data, metal_buffer_->get_host_ptr(), size_in_bytes_);
+  }
 }
 
 id<MTLBuffer> MTLStorageBuf::get_metal_buffer()
diff --git a/source/blender/gpu/opengl/gl_storage_buffer.cc b/source/blender/gpu/opengl/gl_storage_buffer.cc
index 5591eb717e0..9bc42456104 100644
--- a/source/blender/gpu/opengl/gl_storage_buffer.cc
+++ b/source/blender/gpu/opengl/gl_storage_buffer.cc
@@ -161,6 +161,11 @@ void GLStorageBuf::copy_sub(VertBuf *src_, uint dst_offset, uint src_offset, uin
   }
 }
 
+void GLStorageBuf::async_flush_to_host()
+{
+  GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
+}
+
 void GLStorageBuf::read(void *data)
 {
   if (ssbo_id_ == 0) {
diff --git a/source/blender/gpu/opengl/gl_storage_buffer.hh b/source/blender/gpu/opengl/gl_storage_buffer.hh
index 20144d0c8f0..d24e9ae713f 100644
--- a/source/blender/gpu/opengl/gl_storage_buffer.hh
+++ b/source/blender/gpu/opengl/gl_storage_buffer.hh
@@ -37,6 +37,7 @@ class GLStorageBuf : public StorageBuf {
   void clear(uint32_t clear_value) override;
   void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) override;
   void read(void *data) override;
+  void async_flush_to_host() override;
 
   /* Special internal function to bind SSBOs to indirect argument targets. */
   void bind_as(GLenum target);
diff --git a/source/blender/gpu/vulkan/vk_storage_buffer.cc b/source/blender/gpu/vulkan/vk_storage_buffer.cc
index 873c04d699e..9b296a312ee 100644
--- a/source/blender/gpu/vulkan/vk_storage_buffer.cc
+++ b/source/blender/gpu/vulkan/vk_storage_buffer.cc
@@ -91,6 +91,11 @@ void VKStorageBuffer::copy_sub(VertBuf *src, uint dst_offset, uint src_offset, u
   command_buffer.submit();
 }
 
+void VKStorageBuffer::async_flush_to_host()
+{
+  GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
+}
+
 void VKStorageBuffer::read(void *data)
 {
   ensure_allocated();
diff --git a/source/blender/gpu/vulkan/vk_storage_buffer.hh b/source/blender/gpu/vulkan/vk_storage_buffer.hh
index 889f94570b3..8c6d317fd4e 100644
--- a/source/blender/gpu/vulkan/vk_storage_buffer.hh
+++ b/source/blender/gpu/vulkan/vk_storage_buffer.hh
@@ -33,6 +33,7 @@ class VKStorageBuffer : public StorageBuf, public VKBindableResource {
   void clear(uint32_t clear_value) override;
   void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) override;
   void read(void *data) override;
+  void async_flush_to_host() override;
 
   VkBuffer vk_handle() const
   {
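
Usage sketch (not part of the patch): the call pattern the new GPU_storagebuf_sync_to_host API is designed for, mirroring the eevee_shadow.cc change above. Shader creation and the SSBO binding slot are elided; `shader` and `slot` are illustrative placeholders.

  /* GPU writes into the SSBO, e.g. via a compute dispatch. */
  GPUStorageBuf *ssbo = GPU_storagebuf_create(sizeof(uint32_t) * 4);
  GPU_storagebuf_bind(ssbo, slot);
  GPU_compute_dispatch(shader, 1, 1, 1);

  /* Queue the host-visibility sync inside the GPU command stream; this does not block the CPU. */
  GPU_storagebuf_sync_to_host(ssbo);

  /* ... submit more GPU work, do CPU-side work ... */

  /* The read is now synchronized against the sync above: it waits only until the dependent GPU
   * work has completed instead of returning potentially stale data. */
  uint32_t result[4];
  GPU_storagebuf_read(ssbo, result);
  GPU_storagebuf_free(ssbo);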