diff --git a/source/blender/draw/engines/eevee_next/eevee_shadow.cc b/source/blender/draw/engines/eevee_next/eevee_shadow.cc
index 686195c76f2..66df398a22b 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shadow.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_shadow.cc
@@ -1328,6 +1328,8 @@ void ShadowModule::set_view(View &view)
 
   shadow_multi_view_.compute_procedural_bounds();
 
+  statistics_buf_.current().async_flush_to_host();
+
   /* Isolate shadow update into own command buffer.
    * If parameter buffer exceeds limits, then other work will not be impacted. */
   bool use_flush = (shadow_technique == ShadowTechnique::TILE_COPY) &&
diff --git a/source/blender/draw/intern/DRW_gpu_wrapper.hh b/source/blender/draw/intern/DRW_gpu_wrapper.hh
index 1290f9f5230..3bdbc6ae6bd 100644
--- a/source/blender/draw/intern/DRW_gpu_wrapper.hh
+++ b/source/blender/draw/intern/DRW_gpu_wrapper.hh
@@ -248,6 +248,11 @@ class StorageCommon : public DataBuffer<T, len, false>, NonMovable, NonCopyable
     GPU_storagebuf_clear_to_zero(ssbo_);
   }
 
+  void async_flush_to_host()
+  {
+    GPU_storagebuf_sync_to_host(ssbo_);
+  }
+
   void read()
   {
     GPU_storagebuf_read(ssbo_, this->data_);
diff --git a/source/blender/gpu/GPU_storage_buffer.h b/source/blender/gpu/GPU_storage_buffer.h
index 46cc5b030dc..5d7c72fb1cb 100644
--- a/source/blender/gpu/GPU_storage_buffer.h
+++ b/source/blender/gpu/GPU_storage_buffer.h
@@ -48,10 +48,24 @@ void GPU_storagebuf_clear_to_zero(GPUStorageBuf *ssbo);
  */
 void GPU_storagebuf_clear(GPUStorageBuf *ssbo, uint32_t clear_value);
 
+/**
+ * Explicitly sync updated storage buffer contents back to host within the GPU command stream.
+ * This ensures any changes made by the GPU are visible to the host.
+ * NOTE: This command is only valid for host-visible storage buffers.
+ */
+void GPU_storagebuf_sync_to_host(GPUStorageBuf *ssbo);
+
 /**
  * Read back content of the buffer to CPU for inspection.
  * Slow! Only use for inspection / debugging.
- * NOTE: Not synchronized. Use appropriate barrier before reading.
+ *
+ * NOTE: If GPU_storagebuf_sync_to_host is called, this command is synchronized against that call.
+ * If pending GPU updates to the storage buffer are not yet visible to the host, the command will
+ * stall until dependent GPU work has completed.
+ *
+ * Otherwise, this command is unsynchronized and will return current visible storage buffer
+ * contents immediately.
+ * Alternatively, use appropriate barrier or GPU_finish before reading.
 */
 void GPU_storagebuf_read(GPUStorageBuf *ssbo, void *data);
 
diff --git a/source/blender/gpu/intern/gpu_storage_buffer.cc b/source/blender/gpu/intern/gpu_storage_buffer.cc
index 0d1254592fe..b0d2e550117 100644
--- a/source/blender/gpu/intern/gpu_storage_buffer.cc
+++ b/source/blender/gpu/intern/gpu_storage_buffer.cc
@@ -106,6 +106,11 @@ void GPU_storagebuf_copy_sub_from_vertbuf(
   unwrap(ssbo)->copy_sub(unwrap(src), dst_offset, src_offset, copy_size);
 }
 
+void GPU_storagebuf_sync_to_host(GPUStorageBuf *ssbo)
+{
+  unwrap(ssbo)->async_flush_to_host();
+}
+
 void GPU_storagebuf_read(GPUStorageBuf *ssbo, void *data)
 {
   unwrap(ssbo)->read(data);
diff --git a/source/blender/gpu/intern/gpu_storage_buffer_private.hh b/source/blender/gpu/intern/gpu_storage_buffer_private.hh
index 39541944901..c099b67fba2 100644
--- a/source/blender/gpu/intern/gpu_storage_buffer_private.hh
+++ b/source/blender/gpu/intern/gpu_storage_buffer_private.hh
@@ -47,6 +47,7 @@ class StorageBuf {
   virtual void clear(uint32_t clear_value) = 0;
   virtual void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) = 0;
   virtual void read(void *data) = 0;
+  virtual void async_flush_to_host() = 0;
 };
 
 /* Syntactic sugar. */
diff --git a/source/blender/gpu/metal/mtl_storage_buffer.hh b/source/blender/gpu/metal/mtl_storage_buffer.hh
index 168838d86b2..af114c1e768 100644
--- a/source/blender/gpu/metal/mtl_storage_buffer.hh
+++ b/source/blender/gpu/metal/mtl_storage_buffer.hh
@@ -54,6 +54,10 @@ class MTLStorageBuf : public StorageBuf {
   /** Usage type. */
   GPUUsageType usage_;
 
+  /* Synchronization event for host reads. */
+  id<MTLSharedEvent> gpu_write_fence_ = nil;
+  uint64_t host_read_signal_value_ = 0;
+
  public:
   MTLStorageBuf(size_t size, GPUUsageType usage, const char *name);
   ~MTLStorageBuf();
@@ -68,6 +72,7 @@ class MTLStorageBuf : public StorageBuf {
   void clear(uint32_t clear_value) override;
   void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) override;
   void read(void *data) override;
+  void async_flush_to_host() override;
 
   void init();
 
diff --git a/source/blender/gpu/metal/mtl_storage_buffer.mm b/source/blender/gpu/metal/mtl_storage_buffer.mm
index 3796f292350..40006459f77 100644
--- a/source/blender/gpu/metal/mtl_storage_buffer.mm
+++ b/source/blender/gpu/metal/mtl_storage_buffer.mm
@@ -8,6 +8,7 @@
 
 #include "BLI_string.h"
 
+#include "GPU_state.h"
 #include "gpu_backend.hh"
 #include "gpu_context_private.hh"
 
@@ -19,6 +20,8 @@
 #include "mtl_uniform_buffer.hh"
 #include "mtl_vertex_buffer.hh"
 
+#include "PIL_time.h"
+
 namespace blender::gpu {
 
 /* -------------------------------------------------------------------- */
@@ -71,6 +74,11 @@ MTLStorageBuf::~MTLStorageBuf()
     has_data_ = false;
   }
 
+  if (gpu_write_fence_ != nil) {
+    [gpu_write_fence_ release];
+    gpu_write_fence_ = nil;
+  }
+
   /* Ensure SSBO is not bound to active CTX.
    * SSBO bindings are reset upon Context-switch so we do not need
    * to check deactivated context's. */
@@ -172,6 +180,7 @@ void MTLStorageBuf::update(const void *data)
                         toBuffer:dst_buf
                destinationOffset:0
                             size:size_in_bytes_];
+    staging_buf->free();
   }
   else {
     /* Upload data. */
@@ -323,6 +332,40 @@ void MTLStorageBuf::copy_sub(VertBuf *src_, uint dst_offset, uint src_offset, ui
                             size:copy_size];
 }
 
+void MTLStorageBuf::async_flush_to_host()
+{
+  bool device_only = (usage_ == GPU_USAGE_DEVICE_ONLY);
+  BLI_assert_msg(!device_only,
+                 "Storage buffers with usage GPU_USAGE_DEVICE_ONLY cannot have their data "
+                 "synchronized to the host.");
+  if (device_only) {
+    return;
+  }
+
+  MTLContext *ctx = MTLContext::get();
+  BLI_assert(ctx);
+
+  if (gpu_write_fence_ == nil) {
+    gpu_write_fence_ = [ctx->device newSharedEvent];
+  }
+
+  if (metal_buffer_ == nullptr) {
+    this->init();
+  }
+
+  /* For discrete memory systems, explicitly flush GPU-resident memory back to host. */
+  id<MTLBuffer> storage_buf_mtl = this->metal_buffer_->get_metal_buffer();
+  if (storage_buf_mtl.storageMode == MTLStorageModeManaged) {
+    id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
+    [blit_encoder synchronizeResource:storage_buf_mtl];
+  }
+
+  /* Encode event signal and flush command buffer to ensure GPU work is in the pipeline for future
+   * reads. */
+  ctx->main_command_buffer.encode_signal_event(gpu_write_fence_, ++host_read_signal_value_);
+  GPU_flush();
+}
+
 void MTLStorageBuf::read(void *data)
 {
   if (data == nullptr) {
@@ -333,19 +376,71 @@ void MTLStorageBuf::read(void *data)
     this->init();
   }
 
-  /* Managed buffers need to be explicitly flushed back to host. */
-  if (metal_buffer_->get_resource_options() & MTLResourceStorageModeManaged) {
+  /* Device-only storage buffers cannot be read directly and require staging. This path should only
+     be used for unit testing. */
+  bool device_only = (usage_ == GPU_USAGE_DEVICE_ONLY);
+  if (device_only) {
+    /** Read storage buffer contents via staging buffer. */
     /* Fetch active context. */
     MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
     BLI_assert(ctx);
 
-    /* Ensure GPU updates are flushed back to CPU. */
-    id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
-    [blit_encoder synchronizeResource:metal_buffer_->get_metal_buffer()];
-  }
+    /* Prepare staging buffer. */
+    gpu::MTLBuffer *staging_buf = MTLContext::get_global_memory_manager()->allocate(size_in_bytes_,
+                                                                                    true);
+    id<MTLBuffer> staging_buf_mtl = staging_buf->get_metal_buffer();
+    BLI_assert(staging_buf_mtl != nil);
 
-  /* Read data. NOTE: Unless explicitly synchronized with GPU work, results may not be ready. */
-  memcpy(data, metal_buffer_->get_host_ptr(), size_in_bytes_);
+    /* Ensure destination buffer. */
+    id<MTLBuffer> storage_buf_mtl = this->metal_buffer_->get_metal_buffer();
+    BLI_assert(storage_buf_mtl != nil);
+
+    id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
+    [blit_encoder copyFromBuffer:storage_buf_mtl
+                    sourceOffset:0
+                        toBuffer:staging_buf_mtl
+               destinationOffset:0
+                            size:size_in_bytes_];
+    if (staging_buf_mtl.storageMode == MTLStorageModeManaged) {
+      [blit_encoder synchronizeResource:staging_buf_mtl];
+    }
+
+    /* Device-only reads will always stall the GPU pipe. */
+    GPU_finish();
+    MTL_LOG_WARNING(
+        "Device-only storage buffer being read. This will stall the GPU pipeline. Ensure this "
+        "path is only used in testing.");
+
+    /* Read contents back to data. */
+    memcpy(data, staging_buf->get_host_ptr(), size_in_bytes_);
+    staging_buf->free();
+  }
+  else {
+    /** Direct storage buffer read. */
+    /* If we have a synchronization event from a prior memory sync, ensure memory is fully synced.
+     * Otherwise, assume read is asynchronous. */
+    if (gpu_write_fence_ != nil) {
+      /* Ensure the GPU updates are visible to the host before reading. */
+      while (gpu_write_fence_.signaledValue < host_read_signal_value_) {
+        PIL_sleep_ms(1);
+      }
+    }
+
+    /* Managed buffers need to be explicitly flushed back to host. */
+    if (metal_buffer_->get_resource_options() & MTLResourceStorageModeManaged) {
+      /* Fetch active context. */
+      MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
+      BLI_assert(ctx);
+
+      /* Ensure GPU updates are flushed back to CPU. */
+      id<MTLBlitCommandEncoder> blit_encoder =
+          ctx->main_command_buffer.ensure_begin_blit_encoder();
+      [blit_encoder synchronizeResource:metal_buffer_->get_metal_buffer()];
+    }
+
+    /* Read data. NOTE: Unless explicitly synchronized with GPU work, results may not be ready. */
+    memcpy(data, metal_buffer_->get_host_ptr(), size_in_bytes_);
+  }
 }
 
 id<MTLBuffer> MTLStorageBuf::get_metal_buffer()
diff --git a/source/blender/gpu/opengl/gl_storage_buffer.cc b/source/blender/gpu/opengl/gl_storage_buffer.cc
index 5591eb717e0..9bc42456104 100644
--- a/source/blender/gpu/opengl/gl_storage_buffer.cc
+++ b/source/blender/gpu/opengl/gl_storage_buffer.cc
@@ -161,6 +161,11 @@ void GLStorageBuf::copy_sub(VertBuf *src_, uint dst_offset, uint src_offset, uin
   }
 }
 
+void GLStorageBuf::async_flush_to_host()
+{
+  GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
+}
+
 void GLStorageBuf::read(void *data)
 {
   if (ssbo_id_ == 0) {
diff --git a/source/blender/gpu/opengl/gl_storage_buffer.hh b/source/blender/gpu/opengl/gl_storage_buffer.hh
index 20144d0c8f0..d24e9ae713f 100644
--- a/source/blender/gpu/opengl/gl_storage_buffer.hh
+++ b/source/blender/gpu/opengl/gl_storage_buffer.hh
@@ -37,6 +37,7 @@ class GLStorageBuf : public StorageBuf {
   void clear(uint32_t clear_value) override;
   void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) override;
   void read(void *data) override;
+  void async_flush_to_host() override;
 
   /* Special internal function to bind SSBOs to indirect argument targets. */
   void bind_as(GLenum target);
diff --git a/source/blender/gpu/vulkan/vk_storage_buffer.cc b/source/blender/gpu/vulkan/vk_storage_buffer.cc
index 873c04d699e..9b296a312ee 100644
--- a/source/blender/gpu/vulkan/vk_storage_buffer.cc
+++ b/source/blender/gpu/vulkan/vk_storage_buffer.cc
@@ -91,6 +91,11 @@ void VKStorageBuffer::copy_sub(VertBuf *src, uint dst_offset, uint src_offset, u
   command_buffer.submit();
 }
 
+void VKStorageBuffer::async_flush_to_host()
+{
+  GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
+}
+
 void VKStorageBuffer::read(void *data)
 {
   ensure_allocated();
diff --git a/source/blender/gpu/vulkan/vk_storage_buffer.hh b/source/blender/gpu/vulkan/vk_storage_buffer.hh
index 889f94570b3..8c6d317fd4e 100644
--- a/source/blender/gpu/vulkan/vk_storage_buffer.hh
+++ b/source/blender/gpu/vulkan/vk_storage_buffer.hh
@@ -33,6 +33,7 @@ class VKStorageBuffer : public StorageBuf, public VKBindableResource {
   void clear(uint32_t clear_value) override;
   void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) override;
   void read(void *data) override;
+  void async_flush_to_host() override;
 
   VkBuffer vk_handle() const
   {
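
Usage sketch (not part of the patch): the call pattern the new GPU_storagebuf_sync_to_host API is designed for, mirroring the eevee_shadow.cc change above. Shader creation and the SSBO binding slot are elided; `shader` and `slot` are illustrative placeholders.

  /* GPU writes into the SSBO, e.g. via a compute dispatch. */
  GPUStorageBuf *ssbo = GPU_storagebuf_create(sizeof(uint32_t) * 4);
  GPU_storagebuf_bind(ssbo, slot);
  GPU_compute_dispatch(shader, 1, 1, 1);

  /* Queue the host-visibility sync inside the GPU command stream; this does not block the CPU. */
  GPU_storagebuf_sync_to_host(ssbo);

  /* ... submit more GPU work, do CPU-side work ... */

  /* The read is now synchronized against the sync above: it waits only until the dependent GPU
   * work has completed instead of returning potentially stale data. */
  uint32_t result[4];
  GPU_storagebuf_read(ssbo, result);
  GPU_storagebuf_free(ssbo);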