Metal: Add support for atomic image operations

Texture Atomics have been added in Metal 3.1
and enable the original implementations of
shadow update and irradiance cache baking.

However, a fallback solution will be
required for versions under macOS 14.0 utilising
buffer-backed textures instead.

This patch also includes a stub implementation when
building/running on older macOS versions, which
provides locally-synchronized texture access in
place of atomics. This enables some effects to be
partially tested, and ensures non-guarded use
of imageAtomic functions does not result
in compilation failure.

Authored by Apple: Michael Parkin-White

Pull Request: https://projects.blender.org/blender/blender/pulls/112866
This commit is contained in:
Jason Fielder 2023-09-25 21:56:46 +02:00 committed by Clément Foucault
parent 499c39cfb9
commit ee03bb38cb
9 changed files with 195 additions and 12 deletions

View File

@@ -942,7 +942,8 @@ void IrradianceBake::clusters_build()
if (max_virtual_offset_ == 0.0f) {
return;
}
eGPUTextureUsage texture_usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE;
eGPUTextureUsage texture_usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE |
GPU_TEXTURE_USAGE_ATOMIC;
cluster_list_tx_.ensure_3d(GPU_R32I, capture_info_buf_.irradiance_grid_size, texture_usage);
cluster_list_tx_.clear(int4(-1));

View File

@@ -662,7 +662,8 @@ void ShadowModule::init()
const int2 atlas_extent = shadow_page_size_ * int2(SHADOW_PAGE_PER_ROW);
const int atlas_layers = divide_ceil_u(shadow_page_len_, SHADOW_PAGE_PER_LAYER);
eGPUTextureUsage tex_usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE;
eGPUTextureUsage tex_usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE |
GPU_TEXTURE_USAGE_ATOMIC;
if (atlas_tx_.ensure_2d_array(atlas_type, atlas_extent, atlas_layers, tex_usage)) {
/* Global update. */
do_full_update = true;

View File

@@ -91,6 +91,7 @@ GPU_SHADER_CREATE_INFO(eevee_surfel_cluster_build)
GPU_SHADER_CREATE_INFO(eevee_surfel_list_build)
.local_group_size(SURFEL_GROUP_SIZE)
.builtins(BuiltinBits::TEXTURE_ATOMIC)
.additional_info("eevee_shared", "eevee_surfel_common", "draw_view")
.storage_buf(0, Qualifier::READ_WRITE, "int", "list_start_buf[]")
.storage_buf(6, Qualifier::READ_WRITE, "SurfelListInfoData", "list_info_buf")

View File

@@ -193,6 +193,7 @@ GPU_SHADER_CREATE_INFO(eevee_surf_shadow)
.define("USE_ATOMIC")
.builtins(BuiltinBits::VIEWPORT_INDEX)
.builtins(BuiltinBits::LAYER)
.builtins(BuiltinBits::TEXTURE_ATOMIC)
.vertex_out(eevee_surf_iface)
.vertex_out(eevee_surf_flat_iface)
.storage_buf(SHADOW_RENDER_MAP_BUF_SLOT,

View File

@@ -548,9 +548,12 @@ typedef enum eGPUTextureUsage {
/* When used, the texture will not have any backing storage and can solely exist as a virtual
* frame-buffer attachment. */
GPU_TEXTURE_USAGE_MEMORYLESS = (1 << 5),
/* Whether a texture can support atomic operations. */
GPU_TEXTURE_USAGE_ATOMIC = (1 << 6),
/* Create a texture whose usage cannot be defined prematurely.
* This is unoptimized and should not be used. */
GPU_TEXTURE_USAGE_GENERAL = (0xFF & (~GPU_TEXTURE_USAGE_MEMORYLESS)),
GPU_TEXTURE_USAGE_GENERAL = (0xFF &
(~(GPU_TEXTURE_USAGE_MEMORYLESS | GPU_TEXTURE_USAGE_ATOMIC))),
} eGPUTextureUsage;
ENUM_OPERATORS(eGPUTextureUsage, GPU_TEXTURE_USAGE_GENERAL);

View File

@@ -208,6 +208,9 @@ enum class BuiltinBits {
*/
VIEWPORT_INDEX = (1 << 17),
/* Texture atomics requires usage options to alter compilation flag. */
TEXTURE_ATOMIC = (1 << 18),
/* Not a builtin but a flag we use to tag shaders that use the debug features. */
USE_DEBUG_DRAW = (1 << 29),
USE_DEBUG_PRINT = (1 << 30),

View File

@@ -300,6 +300,14 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
options.languageVersion = MTLLanguageVersion2_3;
}
}
#if defined(MAC_OS_VERSION_14_0)
if (@available(macOS 14.00, *)) {
/* Texture atomics require Metal 3.1. */
if (bool(info->builtins_ & BuiltinBits::TEXTURE_ATOMIC)) {
options.languageVersion = MTLLanguageVersion3_1;
}
}
#endif
NSString *source_to_compile = shd_builder_->msl_source_vert_;

View File

@@ -618,6 +618,14 @@ inline MTLTextureUsage mtl_usage_from_gpu(eGPUTextureUsage usage)
if (usage & GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW) {
mtl_usage = mtl_usage | MTLTextureUsagePixelFormatView;
}
#if defined(MAC_OS_VERSION_14_0)
if (@available(macOS 14.0, *)) {
if (usage & GPU_TEXTURE_USAGE_ATOMIC) {
mtl_usage = mtl_usage | MTLTextureUsageShaderAtomic;
}
}
#endif
return mtl_usage;
}

View File

@@ -201,15 +201,6 @@ template<typename T> T atomicExchange(device T &mem, T data)
return atomic_exchange_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
/* Unblock texture atomic compilation.
* TODO(Metal): This is not correct for global atomic behavior, but will be safe within a single
* thread.
* We need to re-visit the solution for this use-case and use a 2D texture buffer instead. */
#define imageAtomicMin(tex, coord, data) \
uint val = _texelFetch_internal(tex, coord, 0).r; \
_texture_write_internal(tex, coord, uint4((val < data) ? val : data)); \
tex.texture->fence();
/* Used to replace 'out' in function parameters with thread-local reference
* shortened to avoid expanding the GLSL source string. */
#define THD thread
@@ -984,6 +975,172 @@ inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_3d<S
tex.texture->write(value, uint3(_coord.xyz));
}
/* Texture atomic operations are only supported in Metal 3.1 and onwards (macOS 14.0 Sonoma). */
#if __METAL_VERSION__ >= 310
/* Image atomic operations. */
/* Map the GLSL-style imageAtomic* entry points onto the native texture-atomic wrapper
 * templates defined below; overload resolution selects the correct dimensionality. */
# define imageAtomicMin(tex, coord, data) _texture_image_atomic_min_internal(tex, coord, data)
# define imageAtomicExchange(tex, coord, data) \
_texture_image_atomic_exchange_internal(tex, coord, data)
/* Atomic Min. */
/* Atomic fetch-min on a texel of a 1D image; returns the value previously stored.
 * NOTE(review): assumes the Metal target exposes texture atomics on 1D textures -- confirm
 * against the MSL 3.1 specification. */
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                             int coord,
                                             vec<S, 4> data)
{
  const uint texel = uint(coord);
  return tex.texture->atomic_fetch_min(texel, data);
}
/* Atomic fetch-min on a texel of a 1D array image; returns the value previously stored. */
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
                                             int2 coord,
                                             vec<S, 4> data)
{
  /* coord.x selects the texel, coord.y selects the array layer. */
  const uint texel = uint(coord.x);
  const uint layer = uint(coord.y);
  return tex.texture->atomic_fetch_min(texel, layer, data);
}
/* Atomic fetch-min on a texel of a 2D image; returns the value previously stored. */
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                             int2 coord,
                                             vec<S, 4> data)
{
  const uint2 texel = uint2(coord.xy);
  return tex.texture->atomic_fetch_min(texel, data);
}
/* Atomic fetch-min on a texel of a 2D array image; returns the value previously stored. */
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
                                             int3 coord,
                                             vec<S, 4> data)
{
  /* coord.xy selects the texel, coord.z selects the array layer. */
  const uint2 texel = uint2(coord.xy);
  const uint layer = uint(coord.z);
  return tex.texture->atomic_fetch_min(texel, layer, data);
}
/* Atomic fetch-min on a texel of a 3D image; returns the value previously stored. */
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                             int3 coord,
                                             vec<S, 4> data)
{
  const uint3 texel = uint3(coord);
  return tex.texture->atomic_fetch_min(texel, data);
}
/* Atomic Exchange. */
template<typename S, access A, int N>
vec<S, N> _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
int coord,
vec<S, N> data)
{
return tex.texture->atomic_exchange(uint(coord), data);
}
/* Atomically swap the stored 1D-array texel with `data`; returns the previous value. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
                                          int2 coord,
                                          S data)
{
  /* coord.x selects the texel, coord.y selects the array layer. */
  const uint texel = uint(coord.x);
  const uint layer = uint(coord.y);
  return tex.texture->atomic_exchange(texel, layer, data);
}
/* Atomically swap the stored 2D texel with `data`; returns the previous value.
 * NOTE(review): the original declared an unused, non-defaulted `int N` template parameter
 * that appears nowhere in the function parameters; argument deduction can never supply it,
 * so the overload was unselectable from the imageAtomicExchange macro. Removed. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                          int2 coord,
                                          S data)
{
  return tex.texture->atomic_exchange(uint2(coord.xy), data);
}
/* Atomically swap the stored 2D-array texel with `data`; returns the previous value.
 * coord.xy selects the texel, coord.z selects the array layer.
 * NOTE(review): the original declared an unused, non-defaulted `int N` template parameter
 * that cannot be deduced from the arguments, making the overload unselectable. Removed. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
                                          int3 coord,
                                          S data)
{
  return tex.texture->atomic_exchange(uint2(coord.xy), uint(coord.z), data);
}
/* Atomically swap the stored 3D texel with `data`; returns the previous value.
 * NOTE(review): the original declared an unused, non-defaulted `int N` template parameter
 * that cannot be deduced from the arguments, making the overload unselectable. Removed. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                          int3 coord,
                                          S data)
{
  return tex.texture->atomic_exchange(uint3(coord), data);
}
#else
/* Fallback for imageAtomicMin if texture atomics are unsupported.
 * NOTE(review): this is presumably "not safe for general concurrent thread access, but will
 * allow inter-thread writing" -- the original comment was garbled; confirm intent.
 * This assumes a 2D texture array, and -- unlike GLSL's imageAtomicMin -- is a statement
 * that discards the previous value.
 * NOTE: Implementations should switch to a buffer-backed texture write in these cases.
 * The body is wrapped in `do { } while (0)` so the macro expands safely inside unbraced
 * `if`/`else` bodies and so `val` does not leak into (or collide with) the caller's scope. */
# define imageAtomicMin(tex, coord, data) \
    do { \
      auto val = _texelFetch_internal(tex, coord, 0).r; \
      tex.texture->fence(); \
      tex.texture->write(min(val, data), uint2(coord.xy), uint(coord.z)); \
      tex.texture->fence(); \
    } while (0)
/* Fallback for atomic Exchange if atomics are unavailable.
 * NOTE: Implementations should switch to a buffer-backed texture write in these cases. */
# define imageAtomicExchange(tex, coord, data) \
    _texture_image_atomic_exchange_internal_fallback(tex, coord, data)
/* Fallback exchange for a 1D image: emulate the atomic with fenced read/write.
 * Only safe with respect to a single thread's own accesses, not true concurrency.
 * NOTE(review): the original passed `data` as the trailing argument of texture::read(),
 * which Metal interprets as the mip level (lod); read from mip 0 instead. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal_fallback(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                                   int coord,
                                                   S data)
{
  S val = tex.texture->read(uint(coord)).x;
  tex.texture->fence();
  tex.texture->write(data, uint(coord));
  tex.texture->fence();
  return val;
}
/* Fallback exchange for a 1D array image via fenced read/write (thread-local safety only).
 * coord.x selects the texel, coord.y selects the array layer.
 * NOTE(review): the original passed `data` as the trailing argument of texture::read(),
 * which Metal interprets as the mip level (lod); read from mip 0 instead. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal_fallback(
    thread _mtl_combined_image_sampler_1d_array<S, A> tex, int2 coord, S data)
{
  S val = tex.texture->read(uint(coord.x), uint(coord.y)).x;
  tex.texture->fence();
  tex.texture->write(data, uint(coord.x), uint(coord.y));
  tex.texture->fence();
  return val;
}
/* Fallback exchange for a 2D image via fenced read/write (thread-local safety only).
 * NOTE(review): the original wrote to `uint2(coord.x)` -- a scalar splat yielding
 * texel (x, x) -- instead of the texel that was read; and it passed `data` as the
 * lod argument of texture::read(). Both are corrected here. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal_fallback(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                                   int2 coord,
                                                   S data)
{
  S val = tex.texture->read(uint2(coord)).x;
  tex.texture->fence();
  tex.texture->write(data, uint2(coord));
  tex.texture->fence();
  return val;
}
/* Fallback exchange for a 2D array image via fenced read/write (thread-local safety only).
 * coord.xy selects the texel, coord.z selects the array layer.
 * NOTE(review): the original passed `data` as the trailing argument of texture::read(),
 * which Metal interprets as the mip level (lod); read from mip 0 instead. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal_fallback(
    thread _mtl_combined_image_sampler_2d_array<S, A> tex, int3 coord, S data)
{
  S val = tex.texture->read(uint2(coord.xy), uint(coord.z)).x;
  tex.texture->fence();
  tex.texture->write(data, uint2(coord.xy), uint(coord.z));
  tex.texture->fence();
  return val;
}
/* Fallback exchange for a 3D image via fenced read/write (thread-local safety only).
 * NOTE(review): the original passed `data` as the trailing argument of texture::read(),
 * which Metal interprets as the mip level (lod); read from mip 0 instead. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal_fallback(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                                   int3 coord,
                                                   S data)
{
  S val = tex.texture->read(uint3(coord)).x;
  tex.texture->fence();
  tex.texture->write(data, uint3(coord));
  tex.texture->fence();
  return val;
}
#endif
/* Matrix compare operators. */
/** TODO(fclem): Template. */
inline bool operator==(float4x4 a, float4x4 b)