Metal: Add support for atomic image operations
Texture Atomics have been added in Metal 3.1 and enable the original implementations of shadow update and irradiance cache baking. However, a fallback solution will be required for versions under macOS 14.0 utilising buffer-backed textures instead. This patch also includes a stub implementation if building/running on older macOS versions which provides locally-synchronized texture access in place of atomics. This enables some effects to be partially tested, and ensures non-guarded use of imageAtomic functions does not result in compilation failure. Authored by Apple: Michael Parkin-White Pull Request: https://projects.blender.org/blender/blender/pulls/112866
This commit is contained in:
parent
499c39cfb9
commit
ee03bb38cb
|
@ -942,7 +942,8 @@ void IrradianceBake::clusters_build()
|
|||
if (max_virtual_offset_ == 0.0f) {
|
||||
return;
|
||||
}
|
||||
eGPUTextureUsage texture_usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE;
|
||||
eGPUTextureUsage texture_usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE |
|
||||
GPU_TEXTURE_USAGE_ATOMIC;
|
||||
|
||||
cluster_list_tx_.ensure_3d(GPU_R32I, capture_info_buf_.irradiance_grid_size, texture_usage);
|
||||
cluster_list_tx_.clear(int4(-1));
|
||||
|
|
|
@ -662,7 +662,8 @@ void ShadowModule::init()
|
|||
const int2 atlas_extent = shadow_page_size_ * int2(SHADOW_PAGE_PER_ROW);
|
||||
const int atlas_layers = divide_ceil_u(shadow_page_len_, SHADOW_PAGE_PER_LAYER);
|
||||
|
||||
eGPUTextureUsage tex_usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE;
|
||||
eGPUTextureUsage tex_usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE |
|
||||
GPU_TEXTURE_USAGE_ATOMIC;
|
||||
if (atlas_tx_.ensure_2d_array(atlas_type, atlas_extent, atlas_layers, tex_usage)) {
|
||||
/* Global update. */
|
||||
do_full_update = true;
|
||||
|
|
|
@ -91,6 +91,7 @@ GPU_SHADER_CREATE_INFO(eevee_surfel_cluster_build)
|
|||
|
||||
GPU_SHADER_CREATE_INFO(eevee_surfel_list_build)
|
||||
.local_group_size(SURFEL_GROUP_SIZE)
|
||||
.builtins(BuiltinBits::TEXTURE_ATOMIC)
|
||||
.additional_info("eevee_shared", "eevee_surfel_common", "draw_view")
|
||||
.storage_buf(0, Qualifier::READ_WRITE, "int", "list_start_buf[]")
|
||||
.storage_buf(6, Qualifier::READ_WRITE, "SurfelListInfoData", "list_info_buf")
|
||||
|
|
|
@ -193,6 +193,7 @@ GPU_SHADER_CREATE_INFO(eevee_surf_shadow)
|
|||
.define("USE_ATOMIC")
|
||||
.builtins(BuiltinBits::VIEWPORT_INDEX)
|
||||
.builtins(BuiltinBits::LAYER)
|
||||
.builtins(BuiltinBits::TEXTURE_ATOMIC)
|
||||
.vertex_out(eevee_surf_iface)
|
||||
.vertex_out(eevee_surf_flat_iface)
|
||||
.storage_buf(SHADOW_RENDER_MAP_BUF_SLOT,
|
||||
|
|
|
@ -548,9 +548,12 @@ typedef enum eGPUTextureUsage {
|
|||
/* When used, the texture will not have any backing storage and can solely exist as a virtual
|
||||
* frame-buffer attachment. */
|
||||
GPU_TEXTURE_USAGE_MEMORYLESS = (1 << 5),
|
||||
/* Whether a texture can support atomic operations. */
|
||||
GPU_TEXTURE_USAGE_ATOMIC = (1 << 6),
|
||||
/* Create a texture whose usage cannot be defined prematurely.
|
||||
* This is unoptimized and should not be used. */
|
||||
GPU_TEXTURE_USAGE_GENERAL = (0xFF & (~GPU_TEXTURE_USAGE_MEMORYLESS)),
|
||||
GPU_TEXTURE_USAGE_GENERAL = (0xFF &
|
||||
(~(GPU_TEXTURE_USAGE_MEMORYLESS | GPU_TEXTURE_USAGE_ATOMIC))),
|
||||
} eGPUTextureUsage;
|
||||
|
||||
ENUM_OPERATORS(eGPUTextureUsage, GPU_TEXTURE_USAGE_GENERAL);
|
||||
|
|
|
@ -208,6 +208,9 @@ enum class BuiltinBits {
|
|||
*/
|
||||
VIEWPORT_INDEX = (1 << 17),
|
||||
|
||||
/* Texture atomics require usage options to alter the compilation flag. */
|
||||
TEXTURE_ATOMIC = (1 << 18),
|
||||
|
||||
/* Not a builtin but a flag we use to tag shaders that use the debug features. */
|
||||
USE_DEBUG_DRAW = (1 << 29),
|
||||
USE_DEBUG_PRINT = (1 << 30),
|
||||
|
|
|
@ -300,6 +300,14 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
|
|||
options.languageVersion = MTLLanguageVersion2_3;
|
||||
}
|
||||
}
|
||||
#if defined(MAC_OS_VERSION_14_0)
|
||||
if (@available(macOS 14.00, *)) {
|
||||
/* Texture atomics require Metal 3.1. */
|
||||
if (bool(info->builtins_ & BuiltinBits::TEXTURE_ATOMIC)) {
|
||||
options.languageVersion = MTLLanguageVersion3_1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
NSString *source_to_compile = shd_builder_->msl_source_vert_;
|
||||
|
||||
|
|
|
@ -618,6 +618,14 @@ inline MTLTextureUsage mtl_usage_from_gpu(eGPUTextureUsage usage)
|
|||
if (usage & GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW) {
|
||||
mtl_usage = mtl_usage | MTLTextureUsagePixelFormatView;
|
||||
}
|
||||
#if defined(MAC_OS_VERSION_14_0)
|
||||
if (@available(macOS 14.0, *)) {
|
||||
if (usage & GPU_TEXTURE_USAGE_ATOMIC) {
|
||||
|
||||
mtl_usage = mtl_usage | MTLTextureUsageShaderAtomic;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return mtl_usage;
|
||||
}
|
||||
|
||||
|
|
|
@ -201,15 +201,6 @@ template<typename T> T atomicExchange(device T &mem, T data)
|
|||
return atomic_exchange_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
|
||||
}
|
||||
|
||||
/* Unblock texture atomic compilation.
|
||||
* TODO(Metal): This is not correct for global atomic behavior, but will be safe within a single
|
||||
* thread.
|
||||
* We need to re-visit the solution for this use-case and use a 2D texture buffer instead. */
|
||||
#define imageAtomicMin(tex, coord, data) \
|
||||
uint val = _texelFetch_internal(tex, coord, 0).r; \
|
||||
_texture_write_internal(tex, coord, uint4((val < data) ? val : data)); \
|
||||
tex.texture->fence();
|
||||
|
||||
/* Used to replace 'out' in function parameters with thread-local reference
|
||||
* shortened to avoid expanding the GLSL source string. */
|
||||
#define THD thread
|
||||
|
@ -984,6 +975,172 @@ inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_3d<S
|
|||
tex.texture->write(value, uint3(_coord.xyz));
|
||||
}
|
||||
|
||||
/* Texture atomic operations are only supported in Metal 3.1 and onwards (macOS 14.0 Sonoma). */
|
||||
#if __METAL_VERSION__ >= 310
|
||||
|
||||
/* Image atomic operations. */
|
||||
# define imageAtomicMin(tex, coord, data) _texture_image_atomic_min_internal(tex, coord, data)
|
||||
# define imageAtomicExchange(tex, coord, data) \
|
||||
_texture_image_atomic_exchange_internal(tex, coord, data)
|
||||
|
||||
/* Atomic Min. */
|
||||
/* imageAtomicMin backend for 1D images: forwards the texel index and `data` to the texture's
 * `atomic_fetch_min` and hands back the result of that call unchanged. */
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                             int coord,
                                             vec<S, 4> data)
{
  const uint texel = uint(coord);
  return tex.texture->atomic_fetch_min(texel, data);
}
|
||||
|
||||
/* imageAtomicMin backend for 1D array images: `coord.x` and `coord.y` are passed as the first
 * and second index arguments of the texture's `atomic_fetch_min`; its result is returned as is. */
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
                                             int2 coord,
                                             vec<S, 4> data)
{
  const uint texel = uint(coord.x);
  const uint slice = uint(coord.y);
  return tex.texture->atomic_fetch_min(texel, slice, data);
}
|
||||
|
||||
/* imageAtomicMin backend for 2D images: converts the signed texel coordinate to `uint2` and
 * forwards it with `data` to the texture's `atomic_fetch_min`, returning that call's result. */
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                             int2 coord,
                                             vec<S, 4> data)
{
  const uint2 texel = uint2(coord.xy);
  return tex.texture->atomic_fetch_min(texel, data);
}
|
||||
|
||||
/* imageAtomicMin backend for 2D array images: `coord.xy` is the texel position and `coord.z` is
 * passed as the separate index argument of the texture's `atomic_fetch_min`. The result of that
 * call is returned unchanged. */
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
                                             int3 coord,
                                             vec<S, 4> data)
{
  const uint2 texel = uint2(coord.xy);
  const uint slice = uint(coord.z);
  return tex.texture->atomic_fetch_min(texel, slice, data);
}
|
||||
|
||||
/* imageAtomicMin backend for 3D images: converts the signed texel coordinate to `uint3` and
 * forwards it with `data` to the texture's `atomic_fetch_min`, returning that call's result. */
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                             int3 coord,
                                             vec<S, 4> data)
{
  const uint3 texel = uint3(coord);
  return tex.texture->atomic_fetch_min(texel, data);
}
|
||||
|
||||
/* Atomic Exchange. */
|
||||
/* imageAtomicExchange backend for 1D images, scalar form.
 * Matches the scalar signature used by the other dimensionalities and by the non-atomic
 * fallback, so `imageAtomicExchange(img, coord, scalar)` resolves for 1D images too.
 * `data` is widened to the 4-component value passed to `atomic_exchange`, and the first
 * component of the result is returned. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                          int coord,
                                          S data)
{
  return tex.texture->atomic_exchange(uint(coord), vec<S, 4>(data)).x;
}

/* Vector form, kept so that callers passing a `vec<S, N>` value keep resolving.
 * `N` is deduced from `data`; a scalar argument never selects this overload. */
template<typename S, access A, int N>
vec<S, N> _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                                  int coord,
                                                  vec<S, N> data)
{
  return tex.texture->atomic_exchange(uint(coord), data);
}
|
||||
|
||||
/* imageAtomicExchange backend for 1D array images.
 * `coord.x` and `coord.y` are passed as the first and second index arguments of
 * `atomic_exchange`. `data` is widened to a 4-component value for the call and the first
 * component of the result is returned, so the scalar return type `S` is honoured. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
                                          int2 coord,
                                          S data)
{
  return tex.texture->atomic_exchange(uint(coord.x), uint(coord.y), vec<S, 4>(data)).x;
}
|
||||
|
||||
/* imageAtomicExchange backend for 2D images.
 * The template takes only `S` and `A`: an extra `int N` parameter that appears in no function
 * parameter cannot be deduced and would keep `imageAtomicExchange(...)` from resolving.
 * `data` is widened to a 4-component value for the call and the first component of the result
 * is returned. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                          int2 coord,
                                          S data)
{
  return tex.texture->atomic_exchange(uint2(coord.xy), vec<S, 4>(data)).x;
}
|
||||
|
||||
/* imageAtomicExchange backend for 2D array images.
 * `coord.xy` is the texel position and `coord.z` is passed as the separate index argument.
 * The template takes only `S` and `A`: an extra `int N` parameter that appears in no function
 * parameter cannot be deduced and would keep `imageAtomicExchange(...)` from resolving.
 * `data` is widened to a 4-component value for the call and the first component of the result
 * is returned. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
                                          int3 coord,
                                          S data)
{
  return tex.texture->atomic_exchange(uint2(coord.xy), uint(coord.z), vec<S, 4>(data)).x;
}
|
||||
|
||||
/* imageAtomicExchange backend for 3D images.
 * The template takes only `S` and `A`: an extra `int N` parameter that appears in no function
 * parameter cannot be deduced and would keep `imageAtomicExchange(...)` from resolving.
 * `data` is widened to a 4-component value for the call and the first component of the result
 * is returned. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                          int3 coord,
                                          S data)
{
  return tex.texture->atomic_exchange(uint3(coord), vec<S, 4>(data)).x;
}
|
||||
|
||||
#else
|
||||
/* Fallback for imageAtomicMin if atomics are unsupported.
 * This is a plain read / min / write sequence separated by texture fences, not a true atomic:
 * it is not correct for general concurrent thread access, but will allow inter-thread writing.
 * This assumes 2D texture array (`coord.xy` is the texel, `coord.z` the array index).
 * The body is wrapped in `do { ... } while (0)` so the temporary stays local to one expansion:
 * the macro can be used several times in the same scope and behaves as a single statement.
 * Unlike the Metal 3.1 path, this form is a statement and yields no value.
 * NOTE: Implementations should switch to a buffer-backed texture write in these cases. */
#  define imageAtomicMin(tex, coord, data) \
    do { \
      auto val = _texelFetch_internal(tex, coord, 0).r; \
      (tex).texture->fence(); \
      (tex).texture->write(min(val, data), uint2((coord).xy), uint((coord).z)); \
      (tex).texture->fence(); \
    } while (0)
|
||||
|
||||
/* Fallback for atomic Exchange if atomics are unavailable.
|
||||
* NOTE: Implementations should switch to a buffer-backed texture write in these cases. */
|
||||
# define imageAtomicExchange(tex, coord, data) \
|
||||
_texture_image_atomic_exchange_internal_fallback(tex, coord, data)
|
||||
|
||||
template<typename S, access A>
|
||||
S _texture_image_atomic_exchange_internal_fallback(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                                   int coord,
                                                   S data)
{
  /* Non-atomic stand-in for imageAtomicExchange on 1D images: read the current texel, fence,
   * store `data`, fence again, and return the value that was read.
   * The read and write are separate operations. */
  const uint texel = uint(coord);
  /* `data` is the value to store; it must not be passed to `read()` where the LOD goes. */
  S val = tex.texture->read(texel).x;
  tex.texture->fence();
  tex.texture->write(data, texel);
  tex.texture->fence();
  return val;
}
|
||||
|
||||
template<typename S, access A>
|
||||
S _texture_image_atomic_exchange_internal_fallback(
    thread _mtl_combined_image_sampler_1d_array<S, A> tex, int2 coord, S data)
{
  /* Non-atomic stand-in for imageAtomicExchange on 1D array images: read the current texel,
   * fence, store `data`, fence again, and return the value that was read.
   * The read and write are separate operations. */
  const uint texel = uint(coord.x);
  const uint slice = uint(coord.y);
  /* `data` is the value to store; it must not be passed to `read()` where the LOD goes. */
  S val = tex.texture->read(texel, slice).x;
  tex.texture->fence();
  tex.texture->write(data, texel, slice);
  tex.texture->fence();
  return val;
}
|
||||
|
||||
template<typename S, access A>
|
||||
S _texture_image_atomic_exchange_internal_fallback(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                                   int2 coord,
                                                   S data)
{
  /* Non-atomic stand-in for imageAtomicExchange on 2D images: read the current texel, fence,
   * store `data`, fence again, and return the value that was read.
   * The read and write are separate operations. */
  const uint2 texel = uint2(coord);
  /* `data` is the value to store; it must not be passed to `read()` where the LOD goes. */
  S val = tex.texture->read(texel).x;
  tex.texture->fence();
  /* Write to the texel that was read, (x, y); `uint2(coord.x)` would address (x, x). */
  tex.texture->write(data, texel);
  tex.texture->fence();
  return val;
}
|
||||
|
||||
template<typename S, access A>
|
||||
S _texture_image_atomic_exchange_internal_fallback(
    thread _mtl_combined_image_sampler_2d_array<S, A> tex, int3 coord, S data)
{
  /* Non-atomic stand-in for imageAtomicExchange on 2D array images: read the current texel,
   * fence, store `data`, fence again, and return the value that was read.
   * The read and write are separate operations. */
  const uint2 texel = uint2(coord.xy);
  const uint slice = uint(coord.z);
  /* `data` is the value to store; it must not be passed to `read()` where the LOD goes. */
  S val = tex.texture->read(texel, slice).x;
  tex.texture->fence();
  tex.texture->write(data, texel, slice);
  tex.texture->fence();
  return val;
}
|
||||
|
||||
template<typename S, access A>
|
||||
S _texture_image_atomic_exchange_internal_fallback(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                                   int3 coord,
                                                   S data)
{
  /* Non-atomic stand-in for imageAtomicExchange on 3D images: read the current texel, fence,
   * store `data`, fence again, and return the value that was read.
   * The read and write are separate operations. */
  const uint3 texel = uint3(coord);
  /* `data` is the value to store; it must not be passed to `read()` where the LOD goes. */
  S val = tex.texture->read(texel).x;
  tex.texture->fence();
  tex.texture->write(data, texel);
  tex.texture->fence();
  return val;
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Matrix compare operators. */
|
||||
/** TODO(fclem): Template. */
|
||||
inline bool operator==(float4x4 a, float4x4 b)
|
||||
|
|
Loading…
Reference in New Issue