Metal: Add support for atomic image operations
Texture atomics were added in Metal 3.1 and enable the original implementations of shadow update and irradiance cache baking. However, a fallback solution utilising buffer-backed textures will still be required for macOS versions older than 14.0. This patch also includes a stub implementation, used when building or running on older macOS versions, which provides locally-synchronized texture access in place of atomics. This enables some effects to be partially tested, and ensures that non-guarded use of imageAtomic functions does not result in compilation failure. Authored by Apple: Michael Parkin-White. Pull Request: https://projects.blender.org/blender/blender/pulls/112866
This commit is contained in:
parent
499c39cfb9
commit
ee03bb38cb
|
@ -942,7 +942,8 @@ void IrradianceBake::clusters_build()
|
||||||
if (max_virtual_offset_ == 0.0f) {
|
if (max_virtual_offset_ == 0.0f) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
eGPUTextureUsage texture_usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE;
|
eGPUTextureUsage texture_usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE |
|
||||||
|
GPU_TEXTURE_USAGE_ATOMIC;
|
||||||
|
|
||||||
cluster_list_tx_.ensure_3d(GPU_R32I, capture_info_buf_.irradiance_grid_size, texture_usage);
|
cluster_list_tx_.ensure_3d(GPU_R32I, capture_info_buf_.irradiance_grid_size, texture_usage);
|
||||||
cluster_list_tx_.clear(int4(-1));
|
cluster_list_tx_.clear(int4(-1));
|
||||||
|
|
|
@ -662,7 +662,8 @@ void ShadowModule::init()
|
||||||
const int2 atlas_extent = shadow_page_size_ * int2(SHADOW_PAGE_PER_ROW);
|
const int2 atlas_extent = shadow_page_size_ * int2(SHADOW_PAGE_PER_ROW);
|
||||||
const int atlas_layers = divide_ceil_u(shadow_page_len_, SHADOW_PAGE_PER_LAYER);
|
const int atlas_layers = divide_ceil_u(shadow_page_len_, SHADOW_PAGE_PER_LAYER);
|
||||||
|
|
||||||
eGPUTextureUsage tex_usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE;
|
eGPUTextureUsage tex_usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE |
|
||||||
|
GPU_TEXTURE_USAGE_ATOMIC;
|
||||||
if (atlas_tx_.ensure_2d_array(atlas_type, atlas_extent, atlas_layers, tex_usage)) {
|
if (atlas_tx_.ensure_2d_array(atlas_type, atlas_extent, atlas_layers, tex_usage)) {
|
||||||
/* Global update. */
|
/* Global update. */
|
||||||
do_full_update = true;
|
do_full_update = true;
|
||||||
|
|
|
@ -91,6 +91,7 @@ GPU_SHADER_CREATE_INFO(eevee_surfel_cluster_build)
|
||||||
|
|
||||||
GPU_SHADER_CREATE_INFO(eevee_surfel_list_build)
|
GPU_SHADER_CREATE_INFO(eevee_surfel_list_build)
|
||||||
.local_group_size(SURFEL_GROUP_SIZE)
|
.local_group_size(SURFEL_GROUP_SIZE)
|
||||||
|
.builtins(BuiltinBits::TEXTURE_ATOMIC)
|
||||||
.additional_info("eevee_shared", "eevee_surfel_common", "draw_view")
|
.additional_info("eevee_shared", "eevee_surfel_common", "draw_view")
|
||||||
.storage_buf(0, Qualifier::READ_WRITE, "int", "list_start_buf[]")
|
.storage_buf(0, Qualifier::READ_WRITE, "int", "list_start_buf[]")
|
||||||
.storage_buf(6, Qualifier::READ_WRITE, "SurfelListInfoData", "list_info_buf")
|
.storage_buf(6, Qualifier::READ_WRITE, "SurfelListInfoData", "list_info_buf")
|
||||||
|
|
|
@ -193,6 +193,7 @@ GPU_SHADER_CREATE_INFO(eevee_surf_shadow)
|
||||||
.define("USE_ATOMIC")
|
.define("USE_ATOMIC")
|
||||||
.builtins(BuiltinBits::VIEWPORT_INDEX)
|
.builtins(BuiltinBits::VIEWPORT_INDEX)
|
||||||
.builtins(BuiltinBits::LAYER)
|
.builtins(BuiltinBits::LAYER)
|
||||||
|
.builtins(BuiltinBits::TEXTURE_ATOMIC)
|
||||||
.vertex_out(eevee_surf_iface)
|
.vertex_out(eevee_surf_iface)
|
||||||
.vertex_out(eevee_surf_flat_iface)
|
.vertex_out(eevee_surf_flat_iface)
|
||||||
.storage_buf(SHADOW_RENDER_MAP_BUF_SLOT,
|
.storage_buf(SHADOW_RENDER_MAP_BUF_SLOT,
|
||||||
|
|
|
@ -548,9 +548,12 @@ typedef enum eGPUTextureUsage {
|
||||||
/* When used, the texture will not have any backing storage and can solely exist as a virtual
|
/* When used, the texture will not have any backing storage and can solely exist as a virtual
|
||||||
* frame-buffer attachment. */
|
* frame-buffer attachment. */
|
||||||
GPU_TEXTURE_USAGE_MEMORYLESS = (1 << 5),
|
GPU_TEXTURE_USAGE_MEMORYLESS = (1 << 5),
|
||||||
|
/* Whether a texture can support atomic operations. */
|
||||||
|
GPU_TEXTURE_USAGE_ATOMIC = (1 << 6),
|
||||||
/* Create a texture whose usage cannot be defined prematurely.
|
/* Create a texture whose usage cannot be defined prematurely.
|
||||||
* This is unoptimized and should not be used. */
|
* This is unoptimized and should not be used. */
|
||||||
GPU_TEXTURE_USAGE_GENERAL = (0xFF & (~GPU_TEXTURE_USAGE_MEMORYLESS)),
|
GPU_TEXTURE_USAGE_GENERAL = (0xFF &
|
||||||
|
(~(GPU_TEXTURE_USAGE_MEMORYLESS | GPU_TEXTURE_USAGE_ATOMIC))),
|
||||||
} eGPUTextureUsage;
|
} eGPUTextureUsage;
|
||||||
|
|
||||||
ENUM_OPERATORS(eGPUTextureUsage, GPU_TEXTURE_USAGE_GENERAL);
|
ENUM_OPERATORS(eGPUTextureUsage, GPU_TEXTURE_USAGE_GENERAL);
|
||||||
|
|
|
@ -208,6 +208,9 @@ enum class BuiltinBits {
|
||||||
*/
|
*/
|
||||||
VIEWPORT_INDEX = (1 << 17),
|
VIEWPORT_INDEX = (1 << 17),
|
||||||
|
|
||||||
|
/* Texture atomics require this builtin flag so that shader compilation options can be altered. */
|
||||||
|
TEXTURE_ATOMIC = (1 << 18),
|
||||||
|
|
||||||
/* Not a builtin but a flag we use to tag shaders that use the debug features. */
|
/* Not a builtin but a flag we use to tag shaders that use the debug features. */
|
||||||
USE_DEBUG_DRAW = (1 << 29),
|
USE_DEBUG_DRAW = (1 << 29),
|
||||||
USE_DEBUG_PRINT = (1 << 30),
|
USE_DEBUG_PRINT = (1 << 30),
|
||||||
|
|
|
@ -300,6 +300,14 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
|
||||||
options.languageVersion = MTLLanguageVersion2_3;
|
options.languageVersion = MTLLanguageVersion2_3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#if defined(MAC_OS_VERSION_14_0)
|
||||||
|
if (@available(macOS 14.00, *)) {
|
||||||
|
/* Texture atomics require Metal 3.1. */
|
||||||
|
if (bool(info->builtins_ & BuiltinBits::TEXTURE_ATOMIC)) {
|
||||||
|
options.languageVersion = MTLLanguageVersion3_1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
NSString *source_to_compile = shd_builder_->msl_source_vert_;
|
NSString *source_to_compile = shd_builder_->msl_source_vert_;
|
||||||
|
|
||||||
|
|
|
@ -618,6 +618,14 @@ inline MTLTextureUsage mtl_usage_from_gpu(eGPUTextureUsage usage)
|
||||||
if (usage & GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW) {
|
if (usage & GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW) {
|
||||||
mtl_usage = mtl_usage | MTLTextureUsagePixelFormatView;
|
mtl_usage = mtl_usage | MTLTextureUsagePixelFormatView;
|
||||||
}
|
}
|
||||||
|
#if defined(MAC_OS_VERSION_14_0)
|
||||||
|
if (@available(macOS 14.0, *)) {
|
||||||
|
if (usage & GPU_TEXTURE_USAGE_ATOMIC) {
|
||||||
|
|
||||||
|
mtl_usage = mtl_usage | MTLTextureUsageShaderAtomic;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
return mtl_usage;
|
return mtl_usage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -201,15 +201,6 @@ template<typename T> T atomicExchange(device T &mem, T data)
|
||||||
return atomic_exchange_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
|
return atomic_exchange_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Unblock texture atomic compilation.
|
|
||||||
* TODO(Metal): This is not correct for global atomic behavior, but will be safe within a single
|
|
||||||
* thread.
|
|
||||||
* We need to re-visit the solution for this use-case and use a 2D texture buffer instead. */
|
|
||||||
#define imageAtomicMin(tex, coord, data) \
|
|
||||||
uint val = _texelFetch_internal(tex, coord, 0).r; \
|
|
||||||
_texture_write_internal(tex, coord, uint4((val < data) ? val : data)); \
|
|
||||||
tex.texture->fence();
|
|
||||||
|
|
||||||
/* Used to replace 'out' in function parameters with thread-local reference
|
/* Used to replace 'out' in function parameters with thread-local reference
|
||||||
* shortened to avoid expanding the GLSL source string. */
|
* shortened to avoid expanding the GLSL source string. */
|
||||||
#define THD thread
|
#define THD thread
|
||||||
|
@ -984,6 +975,172 @@ inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_3d<S
|
||||||
tex.texture->write(value, uint3(_coord.xyz));
|
tex.texture->write(value, uint3(_coord.xyz));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Texture atomic operations are only supported in Metal 3.1 and onwards (macOS 14.0 Sonoma). */
|
||||||
|
#if __METAL_VERSION__ >= 310
|
||||||
|
|
||||||
|
/* Route the GLSL-style image atomic calls to the overloaded texture wrappers defined below. */
# define imageAtomicMin(tex, coord, data) \
  _texture_image_atomic_min_internal(tex, coord, data)
# define imageAtomicExchange(tex, coord, data) \
  _texture_image_atomic_exchange_internal(tex, coord, data)
|
||||||
|
|
||||||
|
/* Atomic Min. */
|
||||||
|
template<typename S, access A>
|
||||||
|
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
|
||||||
|
int coord,
|
||||||
|
vec<S, 4> data)
|
||||||
|
{
|
||||||
|
return tex.texture->atomic_fetch_min(uint(coord), data);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename S, access A>
|
||||||
|
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
|
||||||
|
int2 coord,
|
||||||
|
vec<S, 4> data)
|
||||||
|
{
|
||||||
|
return tex.texture->atomic_fetch_min(uint(coord.x), uint(coord.y), data);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename S, access A>
|
||||||
|
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
|
||||||
|
int2 coord,
|
||||||
|
vec<S, 4> data)
|
||||||
|
{
|
||||||
|
return tex.texture->atomic_fetch_min(uint2(coord.xy), data);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename S, access A>
|
||||||
|
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
|
||||||
|
int3 coord,
|
||||||
|
vec<S, 4> data)
|
||||||
|
{
|
||||||
|
return tex.texture->atomic_fetch_min(uint2(coord.xy), uint(coord.z), data);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename S, access A>
|
||||||
|
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
|
||||||
|
int3 coord,
|
||||||
|
vec<S, 4> data)
|
||||||
|
{
|
||||||
|
return tex.texture->atomic_fetch_min(uint3(coord), data);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Atomic Exchange. */
|
||||||
|
template<typename S, access A, int N>
|
||||||
|
vec<S, N> _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
|
||||||
|
int coord,
|
||||||
|
vec<S, N> data)
|
||||||
|
{
|
||||||
|
return tex.texture->atomic_exchange(uint(coord), data);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename S, access A>
|
||||||
|
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
|
||||||
|
int2 coord,
|
||||||
|
S data)
|
||||||
|
{
|
||||||
|
return tex.texture->atomic_exchange(uint(coord.x), uint(coord.y), data);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename S, access A, int N>
|
||||||
|
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
|
||||||
|
int2 coord,
|
||||||
|
S data)
|
||||||
|
{
|
||||||
|
return tex.texture->atomic_exchange(uint2(coord.xy), data);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename S, access A, int N>
|
||||||
|
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
|
||||||
|
int3 coord,
|
||||||
|
S data)
|
||||||
|
{
|
||||||
|
return tex.texture->atomic_exchange(uint2(coord.xy), uint(coord.z), data);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename S, access A, int N>
|
||||||
|
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
|
||||||
|
int3 coord,
|
||||||
|
S data)
|
||||||
|
{
|
||||||
|
return tex.texture->atomic_exchange(uint3(coord), data);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
/* Fallback for imageAtomicMin if atomics are unsupported.
 * This is a plain read / min / write sequence separated by texture fences: it is not correct
 * for general concurrent thread access, but will allow inter-thread writing.
 * This assumes 2D texture array.
 * The body is wrapped in `do { } while (0)` so the temporary stays local to the expansion and
 * the macro behaves as a single statement (safe to use repeatedly and under an unbraced `if`).
 * NOTE: Implementations should switch to a buffer-backed texture write in these cases. */
# define imageAtomicMin(tex, coord, data) \
  do { \
    auto _atomic_min_prev = _texelFetch_internal(tex, coord, 0).r; \
    (tex).texture->fence(); \
    (tex).texture->write(min(_atomic_min_prev, data), uint2((coord).xy), uint((coord).z)); \
    (tex).texture->fence(); \
  } while (0)
|
||||||
|
|
||||||
|
/* When texture atomics are unavailable, imageAtomicExchange resolves to the fallback
 * overloads defined below.
 * NOTE: Implementations should switch to a buffer-backed texture write in these cases. */
# define imageAtomicExchange(tex, coord, data) \
  _texture_image_atomic_exchange_internal_fallback(tex, coord, data)
|
||||||
|
|
||||||
|
template<typename S, access A>
|
||||||
|
S _texture_image_atomic_exchange_internal_fallback(thread _mtl_combined_image_sampler_1d<S, A> tex,
|
||||||
|
int coord,
|
||||||
|
S data)
|
||||||
|
{
|
||||||
|
S val = tex.texture->read(uint(coord), data).x;
|
||||||
|
tex.texture->fence();
|
||||||
|
tex.texture->write(data, uint(coord));
|
||||||
|
tex.texture->fence();
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename S, access A>
|
||||||
|
S _texture_image_atomic_exchange_internal_fallback(
|
||||||
|
thread _mtl_combined_image_sampler_1d_array<S, A> tex, int2 coord, S data)
|
||||||
|
{
|
||||||
|
S val = tex.texture->read(uint(coord.x), uint(coord.y), data).x;
|
||||||
|
tex.texture->fence();
|
||||||
|
tex.texture->write(data, uint(coord.x), uint(coord.y));
|
||||||
|
tex.texture->fence();
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename S, access A>
|
||||||
|
S _texture_image_atomic_exchange_internal_fallback(thread _mtl_combined_image_sampler_2d<S, A> tex,
|
||||||
|
int2 coord,
|
||||||
|
S data)
|
||||||
|
{
|
||||||
|
S val = tex.texture->read(uint2(coord), data).x;
|
||||||
|
tex.texture->fence();
|
||||||
|
tex.texture->write(data, uint2(coord.x));
|
||||||
|
tex.texture->fence();
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename S, access A>
|
||||||
|
S _texture_image_atomic_exchange_internal_fallback(
|
||||||
|
thread _mtl_combined_image_sampler_2d_array<S, A> tex, int3 coord, S data)
|
||||||
|
{
|
||||||
|
S val = tex.texture->read(uint2(coord.xy), uint(coord.z), data).x;
|
||||||
|
tex.texture->fence();
|
||||||
|
tex.texture->write(data, uint2(coord.xy), uint(coord.z));
|
||||||
|
tex.texture->fence();
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename S, access A>
|
||||||
|
S _texture_image_atomic_exchange_internal_fallback(thread _mtl_combined_image_sampler_3d<S, A> tex,
|
||||||
|
int3 coord,
|
||||||
|
S data)
|
||||||
|
{
|
||||||
|
S val = tex.texture->read(uint3(coord), data).x;
|
||||||
|
tex.texture->fence();
|
||||||
|
tex.texture->write(data, uint3(coord));
|
||||||
|
tex.texture->fence();
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Matrix compare operators. */
|
/* Matrix compare operators. */
|
||||||
/** TODO(fclem): Template. */
|
/** TODO(fclem): Template. */
|
||||||
inline bool operator==(float4x4 a, float4x4 b)
|
inline bool operator==(float4x4 a, float4x4 b)
|
||||||
|
|
Loading…
Reference in New Issue