Metal: MTLTexture core implementation for Metal backend, with minimal surrounding functionality.

This covers implementation of the GPUTexture abstraction for the Metal backend, with additional utility functionality as required.

Some components have been temporarily disabled pending dependencies on upcoming Metal backend components, and these will be addressed as the backend is fleshed out.

One core challenge addressed in the Metal backend is the requirement for read/update routines for textures. MTLBlitCommandEncoders offer a limited range of the full functionality provided by OpenGLs texture update and read functions such that a series of compute kernels have been implemented to provide advanced functionality such as data format conversion and partial/swizzled component updates.

This diff is provided in full, but if further division is required for purposes of code review, this can be done.

Authored by Apple: Michael Parkin-White

Ref T96261

Reviewed By: fclem

Maniphest Tasks: T96261

Differential Revision: https://developer.blender.org/D14543
This commit is contained in:
Clément Foucault 2022-04-27 12:34:57 +02:00 committed by Clément Foucault
parent 68ca12a7fc
commit cdd4354c81
24 changed files with 4236 additions and 19 deletions

View File

@ -186,9 +186,17 @@ set(OPENGL_SRC
set(METAL_SRC
metal/mtl_backend.mm
metal/mtl_context.mm
metal/mtl_debug.mm
metal/mtl_texture.mm
metal/mtl_texture_util.mm
metal/mtl_backend.hh
metal/mtl_capabilities.hh
metal/mtl_common.hh
metal/mtl_context.hh
metal/mtl_debug.hh
metal/mtl_texture.hh
)
# Select Backend source based on availability
@ -210,6 +218,18 @@ if(NOT WITH_SYSTEM_GLEW)
)
endif()
set(MSL_SRC
metal/kernels/compute_texture_update.msl
metal/kernels/compute_texture_read.msl
metal/kernels/depth_2d_update_float_frag.glsl
metal/kernels/depth_2d_update_int24_frag.glsl
metal/kernels/depth_2d_update_int32_frag.glsl
metal/kernels/depth_2d_update_vert.glsl
metal/kernels/gpu_shader_fullscreen_blit_vert.glsl
metal/kernels/gpu_shader_fullscreen_blit_frag.glsl
)
set(GLSL_SRC
GPU_shader_shared.h
shaders/opengl/glsl_shader_defines.glsl
@ -392,6 +412,15 @@ foreach(GLSL_FILE ${GLSL_SRC})
data_to_c_simple(${GLSL_FILE} GLSL_C)
endforeach()
if(WITH_METAL_BACKEND)
set(MSL_C)
foreach(MSL_FILE ${MSL_SRC})
data_to_c_simple(${MSL_FILE} MSL_C)
endforeach()
list(APPEND GLSL_C ${MSL_C})
endif()
blender_add_lib(bf_gpu_shaders "${GLSL_C}" "" "" "")
list(APPEND LIB

View File

@ -157,6 +157,7 @@ void GPU_stencil_reference_set(uint reference);
void GPU_stencil_write_mask_set(uint write_mask);
void GPU_stencil_compare_mask_set(uint compare_mask);
eGPUFaceCullTest GPU_face_culling_get(void);
eGPUBlend GPU_blend_get(void);
eGPUDepthTest GPU_depth_test_get(void);
eGPUWriteMask GPU_write_mask_get(void);

View File

@ -175,8 +175,18 @@ typedef enum eGPUDataFormat {
GPU_DATA_UINT_24_8,
GPU_DATA_10_11_11_REV,
GPU_DATA_2_10_10_10_REV,
GPU_DATA_HALF_FLOAT
} eGPUDataFormat;
typedef enum eGPUTextureUsage {
GPU_TEXTURE_USAGE_SHADER_READ = (1 << 0),
GPU_TEXTURE_USAGE_SHADER_WRITE = (1 << 1),
GPU_TEXTURE_USAGE_ATTACHMENT = (1 << 2),
GPU_TEXTURE_USAGE_GENERAL = 0xFF
} eGPUTextureUsage;
ENUM_OPERATORS(eGPUTextureUsage, GPU_TEXTURE_USAGE_GENERAL)
unsigned int GPU_texture_memory_usage_get(void);
/**

View File

@ -48,6 +48,12 @@ void GPU_face_culling(eGPUFaceCullTest culling)
SET_IMMUTABLE_STATE(culling_test, culling);
}
eGPUFaceCullTest GPU_face_culling_get()
{
GPUState &state = Context::get()->state_manager->state;
return (eGPUFaceCullTest)state.culling_test;
}
void GPU_front_facing(bool invert)
{
SET_IMMUTABLE_STATE(invert_facing, invert);

View File

@ -131,7 +131,6 @@ class Texture {
/* TODO(fclem): Legacy. Should be removed at some point. */
virtual uint gl_bindcode_get() const = 0;
int width_get() const
{
return w_;
@ -458,6 +457,72 @@ inline bool validate_data_format(eGPUTextureFormat tex_format, eGPUDataFormat da
}
}
/* Ensure valid upload formats. With format conversion support, certain types can be extended to
* allow upload from differing source formats. If these cases are added, amend accordingly. */
inline bool validate_data_format_mtl(eGPUTextureFormat tex_format, eGPUDataFormat data_format)
{
switch (tex_format) {
case GPU_DEPTH_COMPONENT24:
case GPU_DEPTH_COMPONENT16:
case GPU_DEPTH_COMPONENT32F:
return ELEM(data_format, GPU_DATA_FLOAT, GPU_DATA_UINT);
case GPU_DEPTH24_STENCIL8:
case GPU_DEPTH32F_STENCIL8:
/* Data can be provided as a 4-byte UINT. */
return ELEM(data_format, GPU_DATA_UINT_24_8, GPU_DATA_UINT);
case GPU_R8UI:
case GPU_R16UI:
case GPU_RG16UI:
case GPU_R32UI:
case GPU_RGBA32UI:
case GPU_RGBA16UI:
case GPU_RG8UI:
case GPU_RG32UI:
return data_format == GPU_DATA_UINT;
case GPU_R32I:
case GPU_RG16I:
case GPU_R16I:
case GPU_RGBA8I:
case GPU_RGBA32I:
case GPU_RGBA16I:
case GPU_RG8I:
case GPU_RG32I:
case GPU_R8I:
return data_format == GPU_DATA_INT;
case GPU_R8:
case GPU_RG8:
case GPU_RGBA8:
case GPU_RGBA8_DXT1:
case GPU_RGBA8_DXT3:
case GPU_RGBA8_DXT5:
case GPU_RGBA8UI:
case GPU_SRGB8_A8:
case GPU_SRGB8_A8_DXT1:
case GPU_SRGB8_A8_DXT3:
case GPU_SRGB8_A8_DXT5:
return ELEM(data_format, GPU_DATA_UBYTE, GPU_DATA_FLOAT);
case GPU_RGB10_A2:
return ELEM(data_format, GPU_DATA_2_10_10_10_REV, GPU_DATA_FLOAT);
case GPU_R11F_G11F_B10F:
return ELEM(data_format, GPU_DATA_10_11_11_REV, GPU_DATA_FLOAT);
case GPU_RGBA16F:
return ELEM(data_format, GPU_DATA_HALF_FLOAT, GPU_DATA_FLOAT);
case GPU_RGBA32F:
case GPU_RGBA16:
case GPU_RG32F:
case GPU_RG16F:
case GPU_RG16:
case GPU_R32F:
case GPU_R16F:
case GPU_R16:
case GPU_RGB16F:
return data_format == GPU_DATA_FLOAT;
default:
BLI_assert_msg(0, "Unrecognized data format");
return data_format == GPU_DATA_FLOAT;
}
}
inline eGPUDataFormat to_data_format(eGPUTextureFormat tex_format)
{
switch (tex_format) {

View File

@ -0,0 +1,182 @@
/* MATCHING eGPUTextureType. */
#define GPU_TEXTURE_1D (1 << 0)
#define GPU_TEXTURE_2D (1 << 1)
#define GPU_TEXTURE_3D (1 << 2)
#define GPU_TEXTURE_CUBE (1 << 3)
#define GPU_TEXTURE_ARRAY (1 << 4)
#define GPU_TEXTURE_BUFFER (1 << 5)
#define GPU_TEXTURE_1D_ARRAY (GPU_TEXTURE_1D | GPU_TEXTURE_ARRAY)
#define GPU_TEXTURE_2D_ARRAY (GPU_TEXTURE_2D | GPU_TEXTURE_ARRAY)
#define GPU_TEXTURE_CUBE_ARRAY (GPU_TEXTURE_CUBE | GPU_TEXTURE_ARRAY)
/* Determine input texture type. */
#if IS_DEPTH_FORMAT == 1
# define TEX_NAME_BASE depth
#else
# define TEX_NAME_BASE texture
#endif
#define JOIN(x, y) x##y
#define FUNC_NAME(x, y) JOIN(x, y)
/* Assign parameters based on texture type. */
#if TEX_TYPE == GPU_TEXTURE_1D
# define TEX_TYPE_NAME FUNC_NAME(TEX_NAME_BASE, 1d)
# define DIMS 1
#elif TEX_TYPE == GPU_TEXTURE_2D
# define TEX_TYPE_NAME FUNC_NAME(TEX_NAME_BASE, 2d)
# define DIMS 2
#elif TEX_TYPE == GPU_TEXTURE_3D
# define TEX_TYPE_NAME FUNC_NAME(TEX_NAME_BASE, 3d)
# define DIMS 3
#elif TEX_TYPE == GPU_TEXTURE_1D_ARRAY
# define TEX_TYPE_NAME FUNC_NAME(TEX_NAME_BASE, 1d_array)
# define DIMS 2
#elif TEX_TYPE == GPU_TEXTURE_2D_ARRAY
# define TEX_TYPE_NAME FUNC_NAME(TEX_NAME_BASE, 2d_array)
# define DIMS 3
#endif
/* Position dimensionality for threadgroup. */
#if DIMS == 1
# define POSITION_TYPE uint
#elif DIMS == 2
# define POSITION_TYPE uint2
#elif DIMS == 3
# define POSITION_TYPE uint3
#endif
using namespace metal;
template<typename T> T denormalize(float val)
{
return T(float(DEPTH_SCALE_FACTOR) * val);
};
template<> int denormalize<int>(float val)
{
return int((float(DEPTH_SCALE_FACTOR) * val - 1.0f) / 2.0f);
}
template<> uint denormalize<uint>(float val)
{
return uint(float(DEPTH_SCALE_FACTOR) * val);
}
template<typename T> T convert_type(float type)
{
return T(type);
}
template<> uchar convert_type<uchar>(float val)
{
return uchar(val * float(0xFF));
}
template<> uint convert_type<uint>(float val)
{
return uint(val * double(0xFFFFFFFFu));
}
struct TextureReadParams {
int mip_index;
int extent[3];
int offset[3];
};
#if IS_DEPTH_FORMAT == 1
constexpr sampler pixelSampler = sampler(coord::pixel, address::clamp_to_edge, filter::nearest);
#endif
kernel void compute_texture_read(constant TextureReadParams &params [[buffer(0)]],
device OUTPUT_DATA_TYPE *output_data [[buffer(1)]],
#if IS_DEPTH_FORMAT == 1
TEX_TYPE_NAME<float, access::sample> read_tex [[texture(0)]],
#else
TEX_TYPE_NAME<INPUT_DATA_TYPE, access::read> read_tex
[[texture(0)]],
#endif
POSITION_TYPE position [[thread_position_in_grid]])
{
/* Read colour. */
vec<INPUT_DATA_TYPE, 4> read_colour;
/* 1D TEXTURE */
#if TEX_TYPE == GPU_TEXTURE_1D
/* xx, yy, layer determined by kernel invocation pattern */
uint xx = position[0];
int index = (xx)*COMPONENT_COUNT_OUTPUT;
read_colour = read_tex.read(uint(params.offset[0]) + uint(xx));
/* 2D TEXTURE */
#elif TEX_TYPE == GPU_TEXTURE_2D
/* xx, yy, layer determined by kernel invocation pattern */
uint xx = position[0];
uint yy = position[1];
int index = (yy * params.extent[0] + xx) * COMPONENT_COUNT_OUTPUT;
/* Read data */
# if IS_DEPTH_FORMAT == 1
output_data[index] = denormalize<OUTPUT_DATA_TYPE>(
read_tex.sample(pixelSampler, float2(params.offset[0], params.offset[1]) + float2(xx, yy)));
# else
read_colour = read_tex.read(uint2(params.offset[0], params.offset[1]) + uint2(xx, yy));
# endif
/* 3D TEXTURE */
#elif TEX_TYPE == GPU_TEXTURE_3D
/* xx, yy, layer determined by kernel invocation pattern */
uint xx = position[0];
uint yy = position[1];
uint zz = position[2];
int index = (zz * (params.extent[0] * params.extent[1]) + yy * params.extnt[0] + xx) *
COMPONENT_COUNT_INPUT;
read_colour = read_tex.read(uint3(params.offset[0], params.offset[1], params.offset[2]) +
uint3(xx, yy, zz));
/* 1D ARRAY TEXTURE */
#elif TEX_TYPE == GPU_TEXTURE_1D_ARRAY
/* xx, yy, layer determined by kernel invocation pattern */
uint xx = position[0];
uint layer = position[1];
int index = (layer * params.extent[0] + xx) * COMPONENT_COUNT_OUTPUT;
read_colour = read_tex.read(uint(params.offset[0]) + uint(xx), uint(params.offset[1]) + layer);
/* 2D ARRAY TEXTURE */
#elif TEX_TYPE == GPU_TEXTURE_2D_ARRAY
/* xx, yy, layer determined by kernel invocation pattern */
uint xx = position[0];
uint yy = position[1];
uint layer = position[2];
int index = (layer * (params.extent[0] * params.extent[1]) + yy * params.extent[0] + xx) *
COMPONENT_COUNT_INPUT;
/* Read data */
# if IS_DEPTH_FORMAT == 1
output_data[index] = denormalize<OUTPUT_DATA_TYPE>(
read_tex.sample(pixelSampler,
float2(params.offset[0], params.offset[1]) + float2(xx, yy),
uint(params.offset[2] + layer)));
# else
read_colour = read_tex.read(uint2(params.offset[0], params.offset[1]) + uint2(xx, yy),
uint(params.offset[2] + layer));
# endif
#endif
/* Output per-component colour data. */
#if IS_DEPTH_FORMAT != 1
/* Write data to block */
for (int i = 0; i < WRITE_COMPONENT_COUNT; i++) {
output_data[index + i] = convert_type<OUTPUT_DATA_TYPE>(read_colour[i]);
}
/* Fill in empty cells if more components are being read than exist */
for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) {
output_data[index + i] = convert_type<OUTPUT_DATA_TYPE>(0);
}
#endif
}

View File

@ -0,0 +1,165 @@
using namespace metal;
/* MATCHING eGPUTextureType. */
#define GPU_TEXTURE_1D (1 << 0)
#define GPU_TEXTURE_2D (1 << 1)
#define GPU_TEXTURE_3D (1 << 2)
#define GPU_TEXTURE_CUBE (1 << 3)
#define GPU_TEXTURE_ARRAY (1 << 4)
#define GPU_TEXTURE_BUFFER (1 << 5)
#define GPU_TEXTURE_1D_ARRAY (GPU_TEXTURE_1D | GPU_TEXTURE_ARRAY)
#define GPU_TEXTURE_2D_ARRAY (GPU_TEXTURE_2D | GPU_TEXTURE_ARRAY)
#define GPU_TEXTURE_CUBE_ARRAY (GPU_TEXTURE_CUBE | GPU_TEXTURE_ARRAY)
/* Assign parameters based on texture type. */
#if TEX_TYPE == GPU_TEXTURE_1D
# define TEX_TYPE_NAME texture1d
# define DIMS 1
#elif TEX_TYPE == GPU_TEXTURE_2D
# define TEX_TYPE_NAME texture2d
# define DIMS 2
#elif TEX_TYPE == GPU_TEXTURE_3D
# define TEX_TYPE_NAME texture3d
# define DIMS 3
#elif TEX_TYPE == GPU_TEXTURE_1D_ARRAY
# define TEX_TYPE_NAME texture1d_array
# define DIMS 2
#elif TEX_TYPE == GPU_TEXTURE_2D_ARRAY
# define TEX_TYPE_NAME texture2d_array
# define DIMS 3
#endif
/* Position dimensionality for threadgroup. */
#if DIMS == 1
# define POSITION_TYPE uint
#elif DIMS == 2
# define POSITION_TYPE uint2
#elif DIMS == 3
# define POSITION_TYPE uint3
#endif
float3 mtl_linear_to_srgb_attr(float3 c)
{
c = max(c, float3(0.0));
float3 c1 = c * 12.92;
float3 c2 = 1.055 * pow(c, float3(1.0 / 2.4)) - 0.055;
return mix(c1, c2, step(float3(0.0031308), c));
}
float3 mtl_srgb_to_linear_attr(float3 c)
{
c = max(c, float3(0.0));
float3 c1 = c * (1.0 / 12.92);
float3 c2 = pow((c + 0.055) * (1.0 / 1.055), float3(2.4));
return mix(c1, c2, step(float3(0.04045), c));
}
struct TextureUpdateParams {
int mip_index;
int extent[3];
int offset[3];
uint unpack_row_length;
};
kernel void compute_texture_update(constant TextureUpdateParams &params [[buffer(0)]],
constant INPUT_DATA_TYPE *input_data [[buffer(1)]],
TEX_TYPE_NAME<OUTPUT_DATA_TYPE, access::write> update_tex
[[texture(0)]],
POSITION_TYPE position [[thread_position_in_grid]])
{
/* 1D TEXTURE */
#if TEX_TYPE == GPU_TEXTURE_1D
/* xx, yy, layer determined by kernel invocation pattern */
uint xx = position;
int index = xx * COMPONENT_COUNT_INPUT;
vec<OUTPUT_DATA_TYPE, /*COMPONENT_COUNT_OUTPUT*/ 4> output;
for (int i = 0; i < COMPONENT_COUNT_INPUT; i++) {
output[i] = OUTPUT_DATA_TYPE(input_data[index + i]);
}
for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) {
output[i] = OUTPUT_DATA_TYPE(0);
}
update_tex.write(output, uint(params.offset[0]) + uint(xx));
/* 2D TEXTURE */
#elif TEX_TYPE == GPU_TEXTURE_2D
/* xx, yy, layer determined by kernel invocation pattern */
uint xx = position[0];
uint yy = position[1];
int index = (yy * params.unpack_row_length + xx) * COMPONENT_COUNT_INPUT;
vec<OUTPUT_DATA_TYPE, /*COMPONENT_COUNT_OUTPUT*/ 4> output;
for (int i = 0; i < COMPONENT_COUNT_INPUT; i++) {
output[i] = OUTPUT_DATA_TYPE(input_data[index + i]);
}
for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) {
output[i] = OUTPUT_DATA_TYPE(0);
}
update_tex.write(output, uint2(params.offset[0], params.offset[1]) + uint2(xx, yy));
/* 3D TEXTURE */
#elif TEX_TYPE == GPU_TEXTURE_3D
/* xx, yy, zz determined by kernel invocation pattern */
uint xx = position[0];
uint yy = position[1];
uint zz = position[2];
int index = (zz * (params.unpack_row_length * params.extent[1]) + yy * params.unpack_row_length +
xx) *
COMPONENT_COUNT_INPUT;
vec<OUTPUT_DATA_TYPE, /*COMPONENT_COUNT_OUTPUT*/ 4> output;
for (int i = 0; i < COMPONENT_COUNT_INPUT; i++) {
output[i] = OUTPUT_DATA_TYPE(input_data[index + i]);
}
for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) {
output[i] = OUTPUT_DATA_TYPE(0);
}
update_tex.write(
output, uint3(params.offset[0], params.offset[1], params.offset[2]) + uint3(xx, yy, zz));
/* 1D ARRAY TEXTURE */
#elif TEX_TYPE == GPU_TEXTURE_1D_ARRAY
/* xx, yy, layer determined by kernel invocation pattern */
uint xx = position[0];
uint layer = position[1];
int index = (layer * params.unpack_row_length + xx) * COMPONENT_COUNT_INPUT;
vec<OUTPUT_DATA_TYPE, /*COMPONENT_COUNT_OUTPUT*/ 4> output;
for (int i = 0; i < COMPONENT_COUNT_INPUT; i++) {
output[i] = OUTPUT_DATA_TYPE(input_data[index + i]);
}
for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) {
output[i] = OUTPUT_DATA_TYPE(0);
}
update_tex.write(
output, uint(params.offset[0]) + uint(xx), uint(params.offset[1]) + uint(layer));
/* 2D ARRAY TEXTURE */
#elif TEX_TYPE == GPU_TEXTURE_2D_ARRAY
/* xx, yy, layer determined by kernel invocation pattern */
uint xx = position[0];
uint yy = position[1];
uint layer = position[2];
int index = (layer * (params.unpack_row_length * params.extent[1]) +
yy * params.unpack_row_length + xx) *
COMPONENT_COUNT_INPUT;
vec<OUTPUT_DATA_TYPE, /*COMPONENT_COUNT_OUTPUT*/ 4> output;
for (int i = 0; i < COMPONENT_COUNT_INPUT; i++) {
output[i] = OUTPUT_DATA_TYPE(input_data[index + i]);
}
for (int i = COMPONENT_COUNT_INPUT; i < COMPONENT_COUNT_OUTPUT; i++) {
output[i] = OUTPUT_DATA_TYPE(0);
}
update_tex.write(
output, uint2(params.offset[0], params.offset[1]) + uint2(xx, yy), params.offset[2] + layer);
#endif
}

View File

@ -0,0 +1,10 @@
uniform sampler2D source_data;
uniform int mip;
in vec2 texCoord_interp;
void main()
{
gl_FragDepth = textureLod(source_data, texCoord_interp, mip).r;
}

View File

@ -0,0 +1,13 @@
uniform isampler2D source_data;
uniform int mip;
in vec2 texCoord_interp;
void main()
{
uint val = textureLod(source_data, texCoord_interp, mip).r;
uint stencil = (val >> 24) & 0xFFu;
uint depth = (val)&0xFFFFFFu;
gl_FragDepth = float(depth) / float(0xFFFFFFu);
}

View File

@ -0,0 +1,12 @@
uniform isampler2D source_data;
uniform int mip;
in vec2 texCoord_interp;
void main()
{
uint val = textureLod(source_data, texCoord_interp, mip).r;
uint depth = (val) & (0xFFFFFFFFu);
gl_FragDepth = float(depth) / float(0xFFFFFFFFu);
}

View File

@ -0,0 +1,33 @@
uniform vec2 extent;
uniform vec2 offset;
uniform vec2 size;
out vec2 texCoord_interp;
in vec2 pos;
void main()
{
vec4 rect = vec4(offset.x, offset.y, offset.x + extent.x, offset.y + extent.y);
rect /= vec4(size, size);
vec4 tex = rect;
rect = rect * 2.0 - 1.0;
/* QUAD */
if (pos.x == 0.0 && pos.y == 0.0) {
rect.xy = rect.xy;
texCoord_interp = tex.xy;
}
else if (pos.x == 0.0 && pos.y == 1.0) {
rect.xy = rect.xw;
texCoord_interp = tex.xw;
}
else if (pos.x == 1.0 && pos.y == 1.0) {
rect.xy = rect.zw;
texCoord_interp = tex.zw;
}
else {
rect.xy = rect.zy;
texCoord_interp = tex.zy;
}
gl_Position = vec4(rect.xy, 0.0f, 1.0f);
}

View File

@ -0,0 +1,12 @@
in vec4 uvcoordsvar;
uniform sampler2D imageTexture;
uniform int mip;
out vec4 fragColor;
void main()
{
vec4 tex_color = textureLod(imageTexture, uvcoordsvar.xy, mip);
fragColor = tex_color;
}

View File

@ -0,0 +1,18 @@
out vec4 uvcoordsvar;
in vec2 pos;
uniform vec2 fullscreen;
uniform vec2 size;
uniform vec2 dst_offset;
uniform vec2 src_offset;
void main()
{
/* The position represents a 0-1 square, we first scale it by the size we want to have it on
* screen next we divide by the fullscreen size, this will bring everything in range [0,1]. Next
* we scale to NDC range [-1,1]. */
gl_Position = vec4((((pos * size + dst_offset) / fullscreen) * 2.0 - 1.0), 1.0, 1.0);
vec2 uvoff = (src_offset / fullscreen);
uvcoordsvar = vec4(pos + uvoff, 0.0, 0.0);
}

View File

@ -11,8 +11,7 @@
#include "gpu_backend.hh"
#include "mtl_capabilities.hh"
namespace blender {
namespace gpu {
namespace blender::gpu {
class Batch;
class DrawList;
@ -20,7 +19,6 @@ class FrameBuffer;
class IndexBuf;
class QueryPool;
class Shader;
class Texture;
class UniformBuf;
class VertBuf;
class MTLContext;
@ -32,6 +30,11 @@ class MTLBackend : public GPUBackend {
/* Capabilities. */
static MTLCapabilities capabilities;
static MTLCapabilities &get_capabilities()
{
return MTLBackend::capabilities;
}
inline ~MTLBackend()
{
MTLBackend::platform_exit();
@ -49,6 +52,11 @@ class MTLBackend : public GPUBackend {
/* Placeholder */
}
void compute_dispatch_indirect(StorageBuf *indirect_buf) override
{
/* Placeholder */
}
/* MTL Allocators need to be implemented in separate .mm files, due to allocation of Objective-C
* objects. */
Context *context_alloc(void *ghost_window) override;
@ -60,6 +68,7 @@ class MTLBackend : public GPUBackend {
Shader *shader_alloc(const char *name) override;
Texture *texture_alloc(const char *name) override;
UniformBuf *uniformbuf_alloc(int size, const char *name) override;
StorageBuf *storagebuf_alloc(int size, GPUUsageType usage, const char *name) override;
VertBuf *vertbuf_alloc() override;
/* Render Frame Coordination. */
@ -75,5 +84,4 @@ class MTLBackend : public GPUBackend {
static void capabilities_init(MTLContext *ctx);
};
} // namespace gpu
} // namespace blender
} // namespace blender::gpu

View File

@ -8,6 +8,7 @@
#include "gpu_backend.hh"
#include "mtl_backend.hh"
#include "mtl_context.hh"
#include "gpu_capabilities_private.hh"
#include "gpu_platform_private.hh"
@ -16,8 +17,7 @@
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>
namespace blender {
namespace gpu {
namespace blender::gpu {
/* Global per-thread AutoReleasePool. */
thread_local NSAutoreleasePool *g_autoreleasepool = nil;
@ -33,8 +33,7 @@ void MTLBackend::samplers_update(){
Context *MTLBackend::context_alloc(void *ghost_window)
{
/* TODO(Metal): Implement MTLContext. */
return nullptr;
return new MTLContext(ghost_window);
};
Batch *MTLBackend::batch_alloc()
@ -75,8 +74,7 @@ Shader *MTLBackend::shader_alloc(const char *name)
Texture *MTLBackend::texture_alloc(const char *name)
{
/* TODO(Metal): Implement MTLTexture. */
return nullptr;
return new gpu::MTLTexture(name);
}
UniformBuf *MTLBackend::uniformbuf_alloc(int size, const char *name)
@ -85,6 +83,12 @@ UniformBuf *MTLBackend::uniformbuf_alloc(int size, const char *name)
return nullptr;
};
StorageBuf *MTLBackend::storagebuf_alloc(int size, GPUUsageType usage, const char *name)
{
/* TODO(Metal): Implement MTLStorageBuf. */
return nullptr;
}
VertBuf *MTLBackend::vertbuf_alloc()
{
/* TODO(Metal): Implement MTLVertBuf. */
@ -404,5 +408,4 @@ void MTLBackend::capabilities_init(MTLContext *ctx)
/** \} */
} // gpu
} // blender
} // blender::gpu

View File

@ -12,11 +12,11 @@ namespace gpu {
/*** Derived from: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf ***/
/** Upper Bound/Fixed Limits **/
#define METAL_MAX_TEXTURE_SLOTS 128
#define METAL_MAX_SAMPLER_SLOTS METAL_MAX_TEXTURE_SLOTS
#define METAL_MAX_UNIFORM_BUFFER_BINDINGS 31
#define METAL_MAX_VERTEX_INPUT_ATTRIBUTES 31
#define METAL_MAX_UNIFORMS_PER_BLOCK 64
#define MTL_MAX_TEXTURE_SLOTS 128
#define MTL_MAX_SAMPLER_SLOTS MTL_MAX_TEXTURE_SLOTS
#define MTL_MAX_UNIFORM_BUFFER_BINDINGS 31
#define MTL_MAX_VERTEX_INPUT_ATTRIBUTES 31
#define MTL_MAX_UNIFORMS_PER_BLOCK 64
/* Context-specific limits -- populated in 'MTLBackend::platform_init' */
typedef struct MTLCapabilities {

View File

@ -0,0 +1,8 @@
#ifndef __MTL_COMMON
#define __MTL_COMMON
// -- Renderer Options --
#define MTL_MAX_SET_BYTES_SIZE 4096
#define MTL_FORCE_WAIT_IDLE 0
#endif

View File

@ -0,0 +1,185 @@
/** \file
* \ingroup gpu
*/
#include "MEM_guardedalloc.h"
#include "gpu_context_private.hh"
#include "GPU_context.h"
#include "mtl_texture.hh"
#include <Cocoa/Cocoa.h>
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>
@class CAMetalLayer;
@class MTLCommandQueue;
@class MTLRenderPipelineState;
namespace blender::gpu {
typedef struct MTLContextTextureUtils {
/* Depth Update Utilities */
/* Depth texture updates are not directly supported with Blit operations, similarly, we cannot
* use a compute shader to write to depth, so we must instead render to a depth target.
* These processes use vertex/fragment shaders to render texture data from an intermediate
* source, in order to prime the depth buffer*/
blender::Map<DepthTextureUpdateRoutineSpecialisation, GPUShader *> depth_2d_update_shaders;
GPUShader *fullscreen_blit_shader = nullptr;
/* Texture Read/Update routines */
blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
texture_1d_read_compute_psos;
blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
texture_1d_array_read_compute_psos;
blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
texture_2d_read_compute_psos;
blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
texture_2d_array_read_compute_psos;
blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
texture_3d_read_compute_psos;
blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
texture_cube_read_compute_psos;
blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
texture_cube_array_read_compute_psos;
blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
texture_buffer_read_compute_psos;
blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
texture_1d_update_compute_psos;
blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
texture_1d_array_update_compute_psos;
blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
texture_2d_update_compute_psos;
blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
texture_2d_array_update_compute_psos;
blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
texture_3d_update_compute_psos;
blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
texture_cube_update_compute_psos;
blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
texture_cube_array_update_compute_psos;
blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
texture_buffer_update_compute_psos;
template<typename T>
inline void free_cached_pso_map(blender::Map<T, id<MTLComputePipelineState>> &map)
{
for (typename blender::Map<T, id<MTLComputePipelineState>>::MutableItem item : map.items()) {
[item.value release];
}
map.clear();
}
inline void init()
{
fullscreen_blit_shader = nullptr;
}
inline void cleanup()
{
if (fullscreen_blit_shader) {
GPU_shader_free(fullscreen_blit_shader);
}
/* Free Read shader maps */
free_cached_pso_map(texture_1d_read_compute_psos);
free_cached_pso_map(texture_1d_read_compute_psos);
free_cached_pso_map(texture_1d_array_read_compute_psos);
free_cached_pso_map(texture_2d_read_compute_psos);
free_cached_pso_map(texture_2d_array_read_compute_psos);
free_cached_pso_map(texture_3d_read_compute_psos);
free_cached_pso_map(texture_cube_read_compute_psos);
free_cached_pso_map(texture_cube_array_read_compute_psos);
free_cached_pso_map(texture_buffer_read_compute_psos);
free_cached_pso_map(texture_1d_update_compute_psos);
free_cached_pso_map(texture_1d_array_update_compute_psos);
free_cached_pso_map(texture_2d_update_compute_psos);
free_cached_pso_map(texture_2d_array_update_compute_psos);
free_cached_pso_map(texture_3d_update_compute_psos);
free_cached_pso_map(texture_cube_update_compute_psos);
free_cached_pso_map(texture_cube_array_update_compute_psos);
free_cached_pso_map(texture_buffer_update_compute_psos);
}
} MTLContextTextureUtils;
typedef struct MTLContextGlobalShaderPipelineState {
/* ..TODO(Metal): More elements to be added as backend fleshed out.. */
/*** DATA and IMAGE access state ***/
uint unpack_row_length;
} MTLContextGlobalShaderPipelineState;
/* Metal Buffer */
typedef struct MTLTemporaryBufferRange {
id<MTLBuffer> metal_buffer;
void *host_ptr;
unsigned long long buffer_offset;
unsigned long long size;
MTLResourceOptions options;
void flush();
bool requires_flush();
} MTLTemporaryBufferRange;
/** MTLContext -- Core render loop and state management **/
/* Note(Metal): Partial MTLContext stub to provide wrapper functionality
* for work-in-progress MTL* classes. */
class MTLContext : public Context {
friend class MTLBackend;
private:
/* Compute and specialisation caches */
MTLContextTextureUtils texture_utils_;
public:
/* METAL API Resource Handles. */
id<MTLCommandQueue> queue = nil;
id<MTLDevice> device = nil;
/* GPUContext interface. */
MTLContext(void *ghost_window);
~MTLContext();
static void check_error(const char *info);
void activate(void) override;
void deactivate(void) override;
void flush(void) override;
void finish(void) override;
void memory_statistics_get(int *total_mem, int *free_mem) override;
void debug_group_begin(const char *name, int index) override;
void debug_group_end(void) override;
/*** Context Utility functions */
/*
* All below functions modify the global state for the context, controlling the flow of
* rendering, binding resources, setting global state, resource management etc;
*/
/* Metal Context Core functions */
/* Command Buffer Management */
id<MTLCommandBuffer> get_active_command_buffer();
/* Render Pass State and Management */
void begin_render_pass();
void end_render_pass();
/* Shaders and Pipeline state */
MTLContextGlobalShaderPipelineState pipeline_state;
/* Texture utilities */
MTLContextTextureUtils &get_texture_utils()
{
return this->texture_utils_;
}
};
} // namespace blender::gpu

View File

@ -0,0 +1,101 @@
/** \file
* \ingroup gpu
*/
#include "mtl_context.hh"
#include "mtl_debug.hh"
using namespace blender;
using namespace blender::gpu;
namespace blender::gpu {
/* -------------------------------------------------------------------- */
/** \name Memory Management
* \{ */
bool MTLTemporaryBufferRange::requires_flush()
{
/* We do not need to flush shared memory */
return this->options & MTLResourceStorageModeManaged;
}
void MTLTemporaryBufferRange::flush()
{
if (this->requires_flush()) {
BLI_assert(this->metal_buffer);
BLI_assert((this->buffer_offset + this->size) <= [this->metal_buffer length]);
BLI_assert(this->buffer_offset >= 0);
[this->metal_buffer
didModifyRange:NSMakeRange(this->buffer_offset, this->size - this->buffer_offset)];
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name MTLContext
* \{ */
/* Placeholder functions */
MTLContext::MTLContext(void *ghost_window)
{
/* Init debug. */
debug::mtl_debug_init();
/* TODO(Metal): Implement. */
}
MTLContext::~MTLContext()
{
/* TODO(Metal): Implement. */
}
void MTLContext::check_error(const char *info)
{
/* TODO(Metal): Implement. */
}
void MTLContext::activate(void)
{
/* TODO(Metal): Implement. */
}
void MTLContext::deactivate(void)
{
/* TODO(Metal): Implement. */
}
void MTLContext::flush(void)
{
/* TODO(Metal): Implement. */
}
void MTLContext::finish(void)
{
/* TODO(Metal): Implement. */
}
void MTLContext::memory_statistics_get(int *total_mem, int *free_mem)
{
/* TODO(Metal): Implement. */
*total_mem = 0;
*free_mem = 0;
}
id<MTLCommandBuffer> MTLContext::get_active_command_buffer()
{
/* TODO(Metal): Implement. */
return nil;
}
/* Render Pass State and Management */
void MTLContext::begin_render_pass()
{
/* TODO(Metal): Implement. */
}
void MTLContext::end_render_pass()
{
/* TODO(Metal): Implement. */
}
/** \} */
} // blender::gpu

View File

@ -0,0 +1,58 @@
/** \file
* \ingroup gpu
*/
#pragma once
#include "BKE_global.h"
#include "CLG_log.h"
namespace blender {
namespace gpu {
namespace debug {
extern CLG_LogRef LOG;
/* Initialise debugging. */
void mtl_debug_init();
/* Using Macro's instead of variadic template due to non-string-literal
* warning for CLG_logf when indirectly passing format string. */
#define EXPAND_ARGS(...) , ##__VA_ARGS__
#define MTL_LOG_ERROR(info, ...) \
{ \
if (G.debug & G_DEBUG_GPU) { \
CLG_logf(debug::LOG.type, \
CLG_SEVERITY_ERROR, \
"[Metal Viewport Error]", \
"", \
info EXPAND_ARGS(__VA_ARGS__)); \
} \
BLI_assert(false); \
}
#define MTL_LOG_WARNING(info, ...) \
{ \
if (G.debug & G_DEBUG_GPU) { \
CLG_logf(debug::LOG.type, \
CLG_SEVERITY_WARN, \
"[Metal Viewport Warning]", \
"", \
info EXPAND_ARGS(__VA_ARGS__)); \
} \
}
#define MTL_LOG_INFO(info, ...) \
{ \
if (G.debug & G_DEBUG_GPU) { \
CLG_logf(debug::LOG.type, \
CLG_SEVERITY_INFO, \
"[Metal Viewport Info]", \
"", \
info EXPAND_ARGS(__VA_ARGS__)); \
} \
}
} // namespace debug
} // namespace gpu
} // namespace blender

View File

@ -0,0 +1,66 @@
/** \file
* \ingroup gpu
*
* Debug features of OpenGL.
*/
#include "BLI_compiler_attrs.h"
#include "BLI_string.h"
#include "BLI_system.h"
#include "BLI_utildefines.h"
#include "BKE_global.h"
#include "GPU_debug.h"
#include "GPU_platform.h"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "CLG_log.h"
#include <utility>
namespace blender::gpu::debug {
CLG_LogRef LOG = {"gpu.debug.metal"};
void mtl_debug_init()
{
CLOG_ENSURE(&LOG);
}
} // namespace blender::gpu::debug
namespace blender::gpu {
/* -------------------------------------------------------------------- */
/** \name Debug Groups
*
* Useful for debugging through XCode GPU Debugger. This ensures all the API calls grouped into
* "passes".
* \{ */
void MTLContext::debug_group_begin(const char *name, int index)
{
if (G.debug & G_DEBUG_GPU) {
id<MTLCommandBuffer> cmd = this->get_active_command_buffer();
if (cmd != nil) {
[cmd pushDebugGroup:[NSString stringWithFormat:@"%s_%d", name, index]];
}
}
}
void MTLContext::debug_group_end()
{
if (G.debug & G_DEBUG_GPU) {
id<MTLCommandBuffer> cmd = this->get_active_command_buffer();
if (cmd != nil) {
[cmd popDebugGroup];
}
}
}
/** \} */
} // namespace blender::gpu

View File

@ -0,0 +1,605 @@
/** \file
* \ingroup gpu
*/
#pragma once
#include <Cocoa/Cocoa.h>
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>
#include "BLI_assert.h"
#include "MEM_guardedalloc.h"
#include "gpu_texture_private.hh"
#include "BLI_map.hh"
#include "GPU_texture.h"
#include <mutex>
#include <thread>
@class CAMetalLayer;
@class MTLCommandQueue;
@class MTLRenderPipelineState;
struct GPUFrameBuffer;
/* Texture Update system structs. */
struct TextureUpdateRoutineSpecialisation {
/* The METAL type of data in input array, e.g. half, float, short, int */
std::string input_data_type;
/* The type of the texture data texture2d<T,..>, e.g. T=float, half, int etc. */
std::string output_data_type;
/* Number of image channels provided in input texture data array (min=1, max=4). */
int component_count_input;
/* Number of channels the destination texture has (min=1, max=4). */
int component_count_output;
inline bool operator==(const TextureUpdateRoutineSpecialisation &other) const
{
return ((input_data_type == other.input_data_type) &&
(output_data_type == other.output_data_type) &&
(component_count_input == other.component_count_input) &&
(component_count_output == other.component_count_output));
}
};
template<> struct blender::DefaultHash<TextureUpdateRoutineSpecialisation> {
inline uint64_t operator()(const TextureUpdateRoutineSpecialisation &key) const
{
DefaultHash<std::string> string_hasher;
return (uint64_t)string_hasher(
key.input_data_type + key.output_data_type +
std::to_string((key.component_count_input << 8) + key.component_count_output));
}
};
/* Type of data is being writen to the depth target:
* 0 = floating point (0.0 - 1.0)
* 1 = 24 bit integer (0 - 2^24)
* 2 = 32 bit integer (0 - 2^32) */
typedef enum {
MTL_DEPTH_UPDATE_MODE_FLOAT = 0,
MTL_DEPTH_UPDATE_MODE_INT24 = 1,
MTL_DEPTH_UPDATE_MODE_INT32 = 2
} DepthTextureUpdateMode;
struct DepthTextureUpdateRoutineSpecialisation {
DepthTextureUpdateMode data_mode;
inline bool operator==(const DepthTextureUpdateRoutineSpecialisation &other) const
{
return ((data_mode == other.data_mode));
}
};
template<> struct blender::DefaultHash<DepthTextureUpdateRoutineSpecialisation> {
inline uint64_t operator()(const DepthTextureUpdateRoutineSpecialisation &key) const
{
return (uint64_t)(key.data_mode);
}
};
/* Texture Read system structs. */
struct TextureReadRoutineSpecialisation {
std::string input_data_type;
std::string output_data_type;
int component_count_input;
int component_count_output;
/* Format for depth data.
* 0 = Not a Depth format,
* 1 = FLOAT DEPTH,
* 2 = 24Bit Integer Depth,
* 4 = 32bit unsigned Integer Depth. */
int depth_format_mode;
inline bool operator==(const TextureReadRoutineSpecialisation &other) const
{
return ((input_data_type == other.input_data_type) &&
(output_data_type == other.output_data_type) &&
(component_count_input == other.component_count_input) &&
(component_count_output == other.component_count_output) &&
(depth_format_mode == other.depth_format_mode));
}
};
template<> struct blender::DefaultHash<TextureReadRoutineSpecialisation> {
inline uint64_t operator()(const TextureReadRoutineSpecialisation &key) const
{
DefaultHash<std::string> string_hasher;
return (uint64_t)string_hasher(key.input_data_type + key.output_data_type +
std::to_string((key.component_count_input << 8) +
key.component_count_output +
(key.depth_format_mode << 28)));
}
};
namespace blender::gpu {
class MTLContext;
class MTLVertBuf;
/* Metal Texture internal implementation. */
static const int MTL_MAX_MIPMAP_COUNT = 15; /* Max: 16384x16384 */
static const int MTL_MAX_FBO_ATTACHED = 16;
/* Samplers */
typedef struct MTLSamplerState {
eGPUSamplerState state;
/* Mip min and mip max on sampler state always the same.
* Level range now controlled with textureView to be consistent with GL baseLevel. */
inline bool operator==(const MTLSamplerState &other) const
{
/* Add other parameters as needed. */
return (this->state == other.state);
}
operator unsigned int() const
{
return (unsigned int)state;
}
operator uint64_t() const
{
return (uint64_t)state;
}
} MTLSamplerState;
const MTLSamplerState DEFAULT_SAMPLER_STATE = {GPU_SAMPLER_DEFAULT /*, 0, 9999*/};
} // namespace blender::gpu
template<> struct blender::DefaultHash<blender::gpu::MTLSamplerState> {
inline uint64_t operator()(const blender::gpu::MTLSamplerState &key) const
{
const DefaultHash<unsigned int> uint_hasher;
uint64_t main_hash = (uint64_t)uint_hasher((unsigned int)(key.state));
/* Hash other parameters as needed. */
return main_hash;
}
};
namespace blender::gpu {
class MTLTexture : public Texture {
friend class MTLContext;
friend class MTLStateManager;
friend class MTLFrameBuffer;
private:
/* Where the textures data comes from. */
enum {
MTL_TEXTURE_MODE_DEFAULT, /* Texture is self-initialised (Standard). */
MTL_TEXTURE_MODE_EXTERNAL, /* Texture source from external id<MTLTexture> handle */
MTL_TEXTURE_MODE_VBO, /* Texture source initialised from VBO */
MTL_TEXTURE_MODE_TEXTURE_VIEW /* Texture is a view into an existing texture. */
} resource_mode_;
/* 'baking' refers to the generation of GPU-backed resources. This flag ensures GPU resources are
* ready. Baking is generally deferred until as late as possible, to ensure all associated
* resource state has been specified up-front. */
bool is_baked_;
MTLTextureDescriptor *texture_descriptor_;
id<MTLTexture> texture_;
MTLTextureUsage usage_;
/* Texture Storage. */
id<MTLBuffer> texture_buffer_;
unsigned int aligned_w_ = 0;
/* Blit Framebuffer. */
GPUFrameBuffer *blit_fb_ = nullptr;
unsigned int blit_fb_slice_ = 0;
unsigned int blit_fb_mip_ = 0;
/* Texure view properties */
/* In Metal, we use texture views to either limit mipmap ranges,
* , apply a swizzle mask, or both.
* We apply the mip limit in the view rather than in the sampler, as
* certain effects and functionality such as textureSize rely on the base level
* being modified.
*
* Texture views can also point to external textures, rather than the owned
* texture if MTL_TEXTURE_MODE_TEXTURE_VIEW is used.
* If this mode is used, source_texture points to a GPUTexture from which
* we pull their texture handle as a root.
*/
const GPUTexture *source_texture_ = nullptr;
enum TextureViewDirtyState {
TEXTURE_VIEW_NOT_DIRTY = 0,
TEXTURE_VIEW_SWIZZLE_DIRTY = (1 << 0),
TEXTURE_VIEW_MIP_DIRTY = (1 << 1)
};
id<MTLTexture> mip_swizzle_view_;
char tex_swizzle_mask_[4];
MTLTextureSwizzleChannels mtl_swizzle_mask_;
bool mip_range_dirty_ = false;
int mip_texture_base_level_ = 0;
int mip_texture_max_level_ = 1000;
int mip_texture_base_layer_ = 0;
int texture_view_dirty_flags_ = TEXTURE_VIEW_NOT_DIRTY;
/* Max mip-maps for currently allocated texture resource. */
int mtl_max_mips_ = 1;
/* VBO. */
MTLVertBuf *vert_buffer_;
id<MTLBuffer> vert_buffer_mtl_;
int vert_buffer_offset_;
/* Core parameters and subresources. */
eGPUTextureUsage gpu_image_usage_flags_;
/* Whether the texture's properties or state has changed (e.g. mipmap range), and re-baking of
* GPU resource is required. */
bool is_dirty_;
bool is_bound_;
public:
MTLTexture(const char *name);
MTLTexture(const char *name,
eGPUTextureFormat format,
eGPUTextureType type,
id<MTLTexture> metal_texture);
~MTLTexture();
void update_sub(
int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data) override;
void generate_mipmap(void) override;
void copy_to(Texture *dst) override;
void clear(eGPUDataFormat format, const void *data) override;
void swizzle_set(const char swizzle_mask[4]) override;
void stencil_texture_mode_set(bool use_stencil) override{
/* TODO(Metal): implement. */
};
void mip_range_set(int min, int max) override;
void *read(int mip, eGPUDataFormat type) override;
/* Remove once no longer required -- will just return 0 for now in MTL path*/
uint gl_bindcode_get(void) const override;
bool texture_is_baked();
inline const char *get_name()
{
return name_;
}
protected:
bool init_internal(void) override;
bool init_internal(GPUVertBuf *vbo) override;
bool init_internal(const GPUTexture *src,
int mip_offset,
int layer_offset) override; /* Texture View */
private:
/* Common Constructor, default initialisation */
void mtl_texture_init();
/* Post-construction and member initialisation, prior to baking.
* Called during init_internal */
void prepare_internal();
/* Generate Metal GPU resources and upload data if needed */
void ensure_baked();
/* Delete associated Metal GPU resources. */
void reset();
void ensure_mipmaps(int miplvl);
/* Flags a given mip level as being used. */
void add_subresource(unsigned int level);
void read_internal(int mip,
int x_off,
int y_off,
int z_off,
int width,
int height,
int depth,
eGPUDataFormat desired_output_format,
int num_output_components,
int debug_data_size,
void *r_data);
void bake_mip_swizzle_view();
id<MTLTexture> get_metal_handle();
id<MTLTexture> get_metal_handle_base();
MTLSamplerState get_sampler_state();
void blit(id<MTLBlitCommandEncoder> blit_encoder,
unsigned int src_x_offset,
unsigned int src_y_offset,
unsigned int src_z_offset,
unsigned int src_slice,
unsigned int src_mip,
gpu::MTLTexture *dest,
unsigned int dst_x_offset,
unsigned int dst_y_offset,
unsigned int dst_z_offset,
unsigned int dst_slice,
unsigned int dst_mip,
unsigned int width,
unsigned int height,
unsigned int depth);
void blit(gpu::MTLTexture *dest,
unsigned int src_x_offset,
unsigned int src_y_offset,
unsigned int dst_x_offset,
unsigned int dst_y_offset,
unsigned int src_mip,
unsigned int dst_mip,
unsigned int dst_slice,
int width,
int height);
GPUFrameBuffer *get_blit_framebuffer(unsigned int dst_slice, unsigned int dst_mip);
MEM_CXX_CLASS_ALLOC_FUNCS("gpu::MTLTexture")
/* Texture Update function Utilities. */
/* Metal texture updating does not provide the same range of functionality for type conversiona
* and format compatibilities as are available in OpenGL. To achieve the same level of
* functionality, we need to instead use compute kernels to perform texture data conversions
* where appropriate.
* There are a number of different inputs which affect permutations and thus require different
* shaders and PSOs, such as:
* - Texture format
* - Texture type (e.g. 2D, 3D, 2D Array, Depth etc;)
* - Source data format and component count (e.g. floating point)
*
* MECHANISM:
*
* blender::map<INPUT DEFINES STRUCT, compute PSO> update_2d_array_kernel_psos;
* - Generate compute shader with configured kernel below with variable parameters depending
* on input/output format configurations. Do not need to keep source or descriptors around,
* just PSO, as same input defines will always generate the same code.
*
* - IF datatype IS an exact match e.g. :
* - Per-component size matches (e.g. GPU_DATA_UBYTE)
* OR GPU_DATA_10_11_11_REV && GPU_R11G11B10 (equiv)
* OR D24S8 and GPU_DATA_UINT_24_8
* We can Use BLIT ENCODER.
*
* OTHERWISE TRIGGER COMPUTE:
* - Compute sizes will vary. Threads per grid WILL match 'extent'.
* Dimensions will vary depending on texture type.
* - Will use setBytes with 'TextureUpdateParams' struct to pass in useful member params.
*/
struct TextureUpdateParams {
int mip_index;
int extent[3]; /* Width, Height, Slice on 2D Array tex*/
int offset[3]; /* Width, Height, Slice on 2D Array tex*/
uint unpack_row_length; /* Number of pixels between bytes in input data */
};
id<MTLComputePipelineState> texture_update_1d_get_kernel(
TextureUpdateRoutineSpecialisation specialisation);
id<MTLComputePipelineState> texture_update_1d_array_get_kernel(
TextureUpdateRoutineSpecialisation specialisation);
id<MTLComputePipelineState> texture_update_2d_get_kernel(
TextureUpdateRoutineSpecialisation specialisation);
id<MTLComputePipelineState> texture_update_2d_array_get_kernel(
TextureUpdateRoutineSpecialisation specialisation);
id<MTLComputePipelineState> texture_update_3d_get_kernel(
TextureUpdateRoutineSpecialisation specialisation);
id<MTLComputePipelineState> mtl_texture_update_impl(
TextureUpdateRoutineSpecialisation specialisation_params,
blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
&specialisation_cache,
eGPUTextureType texture_type);
/* Depth Update Utilities */
/* Depth texture updates are not directly supported with Blit operations, similarly, we cannot
* use a compute shader to write to depth, so we must instead render to a depth target.
* These processes use vertex/fragment shaders to render texture data from an intermediate
* source, in order to prime the depth buffer*/
GPUShader *depth_2d_update_sh_get(DepthTextureUpdateRoutineSpecialisation specialisation);
void update_sub_depth_2d(
int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data);
/* Texture Read function utilities -- Follows a similar mechanism to the updating routines */
struct TextureReadParams {
int mip_index;
int extent[3]; /* Width, Height, Slice on 2D Array tex*/
int offset[3]; /* Width, Height, Slice on 2D Array tex*/
};
id<MTLComputePipelineState> texture_read_1d_get_kernel(
TextureReadRoutineSpecialisation specialisation);
id<MTLComputePipelineState> texture_read_1d_array_get_kernel(
TextureReadRoutineSpecialisation specialisation);
id<MTLComputePipelineState> texture_read_2d_get_kernel(
TextureReadRoutineSpecialisation specialisation);
id<MTLComputePipelineState> texture_read_2d_array_get_kernel(
TextureReadRoutineSpecialisation specialisation);
id<MTLComputePipelineState> texture_read_3d_get_kernel(
TextureReadRoutineSpecialisation specialisation);
id<MTLComputePipelineState> mtl_texture_read_impl(
TextureReadRoutineSpecialisation specialisation_params,
blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
&specialisation_cache,
eGPUTextureType texture_type);
/* fullscreen blit utilities. */
GPUShader *fullscreen_blit_sh_get();
};
/* Utility */
MTLPixelFormat gpu_texture_format_to_metal(eGPUTextureFormat tex_format);
int get_mtl_format_bytesize(MTLPixelFormat tex_format);
int get_mtl_format_num_components(MTLPixelFormat tex_format);
bool mtl_format_supports_blending(MTLPixelFormat format);
/* The type used to define the per-component data in the input buffer. */
inline std::string tex_data_format_to_msl_type_str(eGPUDataFormat type)
{
switch (type) {
case GPU_DATA_FLOAT:
return "float";
case GPU_DATA_HALF_FLOAT:
return "half";
case GPU_DATA_INT:
return "int";
case GPU_DATA_UINT:
return "uint";
case GPU_DATA_UBYTE:
return "uchar";
case GPU_DATA_UINT_24_8:
return "uint"; /* Problematic type - but will match alignment. */
case GPU_DATA_10_11_11_REV:
return "float"; /* Problematic type - each component will be read as a float. */
default:
BLI_assert(false);
break;
}
return "";
}
/* The type T which goes into texture2d<T, access>. */
inline std::string tex_data_format_to_msl_texture_template_type(eGPUDataFormat type)
{
switch (type) {
case GPU_DATA_FLOAT:
return "float";
case GPU_DATA_HALF_FLOAT:
return "half";
case GPU_DATA_INT:
return "int";
case GPU_DATA_UINT:
return "uint";
case GPU_DATA_UBYTE:
return "ushort";
case GPU_DATA_UINT_24_8:
return "uint"; /* Problematic type. */
case GPU_DATA_10_11_11_REV:
return "float"; /* Problematic type. */
default:
BLI_assert(false);
break;
}
return "";
}
/* Determine whether format is writable or not. Use mtl_format_get_writeable_view_format(..) for
* these. */
inline bool mtl_format_is_writable(MTLPixelFormat format)
{
switch (format) {
case MTLPixelFormatRGBA8Unorm_sRGB:
case MTLPixelFormatBGRA8Unorm_sRGB:
case MTLPixelFormatDepth16Unorm:
case MTLPixelFormatDepth32Float:
case MTLPixelFormatDepth32Float_Stencil8:
case MTLPixelFormatBGR10A2Unorm:
case MTLPixelFormatDepth24Unorm_Stencil8:
return false;
default:
return true;
}
return true;
}
/* For the cases where a texture format is unwritable, we can create a texture view of a similar
* format */
inline MTLPixelFormat mtl_format_get_writeable_view_format(MTLPixelFormat format)
{
switch (format) {
case MTLPixelFormatRGBA8Unorm_sRGB:
return MTLPixelFormatRGBA8Unorm;
case MTLPixelFormatBGRA8Unorm_sRGB:
return MTLPixelFormatBGRA8Unorm;
case MTLPixelFormatDepth16Unorm:
return MTLPixelFormatR16Unorm;
case MTLPixelFormatDepth32Float:
return MTLPixelFormatR32Float;
case MTLPixelFormatDepth32Float_Stencil8:
/* return MTLPixelFormatRG32Float; */
/* No alternative mirror format. This should not be used for
* manual data upload */
return MTLPixelFormatInvalid;
case MTLPixelFormatBGR10A2Unorm:
/* return MTLPixelFormatBGRA8Unorm; */
/* No alternative mirror format. This should not be used for
* manual data upload */
return MTLPixelFormatInvalid;
case MTLPixelFormatDepth24Unorm_Stencil8:
/* No direct format, but we'll just mirror the bytes -- Uint
* should ensure bytes are not re-normalized or manipulated */
/* return MTLPixelFormatR32Uint; */
return MTLPixelFormatInvalid;
default:
return format;
}
return format;
}
/* Returns the associated engine data type with a given texture:
* Definitely not complete, edit according to the METAL specification. */
inline eGPUDataFormat to_mtl_internal_data_format(eGPUTextureFormat tex_format)
{
switch (tex_format) {
case GPU_RGBA8:
case GPU_RGBA32F:
case GPU_RGBA16F:
case GPU_RGBA16:
case GPU_RG8:
case GPU_RG32F:
case GPU_RG16F:
case GPU_RG16:
case GPU_R8:
case GPU_R32F:
case GPU_R16F:
case GPU_R16:
case GPU_RGB16F:
case GPU_DEPTH_COMPONENT24:
case GPU_DEPTH_COMPONENT16:
case GPU_DEPTH_COMPONENT32F:
case GPU_SRGB8_A8:
return GPU_DATA_FLOAT;
case GPU_DEPTH24_STENCIL8:
case GPU_DEPTH32F_STENCIL8:
return GPU_DATA_UINT_24_8;
case GPU_RGBA8UI:
case GPU_RGBA32UI:
case GPU_RGBA16UI:
case GPU_RG8UI:
case GPU_RG32UI:
case GPU_R8UI:
case GPU_R16UI:
case GPU_RG16UI:
case GPU_R32UI:
return GPU_DATA_UINT;
case GPU_R8I:
case GPU_RG8I:
case GPU_R16I:
case GPU_R32I:
case GPU_RG16I:
case GPU_RGBA8I:
case GPU_RGBA32I:
case GPU_RGBA16I:
case GPU_RG32I:
return GPU_DATA_INT;
case GPU_R11F_G11F_B10F:
return GPU_DATA_10_11_11_REV;
default:
BLI_assert(false && "Texture not yet handled");
return GPU_DATA_FLOAT;
}
}
} // namespace blender::gpu

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,748 @@
/** \file
* \ingroup gpu
*/
#include "BKE_global.h"
#include "DNA_userdef_types.h"
#include "GPU_batch.h"
#include "GPU_batch_presets.h"
#include "GPU_capabilities.h"
#include "GPU_framebuffer.h"
#include "GPU_platform.h"
#include "GPU_state.h"
#include "mtl_backend.hh"
#include "mtl_context.hh"
#include "mtl_texture.hh"
/* Utility file for secondary functionality which supports mtl_texture.mm. */
extern char datatoc_compute_texture_update_msl[];
extern char datatoc_depth_2d_update_vert_glsl[];
extern char datatoc_depth_2d_update_float_frag_glsl[];
extern char datatoc_depth_2d_update_int24_frag_glsl[];
extern char datatoc_depth_2d_update_int32_frag_glsl[];
extern char datatoc_compute_texture_read_msl[];
extern char datatoc_gpu_shader_fullscreen_blit_vert_glsl[];
extern char datatoc_gpu_shader_fullscreen_blit_frag_glsl[];
namespace blender::gpu {
/* -------------------------------------------------------------------- */
/** \name Texture Utility Functions
* \{ */
MTLPixelFormat gpu_texture_format_to_metal(eGPUTextureFormat tex_format)
{
switch (tex_format) {
/* Formats texture & renderbuffer. */
case GPU_RGBA8UI:
return MTLPixelFormatRGBA8Uint;
case GPU_RGBA8I:
return MTLPixelFormatRGBA8Sint;
case GPU_RGBA8:
return MTLPixelFormatRGBA8Unorm;
case GPU_RGBA32UI:
return MTLPixelFormatRGBA32Uint;
case GPU_RGBA32I:
return MTLPixelFormatRGBA32Sint;
case GPU_RGBA32F:
return MTLPixelFormatRGBA32Float;
case GPU_RGBA16UI:
return MTLPixelFormatRGBA16Uint;
case GPU_RGBA16I:
return MTLPixelFormatRGBA16Sint;
case GPU_RGBA16F:
return MTLPixelFormatRGBA16Float;
case GPU_RGBA16:
return MTLPixelFormatRGBA16Unorm;
case GPU_RG8UI:
return MTLPixelFormatRG8Uint;
case GPU_RG8I:
return MTLPixelFormatRG8Sint;
case GPU_RG8:
return MTLPixelFormatRG8Unorm;
case GPU_RG32UI:
return MTLPixelFormatRG32Uint;
case GPU_RG32I:
return MTLPixelFormatRG32Sint;
case GPU_RG32F:
return MTLPixelFormatRG32Float;
case GPU_RG16UI:
return MTLPixelFormatRG16Uint;
case GPU_RG16I:
return MTLPixelFormatRG16Sint;
case GPU_RG16F:
return MTLPixelFormatRG16Float;
case GPU_RG16:
return MTLPixelFormatRG16Float;
case GPU_R8UI:
return MTLPixelFormatR8Uint;
case GPU_R8I:
return MTLPixelFormatR8Sint;
case GPU_R8:
return MTLPixelFormatR8Unorm;
case GPU_R32UI:
return MTLPixelFormatR32Uint;
case GPU_R32I:
return MTLPixelFormatR32Sint;
case GPU_R32F:
return MTLPixelFormatR32Float;
case GPU_R16UI:
return MTLPixelFormatR16Uint;
case GPU_R16I:
return MTLPixelFormatR16Sint;
case GPU_R16F:
return MTLPixelFormatR16Float;
case GPU_R16:
return MTLPixelFormatR16Snorm;
/* Special formats texture & renderbuffer. */
case GPU_R11F_G11F_B10F:
return MTLPixelFormatRG11B10Float;
case GPU_DEPTH32F_STENCIL8:
return MTLPixelFormatDepth32Float_Stencil8;
case GPU_DEPTH24_STENCIL8: {
BLI_assert(false && "GPU_DEPTH24_STENCIL8 not supported by Apple Silicon.");
return MTLPixelFormatDepth24Unorm_Stencil8;
}
case GPU_SRGB8_A8:
return MTLPixelFormatRGBA8Unorm_sRGB;
case GPU_RGB16F:
return MTLPixelFormatRGBA16Float;
/* Depth Formats. */
case GPU_DEPTH_COMPONENT32F:
case GPU_DEPTH_COMPONENT24:
return MTLPixelFormatDepth32Float;
case GPU_DEPTH_COMPONENT16:
return MTLPixelFormatDepth16Unorm;
default:
BLI_assert(!"Unrecognised GPU pixel format!\n");
return MTLPixelFormatRGBA8Unorm;
}
}
int get_mtl_format_bytesize(MTLPixelFormat tex_format)
{
switch (tex_format) {
case MTLPixelFormatRGBA8Uint:
case MTLPixelFormatRGBA8Sint:
case MTLPixelFormatRGBA8Unorm:
return 4;
case MTLPixelFormatRGBA32Uint:
case MTLPixelFormatRGBA32Sint:
case MTLPixelFormatRGBA32Float:
return 16;
case MTLPixelFormatRGBA16Uint:
case MTLPixelFormatRGBA16Sint:
case MTLPixelFormatRGBA16Float:
case MTLPixelFormatRGBA16Unorm:
return 8;
case MTLPixelFormatRG8Uint:
case MTLPixelFormatRG8Sint:
case MTLPixelFormatRG8Unorm:
return 2;
case MTLPixelFormatRG32Uint:
case MTLPixelFormatRG32Sint:
case MTLPixelFormatRG32Float:
return 8;
case MTLPixelFormatRG16Uint:
case MTLPixelFormatRG16Sint:
case MTLPixelFormatRG16Float:
return 4;
case MTLPixelFormatR8Uint:
case MTLPixelFormatR8Sint:
case MTLPixelFormatR8Unorm:
return 1;
case MTLPixelFormatR32Uint:
case MTLPixelFormatR32Sint:
case MTLPixelFormatR32Float:
return 4;
case MTLPixelFormatR16Uint:
case MTLPixelFormatR16Sint:
case MTLPixelFormatR16Float:
case MTLPixelFormatR16Snorm:
return 2;
case MTLPixelFormatRG11B10Float:
return 4;
case MTLPixelFormatDepth32Float_Stencil8:
return 8;
case MTLPixelFormatRGBA8Unorm_sRGB:
case MTLPixelFormatDepth32Float:
case MTLPixelFormatDepth24Unorm_Stencil8:
return 4;
case MTLPixelFormatDepth16Unorm:
return 2;
default:
BLI_assert(!"Unrecognised GPU pixel format!\n");
return 1;
}
}
int get_mtl_format_num_components(MTLPixelFormat tex_format)
{
switch (tex_format) {
case MTLPixelFormatRGBA8Uint:
case MTLPixelFormatRGBA8Sint:
case MTLPixelFormatRGBA8Unorm:
case MTLPixelFormatRGBA32Uint:
case MTLPixelFormatRGBA32Sint:
case MTLPixelFormatRGBA32Float:
case MTLPixelFormatRGBA16Uint:
case MTLPixelFormatRGBA16Sint:
case MTLPixelFormatRGBA16Float:
case MTLPixelFormatRGBA16Unorm:
case MTLPixelFormatRGBA8Unorm_sRGB:
return 4;
case MTLPixelFormatRG11B10Float:
return 3;
case MTLPixelFormatRG8Uint:
case MTLPixelFormatRG8Sint:
case MTLPixelFormatRG8Unorm:
case MTLPixelFormatRG32Uint:
case MTLPixelFormatRG32Sint:
case MTLPixelFormatRG32Float:
case MTLPixelFormatRG16Uint:
case MTLPixelFormatRG16Sint:
case MTLPixelFormatRG16Float:
case MTLPixelFormatDepth32Float_Stencil8:
return 2;
case MTLPixelFormatR8Uint:
case MTLPixelFormatR8Sint:
case MTLPixelFormatR8Unorm:
case MTLPixelFormatR32Uint:
case MTLPixelFormatR32Sint:
case MTLPixelFormatR32Float:
case MTLPixelFormatR16Uint:
case MTLPixelFormatR16Sint:
case MTLPixelFormatR16Float:
case MTLPixelFormatR16Snorm:
case MTLPixelFormatDepth32Float:
case MTLPixelFormatDepth16Unorm:
case MTLPixelFormatDepth24Unorm_Stencil8:
/* Treating this format as single-channel for direct data copies -- Stencil component is not
* addressable. */
return 1;
default:
BLI_assert(!"Unrecognised GPU pixel format!\n");
return 1;
}
}
bool mtl_format_supports_blending(MTLPixelFormat format)
{
/* Add formats as needed -- Verify platforms. */
const MTLCapabilities &capabilities = MTLBackend::get_capabilities();
if (capabilities.supports_family_mac1 || capabilities.supports_family_mac_catalyst1) {
switch (format) {
case MTLPixelFormatA8Unorm:
case MTLPixelFormatR8Uint:
case MTLPixelFormatR8Sint:
case MTLPixelFormatR16Uint:
case MTLPixelFormatR16Sint:
case MTLPixelFormatRG32Uint:
case MTLPixelFormatRG32Sint:
case MTLPixelFormatRGBA8Uint:
case MTLPixelFormatRGBA8Sint:
case MTLPixelFormatRGBA32Uint:
case MTLPixelFormatRGBA32Sint:
case MTLPixelFormatDepth16Unorm:
case MTLPixelFormatDepth32Float:
case MTLPixelFormatInvalid:
case MTLPixelFormatBGR10A2Unorm:
case MTLPixelFormatRGB10A2Uint:
return false;
default:
return true;
}
}
else {
switch (format) {
case MTLPixelFormatA8Unorm:
case MTLPixelFormatR8Uint:
case MTLPixelFormatR8Sint:
case MTLPixelFormatR16Uint:
case MTLPixelFormatR16Sint:
case MTLPixelFormatRG32Uint:
case MTLPixelFormatRG32Sint:
case MTLPixelFormatRGBA8Uint:
case MTLPixelFormatRGBA8Sint:
case MTLPixelFormatRGBA32Uint:
case MTLPixelFormatRGBA32Sint:
case MTLPixelFormatRGBA32Float:
case MTLPixelFormatDepth16Unorm:
case MTLPixelFormatDepth32Float:
case MTLPixelFormatInvalid:
case MTLPixelFormatBGR10A2Unorm:
case MTLPixelFormatRGB10A2Uint:
return false;
default:
return true;
}
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Texture data upload routines
* \{ */
id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl(
TextureUpdateRoutineSpecialisation specialisation_params,
blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
&specialisation_cache,
eGPUTextureType texture_type)
{
/* Check whether the Kernel exists. */
id<MTLComputePipelineState> *result = specialisation_cache.lookup_ptr(specialisation_params);
if (result != nullptr) {
return *result;
}
id<MTLComputePipelineState> return_pso = nil;
@autoreleasepool {
/* Fetch active context. */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
/** SOURCE. **/
NSString *tex_update_kernel_src = [NSString
stringWithUTF8String:datatoc_compute_texture_update_msl];
/* Prepare options and specialisations. */
MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease];
options.languageVersion = MTLLanguageVersion2_2;
options.preprocessorMacros = @{
@"INPUT_DATA_TYPE" :
[NSString stringWithUTF8String:specialisation_params.input_data_type.c_str()],
@"OUTPUT_DATA_TYPE" :
[NSString stringWithUTF8String:specialisation_params.output_data_type.c_str()],
@"COMPONENT_COUNT_INPUT" :
[NSNumber numberWithInt:specialisation_params.component_count_input],
@"COMPONENT_COUNT_OUTPUT" :
[NSNumber numberWithInt:specialisation_params.component_count_output],
@"TEX_TYPE" : [NSNumber numberWithInt:((int)(texture_type))]
};
/* Prepare shader library for conversion routine. */
NSError *error = NULL;
id<MTLLibrary> temp_lib = [[ctx->device newLibraryWithSource:tex_update_kernel_src
options:options
error:&error] autorelease];
if (error) {
NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
BLI_assert(false);
return nullptr;
}
/* Fetch compute function. */
BLI_assert(temp_lib != nil);
id<MTLFunction> temp_compute_function = [[temp_lib
newFunctionWithName:@"compute_texture_update"] autorelease];
BLI_assert(temp_compute_function);
/* Otherwise, bake new Kernel. */
id<MTLComputePipelineState> compute_pso = [ctx->device
newComputePipelineStateWithFunction:temp_compute_function
error:&error];
if (error || compute_pso == nil) {
NSLog(@"Failed to prepare texture_update MTLComputePipelineState %@", error);
BLI_assert(false);
}
/* Store PSO. */
[compute_pso retain];
specialisation_cache.add_new(specialisation_params, compute_pso);
return_pso = compute_pso;
}
BLI_assert(return_pso != nil);
return return_pso;
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_update_1d_get_kernel(
TextureUpdateRoutineSpecialisation specialisation)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
return mtl_texture_update_impl(specialisation,
mtl_context->get_texture_utils().texture_1d_update_compute_psos,
GPU_TEXTURE_1D);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_update_1d_array_get_kernel(
TextureUpdateRoutineSpecialisation specialisation)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
return mtl_texture_update_impl(
specialisation,
mtl_context->get_texture_utils().texture_1d_array_update_compute_psos,
GPU_TEXTURE_1D_ARRAY);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_update_2d_get_kernel(
TextureUpdateRoutineSpecialisation specialisation)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
return mtl_texture_update_impl(specialisation,
mtl_context->get_texture_utils().texture_2d_update_compute_psos,
GPU_TEXTURE_2D);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_update_2d_array_get_kernel(
TextureUpdateRoutineSpecialisation specialisation)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
return mtl_texture_update_impl(
specialisation,
mtl_context->get_texture_utils().texture_2d_array_update_compute_psos,
GPU_TEXTURE_2D_ARRAY);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_update_3d_get_kernel(
TextureUpdateRoutineSpecialisation specialisation)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
return mtl_texture_update_impl(specialisation,
mtl_context->get_texture_utils().texture_3d_update_compute_psos,
GPU_TEXTURE_3D);
}
/* TODO(Metal): Data upload routine kernel for texture cube and texture cube array.
* Currently does not appear to be hit. */
GPUShader *gpu::MTLTexture::depth_2d_update_sh_get(
DepthTextureUpdateRoutineSpecialisation specialisation)
{
/* Check whether the Kernel exists. */
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
GPUShader **result = mtl_context->get_texture_utils().depth_2d_update_shaders.lookup_ptr(
specialisation);
if (result != nullptr) {
return *result;
}
const char *fragment_source = nullptr;
switch (specialisation.data_mode) {
case MTL_DEPTH_UPDATE_MODE_FLOAT:
fragment_source = datatoc_depth_2d_update_float_frag_glsl;
break;
case MTL_DEPTH_UPDATE_MODE_INT24:
fragment_source = datatoc_depth_2d_update_int24_frag_glsl;
break;
case MTL_DEPTH_UPDATE_MODE_INT32:
fragment_source = datatoc_depth_2d_update_int32_frag_glsl;
break;
default:
BLI_assert(false && "Invalid format mode\n");
return nullptr;
}
GPUShader *shader = GPU_shader_create(datatoc_depth_2d_update_vert_glsl,
fragment_source,
nullptr,
nullptr,
nullptr,
"depth_2d_update_sh_get");
mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialisation, shader);
return shader;
}
GPUShader *gpu::MTLTexture::fullscreen_blit_sh_get()
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
if (mtl_context->get_texture_utils().fullscreen_blit_shader == nullptr) {
const char *vertex_source = datatoc_gpu_shader_fullscreen_blit_vert_glsl;
const char *fragment_source = datatoc_gpu_shader_fullscreen_blit_frag_glsl;
GPUShader *shader = GPU_shader_create(
vertex_source, fragment_source, nullptr, nullptr, nullptr, "fullscreen_blit");
mtl_context->get_texture_utils().fullscreen_blit_shader = shader;
}
return mtl_context->get_texture_utils().fullscreen_blit_shader;
}
/* Special routine for updating 2D depth textures using the rendering pipeline. */
void gpu::MTLTexture::update_sub_depth_2d(
int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data)
{
/* Verify we are in a valid configuration. */
BLI_assert(ELEM(this->format_,
GPU_DEPTH_COMPONENT24,
GPU_DEPTH_COMPONENT32F,
GPU_DEPTH_COMPONENT16,
GPU_DEPTH24_STENCIL8,
GPU_DEPTH32F_STENCIL8));
BLI_assert(validate_data_format_mtl(this->format_, type));
BLI_assert(ELEM(type, GPU_DATA_FLOAT, GPU_DATA_UINT_24_8, GPU_DATA_UINT));
/* Determine whether we are in GPU_DATA_UINT_24_8 or GPU_DATA_FLOAT mode. */
bool is_float = (type == GPU_DATA_FLOAT);
eGPUTextureFormat format = (is_float) ? GPU_R32F : GPU_R32I;
/* Shader key - Add parameters here for different configurations. */
DepthTextureUpdateRoutineSpecialisation specialisation;
switch (type) {
case GPU_DATA_FLOAT:
specialisation.data_mode = MTL_DEPTH_UPDATE_MODE_FLOAT;
break;
case GPU_DATA_UINT_24_8:
specialisation.data_mode = MTL_DEPTH_UPDATE_MODE_INT24;
break;
case GPU_DATA_UINT:
specialisation.data_mode = MTL_DEPTH_UPDATE_MODE_INT32;
break;
default:
BLI_assert(false && "Unsupported eGPUDataFormat being passed to depth texture update\n");
return;
}
/* Push contents into an r32_tex and render contents to depth using a shader. */
GPUTexture *r32_tex_tmp = GPU_texture_create_2d(
"depth_intermediate_copy_tex", this->w_, this->h_, 1, format, nullptr);
GPU_texture_filter_mode(r32_tex_tmp, false);
GPU_texture_wrap_mode(r32_tex_tmp, false, true);
gpu::MTLTexture *mtl_tex = static_cast<gpu::MTLTexture *>(unwrap(r32_tex_tmp));
mtl_tex->update_sub(mip, offset, extent, type, data);
GPUFrameBuffer *restore_fb = GPU_framebuffer_active_get();
GPUFrameBuffer *depth_fb_temp = GPU_framebuffer_create("depth_intermediate_copy_fb");
GPU_framebuffer_texture_attach(depth_fb_temp, wrap(static_cast<Texture *>(this)), 0, mip);
GPU_framebuffer_bind(depth_fb_temp);
if (extent[0] == this->w_ && extent[1] == this->h_) {
/* Skip load if the whole texture is being updated. */
GPU_framebuffer_clear_depth(depth_fb_temp, 0.0);
GPU_framebuffer_clear_stencil(depth_fb_temp, 0);
}
GPUShader *depth_2d_update_sh = depth_2d_update_sh_get(specialisation);
BLI_assert(depth_2d_update_sh != nullptr);
GPUBatch *quad = GPU_batch_preset_quad();
GPU_batch_set_shader(quad, depth_2d_update_sh);
GPU_batch_texture_bind(quad, "source_data", r32_tex_tmp);
GPU_batch_uniform_1i(quad, "mip", mip);
GPU_batch_uniform_2f(quad, "extent", (float)extent[0], (float)extent[1]);
GPU_batch_uniform_2f(quad, "offset", (float)offset[0], (float)offset[1]);
GPU_batch_uniform_2f(quad, "size", (float)this->w_, (float)this->h_);
bool depth_write_prev = GPU_depth_mask_get();
uint stencil_mask_prev = GPU_stencil_mask_get();
eGPUDepthTest depth_test_prev = GPU_depth_test_get();
eGPUStencilTest stencil_test_prev = GPU_stencil_test_get();
GPU_scissor_test(true);
GPU_scissor(offset[0], offset[1], extent[0], extent[1]);
GPU_stencil_write_mask_set(0xFF);
GPU_stencil_reference_set(0);
GPU_stencil_test(GPU_STENCIL_ALWAYS);
GPU_depth_mask(true);
GPU_depth_test(GPU_DEPTH_ALWAYS);
GPU_batch_draw(quad);
GPU_depth_mask(depth_write_prev);
GPU_stencil_write_mask_set(stencil_mask_prev);
GPU_stencil_test(stencil_test_prev);
GPU_depth_test(depth_test_prev);
if (restore_fb != nullptr) {
GPU_framebuffer_bind(restore_fb);
}
else {
GPU_framebuffer_restore();
}
GPU_framebuffer_free(depth_fb_temp);
GPU_texture_free(r32_tex_tmp);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Texture data read routines
* \{ */
id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
TextureReadRoutineSpecialisation specialisation_params,
blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
&specialisation_cache,
eGPUTextureType texture_type)
{
/* Check whether the Kernel exists. */
id<MTLComputePipelineState> *result = specialisation_cache.lookup_ptr(specialisation_params);
if (result != nullptr) {
return *result;
}
id<MTLComputePipelineState> return_pso = nil;
@autoreleasepool {
/* Fetch active context. */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
/** SOURCE. **/
NSString *tex_update_kernel_src = [NSString
stringWithUTF8String:datatoc_compute_texture_read_msl];
/* Defensive Debug Checks. */
long long int depth_scale_factor = 1;
if (specialisation_params.depth_format_mode > 0) {
BLI_assert(specialisation_params.component_count_input == 1);
BLI_assert(specialisation_params.component_count_output == 1);
switch (specialisation_params.depth_format_mode) {
case 1:
/* FLOAT */
depth_scale_factor = 1;
break;
case 2:
/* D24 unsigned int */
depth_scale_factor = 0xFFFFFFu;
break;
case 4:
/* D32 unsigned int */
depth_scale_factor = 0xFFFFFFFFu;
break;
default:
BLI_assert_msg(0, "Unrecognised mode");
break;
}
}
/* Prepare options and specialisations. */
MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease];
options.languageVersion = MTLLanguageVersion2_2;
options.preprocessorMacros = @{
@"INPUT_DATA_TYPE" :
[NSString stringWithUTF8String:specialisation_params.input_data_type.c_str()],
@"OUTPUT_DATA_TYPE" :
[NSString stringWithUTF8String:specialisation_params.output_data_type.c_str()],
@"COMPONENT_COUNT_INPUT" :
[NSNumber numberWithInt:specialisation_params.component_count_input],
@"COMPONENT_COUNT_OUTPUT" :
[NSNumber numberWithInt:specialisation_params.component_count_output],
@"WRITE_COMPONENT_COUNT" :
[NSNumber numberWithInt:min_ii(specialisation_params.component_count_input,
specialisation_params.component_count_output)],
@"IS_DEPTH_FORMAT" :
[NSNumber numberWithInt:((specialisation_params.depth_format_mode > 0) ? 1 : 0)],
@"DEPTH_SCALE_FACTOR" : [NSNumber numberWithLongLong:depth_scale_factor],
@"TEX_TYPE" : [NSNumber numberWithInt:((int)(texture_type))]
};
/* Prepare shader library for conversion routine. */
NSError *error = NULL;
id<MTLLibrary> temp_lib = [[ctx->device newLibraryWithSource:tex_update_kernel_src
options:options
error:&error] autorelease];
if (error) {
NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
BLI_assert(false);
return nil;
}
/* Fetch compute function. */
BLI_assert(temp_lib != nil);
id<MTLFunction> temp_compute_function = [[temp_lib newFunctionWithName:@"compute_texture_read"]
autorelease];
BLI_assert(temp_compute_function);
/* Otherwise, bake new Kernel. */
id<MTLComputePipelineState> compute_pso = [ctx->device
newComputePipelineStateWithFunction:temp_compute_function
error:&error];
if (error || compute_pso == nil) {
NSLog(@"Failed to prepare texture_read MTLComputePipelineState %@", error);
BLI_assert(false);
return nil;
}
/* Store PSO. */
[compute_pso retain];
specialisation_cache.add_new(specialisation_params, compute_pso);
return_pso = compute_pso;
}
BLI_assert(return_pso != nil);
return return_pso;
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_read_2d_get_kernel(
TextureReadRoutineSpecialisation specialisation)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
return mtl_texture_read_impl(specialisation,
mtl_context->get_texture_utils().texture_2d_read_compute_psos,
GPU_TEXTURE_2D);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_read_2d_array_get_kernel(
TextureReadRoutineSpecialisation specialisation)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
return mtl_texture_read_impl(specialisation,
mtl_context->get_texture_utils().texture_2d_array_read_compute_psos,
GPU_TEXTURE_2D_ARRAY);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_read_1d_get_kernel(
TextureReadRoutineSpecialisation specialisation)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
return mtl_texture_read_impl(specialisation,
mtl_context->get_texture_utils().texture_1d_read_compute_psos,
GPU_TEXTURE_1D);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_read_1d_array_get_kernel(
TextureReadRoutineSpecialisation specialisation)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
return mtl_texture_read_impl(specialisation,
mtl_context->get_texture_utils().texture_1d_array_read_compute_psos,
GPU_TEXTURE_1D_ARRAY);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_read_3d_get_kernel(
TextureReadRoutineSpecialisation specialisation)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
return mtl_texture_read_impl(specialisation,
mtl_context->get_texture_utils().texture_3d_read_compute_psos,
GPU_TEXTURE_3D);
}
/** \} */
} // namespace blender::gpu