Fix performance regression on Metal/AMD due to new BSDFs

The increased amount of BSDF code from Principled BSDF v2 and the
microfacet BSDF led to a big performance regression on Metal and AMD.
We have not been able to find a good workaround for all scenes.

This change disables the Principled Hair BSDF code when it is not used
in the scene. This makes common benchmark scenes faster, but
performance is still bad in scenes that do use it.

Ref #112596

Pull Request: https://projects.blender.org/blender/blender/pulls/113904
This commit is contained in:
Brecht Van Lommel 2023-10-18 22:17:05 +02:00 committed by Brecht Van Lommel
parent 15d316a51a
commit e11f031d62
5 changed files with 35 additions and 4 deletions

View File

@ -345,6 +345,12 @@ string MetalDevice::preprocess_source(MetalPipelineType pso_type,
break;
case METAL_GPU_AMD:
global_defines += "#define __KERNEL_METAL_AMD__\n";
/* The increased amount of BSDF code leads to a big performance regression
* on AMD. There is currently no general workaround to fix this. Instead,
* only enable Principled Hair when the scene actually uses it. */
if (kernel_features & KERNEL_FEATURE_NODE_PRINCIPLED_HAIR) {
global_defines += "#define WITH_PRINCIPLED_HAIR\n";
}
break;
case METAL_GPU_APPLE:
global_defines += "#define __KERNEL_METAL_APPLE__\n";

View File

@ -195,6 +195,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
sc, Ng, sd->wi, rand_xy, eval, wo, pdf, sampled_roughness);
*eta = 1.0f;
break;
# ifdef __PRINCIPLED_HAIR__
case CLOSURE_BSDF_HAIR_CHIANG_ID:
label = bsdf_hair_chiang_sample(kg, sc, sd, rand, eval, wo, pdf, sampled_roughness);
*eta = 1.0f;
@ -203,6 +204,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
label = bsdf_hair_huang_sample(kg, sc, sd, rand, eval, wo, pdf, sampled_roughness);
*eta = 1.0f;
break;
# endif
case CLOSURE_BSDF_SHEEN_ID:
label = bsdf_sheen_sample(sc, Ng, sd->wi, rand_xy, eval, wo, pdf);
*sampled_roughness = one_float2();
@ -325,6 +327,7 @@ ccl_device_inline void bsdf_roughness_eta(const KernelGlobals kg,
((ccl_private HairBsdf *)sc)->roughness2);
*eta = 1.0f;
break;
# ifdef __PRINCIPLED_HAIR__
case CLOSURE_BSDF_HAIR_CHIANG_ID:
alpha = ((ccl_private ChiangHairBSDF *)sc)->m0_roughness;
*roughness = make_float2(alpha, alpha);
@ -335,6 +338,7 @@ ccl_device_inline void bsdf_roughness_eta(const KernelGlobals kg,
*roughness = make_float2(alpha, alpha);
*eta = 1.0f;
break;
# endif
case CLOSURE_BSDF_SHEEN_ID:
alpha = ((ccl_private SheenBsdf *)sc)->roughness;
*roughness = make_float2(alpha, alpha);
@ -409,6 +413,7 @@ ccl_device_inline int bsdf_label(const KernelGlobals kg,
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
label = LABEL_TRANSMIT | LABEL_GLOSSY;
break;
# ifdef __PRINCIPLED_HAIR__
case CLOSURE_BSDF_HAIR_CHIANG_ID:
if (bsdf_is_transmission(sc, wo))
label = LABEL_TRANSMIT | LABEL_GLOSSY;
@ -418,6 +423,7 @@ ccl_device_inline int bsdf_label(const KernelGlobals kg,
case CLOSURE_BSDF_HAIR_HUANG_ID:
label = LABEL_REFLECT | LABEL_GLOSSY;
break;
# endif
case CLOSURE_BSDF_SHEEN_ID:
label = LABEL_REFLECT | LABEL_DIFFUSE;
break;
@ -500,12 +506,14 @@ ccl_device_inline
case CLOSURE_BSDF_GLOSSY_TOON_ID:
eval = bsdf_glossy_toon_eval(sc, sd->wi, wo, pdf);
break;
# ifdef __PRINCIPLED_HAIR__
case CLOSURE_BSDF_HAIR_CHIANG_ID:
eval = bsdf_hair_chiang_eval(kg, sd, sc, wo, pdf);
break;
case CLOSURE_BSDF_HAIR_HUANG_ID:
eval = bsdf_hair_huang_eval(kg, sd, sc, wo, pdf);
break;
# endif
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
eval = bsdf_hair_reflection_eval(sc, sd->wi, wo, pdf);
break;
@ -560,12 +568,14 @@ ccl_device void bsdf_blur(KernelGlobals kg, ccl_private ShaderClosure *sc, float
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
bsdf_ashikhmin_shirley_blur(sc, roughness);
break;
# ifdef __PRINCIPLED_HAIR__
case CLOSURE_BSDF_HAIR_CHIANG_ID:
bsdf_hair_chiang_blur(sc, roughness);
break;
case CLOSURE_BSDF_HAIR_HUANG_ID:
bsdf_hair_huang_blur(sc, roughness);
break;
# endif
default:
break;
}
@ -593,6 +603,7 @@ ccl_device_inline Spectrum bsdf_albedo(KernelGlobals kg,
albedo *= bsdf_microfacet_estimate_albedo(
kg, sd, (ccl_private const MicrofacetBsdf *)sc, reflection, transmission);
}
# ifdef __PRINCIPLED_HAIR__
else if (sc->type == CLOSURE_BSDF_HAIR_CHIANG_ID) {
/* TODO(lukas): Principled Hair could also be split into a glossy and a transmission component,
* similar to Glass BSDFs. */
@ -601,6 +612,7 @@ ccl_device_inline Spectrum bsdf_albedo(KernelGlobals kg,
else if (sc->type == CLOSURE_BSDF_HAIR_HUANG_ID) {
albedo *= bsdf_hair_huang_albedo(sd, sc);
}
# endif
#endif
return albedo;
}

View File

@ -638,6 +638,7 @@ ccl_device
break;
}
#ifdef __HAIR__
# ifdef __PRINCIPLED_HAIR__
case CLOSURE_BSDF_HAIR_CHIANG_ID:
case CLOSURE_BSDF_HAIR_HUANG_ID: {
uint4 data_node2 = read_node(kg, &offset);
@ -790,6 +791,7 @@ ccl_device
}
break;
}
# endif /* __PRINCIPLED_HAIR__ */
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: {
Spectrum weight = closure_weight * mix_weight;

View File

@ -75,6 +75,7 @@ CCL_NAMESPACE_BEGIN
#define __PASSES__
#define __PATCH_EVAL__
#define __POINTCLOUD__
#define __PRINCIPLED_HAIR__
#define __RAY_DIFFERENTIALS__
#define __SHADER_RAYTRACE__
#define __SHADOW_CATCHER__
@ -111,6 +112,10 @@ CCL_NAMESPACE_BEGIN
# undef __LIGHT_TREE__
/* Disabled due to compiler crash on Metal/AMD. */
# undef __MNEE__
/* Disabled due to performance regression on Metal/AMD, unless WITH_PRINCIPLED_HAIR is defined. */
# ifndef WITH_PRINCIPLED_HAIR
# undef __PRINCIPLED_HAIR__
# endif
#endif
/* Scene-based selective features compilation. */
@ -1679,9 +1684,7 @@ enum KernelFeatureFlag : uint32_t {
KERNEL_FEATURE_NODE_RAYTRACE = (1U << 6U),
KERNEL_FEATURE_NODE_AOV = (1U << 7U),
KERNEL_FEATURE_NODE_LIGHT_PATH = (1U << 8U),
/* Use denoising kernels and output denoising passes. */
KERNEL_FEATURE_DENOISING = (1U << 9U),
KERNEL_FEATURE_NODE_PRINCIPLED_HAIR = (1U << 9U),
/* Use path tracing kernels. */
KERNEL_FEATURE_PATH_TRACING = (1U << 10U),
@ -1730,6 +1733,9 @@ enum KernelFeatureFlag : uint32_t {
/* Light and shadow linking. */
KERNEL_FEATURE_LIGHT_LINKING = (1U << 27U),
KERNEL_FEATURE_SHADOW_LINKING = (1U << 28U),
/* Use denoising kernels and output denoising passes. */
KERNEL_FEATURE_DENOISING = (1U << 29U),
};
/* Shader node feature mask, to specialize shader evaluation for kernels. */
@ -1742,7 +1748,7 @@ enum KernelFeatureFlag : uint32_t {
#define KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW \
(KERNEL_FEATURE_NODE_BSDF | KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_BUMP | \
KERNEL_FEATURE_NODE_BUMP_STATE | KERNEL_FEATURE_NODE_VORONOI_EXTRA | \
KERNEL_FEATURE_NODE_LIGHT_PATH)
KERNEL_FEATURE_NODE_LIGHT_PATH | KERNEL_FEATURE_NODE_PRINCIPLED_HAIR)
#define KERNEL_FEATURE_NODE_MASK_SURFACE \
(KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW | KERNEL_FEATURE_NODE_RAYTRACE | \
KERNEL_FEATURE_NODE_AOV | KERNEL_FEATURE_NODE_LIGHT_PATH)

View File

@ -859,6 +859,11 @@ class PrincipledHairBsdfNode : public BsdfBaseNode {
NODE_SOCKET_API(NodePrincipledHairParametrization, parametrization)
/* Selected scattering model (near-/far-field). */
NODE_SOCKET_API(NodePrincipledHairModel, model)
virtual int get_feature()
{
return ShaderNode::get_feature() | KERNEL_FEATURE_NODE_PRINCIPLED_HAIR;
}
};
class HairBsdfNode : public BsdfNode {