Cycles: Fix hanging unit tests when MetalRT is enabled

This patch fixes hanging unit tests when MetalRT is enabled. It simplifies and fixes the kernel selection logic by baking the MetalRT-specific options into `kernels_md5` rather than expanding out and testing MetalRT bit flags explicitly.

Pull Request #105270
This commit is contained in:
Michael Jones 2023-02-28 11:42:08 +01:00 committed by Michael Jones (Apple)
parent 9fccd2dc24
commit 7842347ec8
5 changed files with 54 additions and 80 deletions

View File

@ -182,7 +182,7 @@ class Device {
{
}
/* Return true if device is ready for rendering, or report status if not. */
/* Report status and return true if device is ready for rendering. */
virtual bool is_ready(string & /*status*/) const
{
return true;

View File

@ -476,6 +476,9 @@ bool MetalDevice::make_source_and_check_if_compile_needed(MetalPipelineType pso_
MD5Hash md5;
md5.append(constant_values);
md5.append(source[pso_type]);
if (use_metalrt) {
md5.append(string_printf("metalrt_features=%d", kernel_features & METALRT_FEATURE_MASK));
}
kernels_md5[pso_type] = md5.get_hex();
return MetalDeviceKernels::should_load_kernels(this, pso_type);
@ -917,6 +920,17 @@ bool MetalDevice::is_ready(string &status) const
DEVICE_KERNEL_NUM);
return false;
}
if (int num_requests = MetalDeviceKernels::num_incomplete_specialization_requests()) {
status = string_printf("%d kernels to optimize", num_requests);
}
else if (kernel_specialization_level == PSO_SPECIALIZED_INTERSECT) {
status = "Using optimized intersection kernels";
}
else if (kernel_specialization_level == PSO_SPECIALIZED_SHADE) {
status = "Using optimized kernels";
}
metal_printf("MetalDevice::is_ready(...) --> true\n");
return true;
}
@ -953,7 +967,7 @@ void MetalDevice::optimize_for_scene(Scene *scene)
}
if (specialize_in_background) {
if (!MetalDeviceKernels::any_specialization_happening_now()) {
if (MetalDeviceKernels::num_incomplete_specialization_requests() == 0) {
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
specialize_kernels_fn);
}

View File

@ -63,8 +63,7 @@ enum MetalPipelineType {
};
# define METALRT_FEATURE_MASK \
(KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD | \
KERNEL_FEATURE_OBJECT_MOTION)
(KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD)
const char *kernel_type_as_string(MetalPipelineType pso_type);
@ -81,7 +80,7 @@ struct MetalKernelPipeline {
KernelData kernel_data_;
bool use_metalrt;
uint32_t metalrt_features = 0;
uint32_t kernel_features = 0;
int threads_per_threadgroup;
@ -104,7 +103,7 @@ struct MetalKernelPipeline {
/* Cache of Metal kernels for each DeviceKernel. */
namespace MetalDeviceKernels {
bool any_specialization_happening_now();
int num_incomplete_specialization_requests();
int get_loaded_kernel_count(MetalDevice const *device, MetalPipelineType pso_type);
bool should_load_kernels(MetalDevice const *device, MetalPipelineType pso_type);
bool load(MetalDevice *device, MetalPipelineType pso_type);

View File

@ -344,9 +344,7 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
/* metalrt options */
pipeline->use_metalrt = device->use_metalrt;
pipeline->metalrt_features = device->use_metalrt ?
(device->kernel_features & METALRT_FEATURE_MASK) :
0;
pipeline->kernel_features = device->kernel_features;
{
thread_scoped_lock lock(cache_mutex);
@ -357,65 +355,36 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const MetalDevice *device)
{
/* metalrt options */
bool use_metalrt = device->use_metalrt;
bool device_metalrt_hair = use_metalrt && device->kernel_features & KERNEL_FEATURE_HAIR;
bool device_metalrt_hair_thick = use_metalrt &&
device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
bool device_metalrt_pointcloud = use_metalrt &&
device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
bool device_metalrt_motion = use_metalrt &&
device->kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
MetalKernelPipeline *best_pipeline = nullptr;
while (!best_pipeline) {
while (running) {
/* Search all loaded pipelines with matching kernels_md5 checksums. */
MetalKernelPipeline *best_match = nullptr;
{
thread_scoped_lock lock(cache_mutex);
for (auto &pipeline : pipelines[kernel]) {
if (!pipeline->loaded) {
/* still loading - ignore */
continue;
}
bool pipeline_metalrt_hair = pipeline->metalrt_features & KERNEL_FEATURE_HAIR;
bool pipeline_metalrt_hair_thick = pipeline->metalrt_features & KERNEL_FEATURE_HAIR_THICK;
bool pipeline_metalrt_pointcloud = pipeline->metalrt_features & KERNEL_FEATURE_POINTCLOUD;
bool pipeline_metalrt_motion = use_metalrt &&
pipeline->metalrt_features & KERNEL_FEATURE_OBJECT_MOTION;
if (pipeline->use_metalrt != use_metalrt || pipeline_metalrt_hair != device_metalrt_hair ||
pipeline_metalrt_hair_thick != device_metalrt_hair_thick ||
pipeline_metalrt_pointcloud != device_metalrt_pointcloud ||
pipeline_metalrt_motion != device_metalrt_motion) {
/* wrong combination of metalrt options */
continue;
}
if (pipeline->pso_type != PSO_GENERIC) {
if (pipeline->kernels_md5 == device->kernels_md5[PSO_SPECIALIZED_INTERSECT] ||
pipeline->kernels_md5 == device->kernels_md5[PSO_SPECIALIZED_SHADE]) {
best_pipeline = pipeline.get();
for (auto &candidate : pipelines[kernel]) {
if (candidate->loaded &&
candidate->kernels_md5 == device->kernels_md5[candidate->pso_type]) {
/* Replace existing match if candidate is more specialized. */
if (!best_match || candidate->pso_type > best_match->pso_type) {
best_match = candidate.get();
}
}
else if (!best_pipeline) {
best_pipeline = pipeline.get();
}
}
}
if (!best_pipeline) {
std::this_thread::sleep_for(std::chrono::milliseconds(100));
if (best_match) {
if (best_match->usage_count == 0 && best_match->pso_type != PSO_GENERIC) {
metal_printf("Swapping in %s version of %s\n",
kernel_type_as_string(best_match->pso_type),
device_kernel_as_string(kernel));
}
best_match->usage_count += 1;
return best_match;
}
}
if (best_pipeline->usage_count == 0 && best_pipeline->pso_type != PSO_GENERIC) {
metal_printf("Swapping in %s version of %s\n",
kernel_type_as_string(best_pipeline->pso_type),
device_kernel_as_string(kernel));
/* Spin until a matching kernel is loaded, or we're shutting down. */
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
best_pipeline->usage_count += 1;
return best_pipeline;
return nullptr;
}
bool MetalKernelPipeline::should_use_binary_archive() const
@ -570,18 +539,14 @@ void MetalKernelPipeline::compile()
NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
NSArray *linked_functions = nil;
bool metalrt_hair = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR);
bool metalrt_hair_thick = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR_THICK);
bool metalrt_pointcloud = use_metalrt && (metalrt_features & KERNEL_FEATURE_POINTCLOUD);
if (use_metalrt) {
id<MTLFunction> curve_intersect_default = nil;
id<MTLFunction> curve_intersect_shadow = nil;
id<MTLFunction> point_intersect_default = nil;
id<MTLFunction> point_intersect_shadow = nil;
if (metalrt_hair) {
if (kernel_features & KERNEL_FEATURE_HAIR) {
/* Add curve intersection programs. */
if (metalrt_hair_thick) {
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
/* Slower programs for thick hair since that also slows down ribbons.
* Ideally this should not be needed. */
curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_ALL];
@ -592,7 +557,7 @@ void MetalKernelPipeline::compile()
curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON_SHADOW];
}
}
if (metalrt_pointcloud) {
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
point_intersect_default = rt_intersection_function[METALRT_FUNC_POINT];
point_intersect_shadow = rt_intersection_function[METALRT_FUNC_POINT_SHADOW];
}
@ -682,15 +647,6 @@ void MetalKernelPipeline::compile()
local_md5.append((uint8_t *)&this->threads_per_threadgroup,
sizeof(this->threads_per_threadgroup));
string options;
if (use_metalrt && kernel_has_intersection(device_kernel)) {
/* incorporate any MetalRT specializations into the archive name */
options += string_printf(".hair_%d.hair_thick_%d.pointcloud_%d",
metalrt_hair ? 1 : 0,
metalrt_hair_thick ? 1 : 0,
metalrt_pointcloud ? 1 : 0);
}
/* Replace non-alphanumerical characters with underscores. */
string device_name = [mtlDevice.name UTF8String];
for (char &c : device_name) {
@ -702,7 +658,7 @@ void MetalKernelPipeline::compile()
metalbin_name = device_name;
metalbin_name = path_join(metalbin_name, device_kernel_as_string(device_kernel));
metalbin_name = path_join(metalbin_name, kernel_type_as_string(pso_type));
metalbin_name = path_join(metalbin_name, local_md5.get_hex() + options + ".bin");
metalbin_name = path_join(metalbin_name, local_md5.get_hex() + ".bin");
metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
path_create_directories(metalbin_path);
@ -860,16 +816,15 @@ void MetalDeviceKernels::wait_for_all()
}
}
bool MetalDeviceKernels::any_specialization_happening_now()
int MetalDeviceKernels::num_incomplete_specialization_requests()
{
/* Return true if any ShaderCaches have ongoing specialization requests (typically there will be
* only 1). */
int total = 0;
for (int i = 0; i < g_shaderCacheCount; i++) {
if (g_shaderCache[i].second->incomplete_specialization_requests > 0) {
return true;
}
total += g_shaderCache[i].second->incomplete_specialization_requests;
}
return false;
return total;
}
int MetalDeviceKernels::get_loaded_kernel_count(MetalDevice const *device,

View File

@ -706,6 +706,12 @@ void Session::update_status_time(bool show_pause, bool show_done)
string_printf("Sample %d/%d", current_sample, num_samples));
}
/* Append any device-specific status (such as background kernel optimization) */
string device_status;
if (device->is_ready(device_status) && !device_status.empty()) {
substatus += string_printf(" (%s)", device_status.c_str());
}
/* TODO(sergey): Denoising status from the path trace. */
if (show_pause) {