Fix #107356: Cycles: improve oneAPI error handling

This commit is contained in:
Nikita Sirgienko 2023-05-03 12:05:04 +02:00
parent 8775cf804e
commit 1dcc8e6ffa
1 changed files with 39 additions and 22 deletions

View File

@ -302,6 +302,12 @@ void OneapiDevice::mem_copy_to(device_memory &mem)
<< string_human_readable_size(mem.memory_size()) << ")";
}
/* After getting runtime errors we need to avoid performing oneAPI runtime operations
* because the associated GPU context may be in an invalid state at this point. */
if (have_error()) {
return;
}
if (mem.type == MEM_GLOBAL) {
global_free(mem);
global_alloc(mem);
@ -334,6 +340,12 @@ void OneapiDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t
<< " data " << size << " bytes";
}
/* After getting runtime errors we need to avoid performing oneAPI runtime operations
* because the associated GPU context may be in an invalid state at this point. */
if (have_error()) {
return;
}
assert(device_queue_);
assert(size != 0);
@ -357,6 +369,12 @@ void OneapiDevice::mem_zero(device_memory &mem)
<< string_human_readable_size(mem.memory_size()) << ")\n";
}
/* After getting runtime errors we need to avoid performing oneAPI runtime operations
* because the associated GPU context may be in an invalid state at this point. */
if (have_error()) {
return;
}
if (!mem.device_pointer) {
mem_alloc(mem);
}
@ -602,33 +620,33 @@ bool OneapiDevice::usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t n
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
OneapiDevice::check_usm(queue_, dest, true);
OneapiDevice::check_usm(queue_, src, true);
sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
# ifdef WITH_CYCLES_DEBUG
try {
sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
# ifdef WITH_CYCLES_DEBUG
/* NOTE(@nsirgien) Waiting on memory operation may give more precise error
* messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
*/
mem_event.wait_and_throw();
return true;
# else
sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
src_type == sycl::usm::alloc::device;
bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
src_type == sycl::usm::alloc::unknown;
/* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host
* may not wait until the end of the transfer before using the memory.
*/
if (from_device_to_host || host_or_device_memop_with_offset)
mem_event.wait();
return true;
# endif
}
catch (sycl::exception const &e) {
oneapi_error_string_ = e.what();
return false;
}
# else
sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
src_type == sycl::usm::alloc::device;
bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
src_type == sycl::usm::alloc::unknown;
/* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host
* may not wait until the end of the transfer before using the memory.
*/
if (from_device_to_host || host_or_device_memop_with_offset)
mem_event.wait();
return true;
# endif
}
bool OneapiDevice::usm_memset(SyclQueue *queue_,
@ -639,23 +657,22 @@ bool OneapiDevice::usm_memset(SyclQueue *queue_,
assert(queue_);
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
OneapiDevice::check_usm(queue_, usm_ptr, true);
sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
# ifdef WITH_CYCLES_DEBUG
try {
sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
# ifdef WITH_CYCLES_DEBUG
/* NOTE(@nsirgien) Waiting on memory operation may give more precise error
* messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
*/
mem_event.wait_and_throw();
# else
(void)mem_event;
# endif
return true;
}
catch (sycl::exception const &e) {
oneapi_error_string_ = e.what();
return false;
}
# else
(void)mem_event;
return true;
# endif
}
bool OneapiDevice::queue_synchronize(SyclQueue *queue_)