Cleanup: remove __KERNEL_CPU__

__KERNEL_CPU__ was tested in some places to check whether code was being compiled for the CPU; however, it is only defined in the kernel, so the check does not work elsewhere. Checking that __KERNEL_GPU__ is not defined always works.
2022-07-25 17:38:03 +02:00 · 2022-07-25 17:38:03 +02:00 · f26aa186b2
parent 793d203139
commit f26aa186b2
15 changed files with 24 additions and 34 deletions
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@ -26,7 +26,6 @@
 #  include "util/task.h"
 #  include "util/time.h"

-#  undef __KERNEL_CPU__
 #  define __KERNEL_OPTIX__
 #  include "kernel/device/optix/globals.h"

--- a/intern/cycles/device/optix/queue.cpp
+++ b/intern/cycles/device/optix/queue.cpp
@ -8,7 +8,6 @@

 #  include "util/time.h"

-#  undef __KERNEL_CPU__
 #  define __KERNEL_OPTIX__
 #  include "kernel/device/optix/globals.h"

--- a/intern/cycles/kernel/bvh/util.h
+++ b/intern/cycles/kernel/bvh/util.h
@ -33,7 +33,7 @@ ccl_device_forceinline float intersection_t_offset(const float t)
  return __uint_as_float(bits);
 }

-#if defined(__KERNEL_CPU__)
+#ifndef __KERNEL_GPU__
 ccl_device int intersections_compare(const void *a, const void *b)
 {
  const Intersection *isect_a = (const Intersection *)a;
--- a/intern/cycles/kernel/closure/bsdf_hair_principled.h
+++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h
@ -3,7 +3,7 @@

 #pragma once

-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
 #  include <fenv.h>
 #endif

--- a/intern/cycles/kernel/device/cpu/compat.h
+++ b/intern/cycles/kernel/device/cpu/compat.h
@ -3,8 +3,6 @@

 #pragma once

-#define __KERNEL_CPU__
-
 /* Release kernel has too much false-positive maybe-uninitialized warnings,
 * which makes it possible to miss actual warnings.
 */
--- a/intern/cycles/kernel/integrator/intersect_shadow.h
+++ b/intern/cycles/kernel/integrator/intersect_shadow.h
@ -51,7 +51,7 @@ ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals k
 }

 #ifdef __TRANSPARENT_SHADOWS__
-#  if defined(__KERNEL_CPU__)
+#  ifndef __KERNEL_GPU__
 ccl_device int shadow_intersections_compare(const void *a, const void *b)
 {
  const Intersection *isect_a = (const Intersection *)a;
--- a/intern/cycles/kernel/integrator/path_state.h
+++ b/intern/cycles/kernel/integrator/path_state.h
@ -13,7 +13,7 @@ CCL_NAMESPACE_BEGIN
 ccl_device_inline void path_state_init_queues(IntegratorState state)
 {
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
  INTEGRATOR_STATE_WRITE(&state->shadow, shadow_path, queued_kernel) = 0;
  INTEGRATOR_STATE_WRITE(&state->ao, shadow_path, queued_kernel) = 0;
 #endif
--- a/intern/cycles/kernel/integrator/state.h
+++ b/intern/cycles/kernel/integrator/state.h
@ -140,7 +140,7 @@ typedef struct IntegratorStateGPU {
 * happen from a kernel which operates on a "main" path. Attempt to use shadow catcher accessors
 * from a kernel which operates on a shadow catcher state will cause bad memory access. */

-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__

 /* Scalar access on CPU. */

@ -159,7 +159,7 @@ typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState;
 #  define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
    ((state)->nested_struct[array_index].member)

-#else /* __KERNEL_CPU__ */
+#else /* !__KERNEL_GPU__ */

 /* Array access on GPU with Structure-of-Arrays. */

@ -180,6 +180,6 @@ typedef int ConstIntegratorShadowState;
 #  define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
    INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member)

-#endif /* __KERNEL_CPU__ */
+#endif /* !__KERNEL_GPU__ */

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/integrator/state_util.h
+++ b/intern/cycles/kernel/integrator/state_util.h
@ -338,7 +338,7 @@ ccl_device_inline IntegratorState integrator_state_shadow_catcher_split(KernelGl
  return to_state;
 }

-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
 ccl_device_inline int integrator_state_bounce(ConstIntegratorState state, const int)
 {
  return INTEGRATOR_STATE(state, path, bounce);
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@ -19,10 +19,6 @@

 #include "kernel/svm/types.h"

-#ifndef __KERNEL_GPU__
-#  define __KERNEL_CPU__
-#endif
-
 CCL_NAMESPACE_BEGIN

 /* Constants */
@ -51,10 +47,10 @@ CCL_NAMESPACE_BEGIN
 #define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024U
 #define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4U

-#ifdef __KERNEL_CPU__
-#  define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
-#else
+#ifdef __KERNEL_GPU__
 #  define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU
+#else
+#  define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
 #endif

 /* Kernel features */
@ -91,12 +87,12 @@ CCL_NAMESPACE_BEGIN
 #define __BRANCHED_PATH__

 /* Device specific features */
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
 #  ifdef WITH_OSL
 #    define __OSL__
 #  endif
 #  define __VOLUME_RECORD_ALL__
-#endif /* __KERNEL_CPU__ */
+#endif /* !__KERNEL_GPU__ */

 /* MNEE currently causes "Compute function exceeds available temporary registers"
 * on Metal, disabled for now. */
@ -722,7 +718,7 @@ typedef struct ccl_align(16) ShaderClosure
 {
  SHADER_CLOSURE_BASE;

-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
  float pad[2];
 #endif
  float data[10];
@ -1540,15 +1536,15 @@ enum KernelFeatureFlag : uint32_t {
 /* Must be constexpr on the CPU to avoid compile errors because the state types
 * are different depending on the main, shadow or null path. For GPU we don't have
 * C++17 everywhere so can't use it. */
-#ifdef __KERNEL_CPU__
+#ifdef __KERNEL_GPU__
+#  define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
+#  define IF_KERNEL_NODES_FEATURE(feature) \
+    if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
+#else
 #  define IF_KERNEL_FEATURE(feature) \
    if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
 #  define IF_KERNEL_NODES_FEATURE(feature) \
    if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
-#else
-#  define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
-#  define IF_KERNEL_NODES_FEATURE(feature) \
-    if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
 #endif

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/util/profiling.h
+++ b/intern/cycles/kernel/util/profiling.h
@ -3,13 +3,13 @@

 #pragma once

-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
 #  include "util/profiling.h"
 #endif

 CCL_NAMESPACE_BEGIN

-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
 #  define PROFILING_INIT(kg, event) \
    ProfilingHelper profiling_helper((ProfilingState *)&kg->profiler, event)
 #  define PROFILING_EVENT(event) profiling_helper.set_event(event)
@ -22,6 +22,6 @@ CCL_NAMESPACE_BEGIN
 #  define PROFILING_EVENT(event)
 #  define PROFILING_INIT_FOR_SHADER(kg, event)
 #  define PROFILING_SHADER(object, shader)
-#endif /* __KERNEL_CPU__ */
+#endif /* !__KERNEL_GPU__ */

 CCL_NAMESPACE_END
--- a/intern/cycles/test/util_avxf_avx2_test.cpp
+++ b/intern/cycles/test/util_avxf_avx2_test.cpp
@ -2,7 +2,6 @@
 * Copyright 2011-2022 Blender Foundation */

 #define __KERNEL_AVX2__
-#define __KERNEL_CPU__

 #define TEST_CATEGORY_NAME util_avx2

--- a/intern/cycles/test/util_avxf_avx_test.cpp
+++ b/intern/cycles/test/util_avxf_avx_test.cpp
@ -2,7 +2,6 @@
 * Copyright 2011-2022 Blender Foundation */

 #define __KERNEL_AVX__
-#define __KERNEL_CPU__

 #define TEST_CATEGORY_NAME util_avx

--- a/intern/cycles/util/defines.h
+++ b/intern/cycles/util/defines.h
@ -81,7 +81,7 @@
 /* macros */

 /* hints for branch prediction, only use in code that runs a _lot_ */
-#if defined(__GNUC__) && defined(__KERNEL_CPU__)
+#if defined(__GNUC__) && !defined(__KERNEL_GPU__)
 #  define LIKELY(x) __builtin_expect(!!(x), 1)
 #  define UNLIKELY(x) __builtin_expect(!!(x), 0)
 #else
--- a/intern/cycles/util/math_fast.h
+++ b/intern/cycles/util/math_fast.h
@ -420,7 +420,7 @@ ccl_device_inline float fast_expf(float x)
  return fast_exp2f(x / M_LN2_F);
 }

-#if defined(__KERNEL_CPU__) && !defined(_MSC_VER)
+#if !defined(__KERNEL_GPU__) && !defined(_MSC_VER)
 /* MSVC seems to have a code-gen bug here in at least SSE41/AVX, see
 * T78047 and T78869 for details. Just disable for now, it only makes
 * a small difference in denoising performance. */