Fix Cycles CUDA sm_13 (sm 1.3) build with a 32-bit compiler; tweaked the voronoi and brick code so that it can be uninlined.
This commit is contained in:
Brecht Van Lommel 2012-11-30 07:27:17 +00:00
parent ad2b41bc4b
commit ceedd5bd35
7 changed files with 38 additions and 46 deletions

View File

@ -110,8 +110,7 @@ BF_JACK_LIB_STATIC = '${BF_ZLIB}/lib/libjack.a'
# Cycles
WITH_BF_CYCLES = True
WITH_BF_CYCLES_CUDA_BINARIES = True
#BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_13', 'sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_13', 'sm_20', 'sm_21', 'sm_30']
WITH_BF_OIIO = True
WITH_BF_STATICOIIO = True

View File

@ -97,8 +97,7 @@ WITH_BF_JACK = True
# Cycles
WITH_BF_CYCLES = True
WITH_BF_CYCLES_CUDA_BINARIES = True
#BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_13', 'sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_20', 'sm_21', 'sm_30']
BF_CYCLES_CUDA_BINARIES_ARCH = ['sm_13', 'sm_20', 'sm_21', 'sm_30']
WITH_BF_OIIO = True
WITH_BF_STATICOIIO = True

View File

@ -102,6 +102,7 @@ CCL_NAMESPACE_BEGIN
#define __IMAGE_TEXTURES__
#define __EXTRA_NODES__
#define __HOLDOUT__
#define __NORMAL_MAP__
#endif
#ifdef __KERNEL_ADV_SHADING__

View File

@ -401,9 +401,13 @@ __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderT
case NODE_LIGHT_FALLOFF:
svm_node_light_falloff(sd, stack, node);
break;
#endif
#ifdef __ANISOTROPIC__
case NODE_TANGENT:
svm_node_tangent(kg, sd, stack, node);
break;
#endif
#ifdef __NORMAL_MAP__
case NODE_NORMAL_MAP:
svm_node_normal_map(kg, sd, stack, node);
break;

View File

@ -28,9 +28,9 @@ __device_noinline float brick_noise(int n) /* fast integer noise */
return 0.5f * ((float)nn / 1073741824.0f);
}
__device_noinline float svm_brick(float3 p, float scale, float mortar_size, float bias,
__device_noinline float2 svm_brick(float3 p, float scale, float mortar_size, float bias,
float brick_width, float row_height, float offset_amount, int offset_frequency,
float squash_amount, int squash_frequency, float *tint)
float squash_amount, int squash_frequency)
{
p *= scale;
@ -50,11 +50,12 @@ __device_noinline float svm_brick(float3 p, float scale, float mortar_size, floa
x = (p.x+offset) - brick_width*bricknum;
y = p.y - row_height*rownum;
*tint = clamp((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias), 0.0f, 1.0f);
return make_float2(
clamp((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias), 0.0f, 1.0f),
return (x < mortar_size || y < mortar_size ||
(x < mortar_size || y < mortar_size ||
x > (brick_width - mortar_size) ||
y > (row_height - mortar_size)) ? 1.0f : 0.0f;
y > (row_height - mortar_size)) ? 1.0f : 0.0f);
}
__device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
@ -70,8 +71,6 @@ __device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *stack
/* RNA properties */
uint offset_frequency, squash_frequency;
float tint = 0.0f;
decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset);
decode_node_uchar4(node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset);
decode_node_uchar4(node.w, &row_height_offset, &color_offset, &fac_offset, NULL);
@ -92,9 +91,11 @@ __device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *stack
float offset_amount = __int_as_float(node3.z);
float squash_amount = __int_as_float(node3.w);
float f = svm_brick(co, scale, mortar_size, bias, brick_width, row_height,
offset_amount, offset_frequency, squash_amount, squash_frequency,
&tint);
float2 f2 = svm_brick(co, scale, mortar_size, bias, brick_width, row_height,
offset_amount, offset_frequency, squash_amount, squash_frequency);
float tint = f2.x;
float f = f2.y;
if(f != 1.0f) {
float facm = 1.0f - tint;

View File

@ -42,8 +42,12 @@ __device float voronoi_distance(NodeDistanceMetric distance_metric, float3 d, fl
/* Voronoi / Worley like */
__device_noinline void voronoi(float3 p, NodeDistanceMetric distance_metric, float e, float da[4], float3 pa[4])
__device_noinline float4 voronoi_Fn(float3 p, float e, int n1, int n2)
{
float da[4];
float3 pa[4];
NodeDistanceMetric distance_metric = NODE_VORONOI_DISTANCE_SQUARED;
/* returns distances in da and point coords in pa */
int xx, yy, zz, xi, yi, zi;
@ -105,33 +109,20 @@ __device_noinline void voronoi(float3 p, NodeDistanceMetric distance_metric, flo
}
}
}
float4 result = make_float4(pa[n1].x, pa[n1].y, pa[n1].z, da[n1]);
if(n2 != -1)
result = make_float4(pa[n2].x, pa[n2].y, pa[n2].z, da[n2]) - result;
return result;
}
/* Return the n-th entry of the distance array that voronoi() fills in for
 * point p. voronoi() is called with NODE_VORONOI_DISTANCE_SQUARED as the
 * distance metric and 0 for its `e` argument. The local arrays hold four
 * entries each, so n must be in the range 0..3. */
__device float voronoi_Fn(float3 p, int n)
{
float da[4];
float3 pa[4];
voronoi(p, NODE_VORONOI_DISTANCE_SQUARED, 0, da, pa);
return da[n];
}
/* Return the difference da[n2] - da[n1] between two entries of the distance
 * array that voronoi() fills in for point p. voronoi() is called with
 * NODE_VORONOI_DISTANCE_SQUARED as the distance metric and 0 for its `e`
 * argument. The local arrays hold four entries each, so n1 and n2 must be
 * in the range 0..3. */
__device float voronoi_FnFn(float3 p, int n1, int n2)
{
float da[4];
float3 pa[4];
voronoi(p, NODE_VORONOI_DISTANCE_SQUARED, 0, da, pa);
return da[n2] - da[n1];
}
/* Fixed-index wrappers around the two-argument voronoi_Fn() and
 * voronoi_FnFn(): F1..F4 select distance entries 0..3 respectively, and
 * F1F2 returns entry 1 minus entry 0. */
__device float voronoi_F1(float3 p) { return voronoi_Fn(p, 0); }
__device float voronoi_F2(float3 p) { return voronoi_Fn(p, 1); }
__device float voronoi_F3(float3 p) { return voronoi_Fn(p, 2); }
__device float voronoi_F4(float3 p) { return voronoi_Fn(p, 3); }
__device float voronoi_F1F2(float3 p) { return voronoi_FnFn(p, 0, 1); }
/* Fixed-index wrappers around the four-argument voronoi_Fn(p, e, n1, n2).
 * Each passes e = 0.0f and returns only the .w component of the float4
 * result. F1..F4 pass n1 = 0..3 with n2 = -1; F1F2 passes n1 = 0 and
 * n2 = 1. */
__device float voronoi_F1(float3 p) { return voronoi_Fn(p, 0.0f, 0, -1).w; }
__device float voronoi_F2(float3 p) { return voronoi_Fn(p, 0.0f, 1, -1).w; }
__device float voronoi_F3(float3 p) { return voronoi_Fn(p, 0.0f, 2, -1).w; }
__device float voronoi_F4(float3 p) { return voronoi_Fn(p, 0.0f, 3, -1).w; }
__device float voronoi_F1F2(float3 p) { return voronoi_Fn(p, 0.0f, 0, 1).w; }
__device float voronoi_Cr(float3 p)
{

View File

@ -23,21 +23,18 @@ CCL_NAMESPACE_BEGIN
__device_noinline float4 svm_voronoi(NodeVoronoiColoring coloring, float scale, float3 p)
{
/* compute distance and point coordinate of 4 nearest neighbours */
float da[4];
float3 pa[4];
voronoi(p*scale, NODE_VORONOI_DISTANCE_SQUARED, 1.0f, da, pa);
float4 dpa0 = voronoi_Fn(p*scale, 1.0f, 0, -1);
/* output */
float fac;
float3 color;
if(coloring == NODE_VORONOI_INTENSITY) {
fac = fabsf(da[0]);
fac = fabsf(dpa0.w);
color = make_float3(fac, fac, fac);
}
else {
color = cellnoise_color(pa[0]);
color = cellnoise_color(float4_to_float3(dpa0));
fac = average(color);
}