Deduplicate CUDA and OpenCL wranglers

For now it was mainly about OpenCL wrangler being duplicated
between Cycles and Compositor, but with OpenSubdiv work those
wranglers were gonna to be duplicated just once again.

This commit makes it so Cycles and Compositor uses wranglers
from this repositories:

  - https://github.com/CudaWrangler/cuew
  - https://github.com/OpenCLWrangler/clew

This repositories are based on the wranglers we used before
and they'll be likely continued maintaining by us plus some
more players in the market.

Pretty much straightforward change with some tricks in the
CMake/SCons to make this libs being passed to the linker
after all other libraries in order to make OpenSubdiv linked
against those wranglers in the future.

For those who're worrying about Cycles being less standalone,
it's not truth, it's rather more flexible now and in the future
different wranglers might be used in Cycles. For now it'll
just mean those libs would need to be put into Cycles repository
together with some other libs from Blender such as mikkspace.

This is mainly platform maintenance commit, should not be any
changes to the user space.

Reviewers: juicyfruit, dingto, campbellbarton

Reviewed By: juicyfruit, dingto, campbellbarton

Differential Revision: https://developer.blender.org/D707
This commit is contained in:
Sergey Sharybin 2014-08-05 13:57:50 +06:00
parent a3fac84c73
commit 77b7e1fe9a
46 changed files with 6143 additions and 4612 deletions

View File

@ -70,7 +70,7 @@ quickdebug = None
##### BEGIN SETUP #####
B.possible_types = ['core', 'player', 'player2', 'intern', 'extern']
B.possible_types = ['core', 'player', 'player2', 'intern', 'extern', 'system']
B.binarykind = ['blender' , 'blenderplayer']
##################################
@ -815,7 +815,7 @@ SConscript(B.root_build_dir+'/extern/SConscript')
# libraries to give as objects to linking phase
mainlist = []
for tp in B.possible_types:
if (not tp == 'player') and (not tp == 'player2'):
if (not tp == 'player') and (not tp == 'player2') and (not tp == 'system'):
mainlist += B.create_blender_liblist(env, tp)
if B.arguments.get('BF_PRIORITYLIST', '0')=='1':
@ -826,6 +826,11 @@ creob = B.creator(env)
thestatlibs, thelibincs = B.setup_staticlibs(env)
thesyslibs = B.setup_syslibs(env)
# Hack to pass OSD libraries to linker before extern_{clew,cuew}
for x in B.create_blender_liblist(env, 'system'):
thesyslibs.append(os.path.basename(x))
thelibincs.append(os.path.dirname(x))
if 'blender' in B.targets or not env['WITH_BF_NOBLENDER']:
env.BlenderProg(B.root_build_dir, "blender", creob + mainlist + thestatlibs + dobj, thesyslibs, [B.root_build_dir+'/lib'] + thelibincs, 'blender')
if env['WITH_BF_PLAYER']:

View File

@ -411,6 +411,12 @@ macro(setup_liblinks
endif()
target_link_libraries(${target} ${PLATFORM_LINKLIBS} ${CMAKE_DL_LIBS})
# We put CLEW and CUEW here because OPENSUBDIV_LIBRARIES dpeends on them..
if(WITH_CYCLES OR WITH_COMPOSITOR OR WITH_OPENSUBDIV)
target_link_libraries(blender "extern_clew")
target_link_libraries(blender "extern_cuew")
endif()
endmacro()
macro(SETUP_BLENDER_SORTED_LIBS)

View File

@ -70,6 +70,11 @@ if(WITH_LZMA)
add_subdirectory(lzma)
endif()
if(WITH_CYCLES OR WITH_COMPOSITOR)
add_subdirectory(clew)
add_subdirectory(cuew)
endif()
if(WITH_MOD_BOOLEAN)
add_subdirectory(carve)
endif()

4
extern/SConscript vendored
View File

@ -20,6 +20,10 @@ if env['WITH_BF_ELTOPO']:
if env['WITH_BF_BULLET']:
SConscript(['bullet2/src/SConscript'])
if env['WITH_BF_COMPOSITOR'] or env['WITH_BF_CYCLES']:
SConscript (['clew/SConscript'])
SConscript (['cuew/SConscript'])
if env['WITH_BF_OPENJPEG'] and env['BF_OPENJPEG_LIB'] == '':
SConscript(['libopenjpeg/SConscript'])

42
extern/clew/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,42 @@
# ***** BEGIN GPL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# The Original Code is Copyright (C) 2006, Blender Foundation
# All rights reserved.
#
# The Original Code is: all of this file.
#
# Contributor(s): Jacques Beaurain.
#
# ***** END GPL LICENSE BLOCK *****
set(INC
.
include
)
set(INC_SYS
)
set(SRC
include/clew.h
src/clew.c
)
add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_1_APIS)
blender_add_lib(extern_clew "${SRC}" "${INC}" "${INC_SYS}")

35
extern/clew/SConscript vendored Normal file
View File

@ -0,0 +1,35 @@
#!/usr/bin/env python
#
# ***** BEGIN GPL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# The Original Code is Copyright (C) 2006, Blender Foundation
# All rights reserved.
#
# The Original Code is: all of this file.
#
# Contributor(s): Nathan Letwory.
#
# ***** END GPL LICENSE BLOCK *****
Import ('env')
sources = env.Glob('src/clew.c')
incs = 'include'
defs = ['CL_USE_DEPRECATED_OPENCL_1_1_APIS']
env.BlenderLib ('extern_clew', sources, Split(incs), defines=defs, libtype=['system'], priority = [999])

2759
extern/clew/include/clew.h vendored Normal file

File diff suppressed because it is too large Load Diff

382
extern/clew/src/clew.c vendored Normal file
View File

@ -0,0 +1,382 @@
//////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009 Organic Vectory B.V.
// Written by George van Venrooij
//
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file license.txt)
//////////////////////////////////////////////////////////////////////////
#include "clew.h"
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#define VC_EXTRALEAN
#include <windows.h>
typedef HMODULE CLEW_DYNLIB_HANDLE;
#define CLEW_DYNLIB_OPEN LoadLibrary
#define CLEW_DYNLIB_CLOSE FreeLibrary
#define CLEW_DYNLIB_IMPORT GetProcAddress
#else
#include <dlfcn.h>
typedef void* CLEW_DYNLIB_HANDLE;
#define CLEW_DYNLIB_OPEN(path) dlopen(path, RTLD_NOW | RTLD_GLOBAL)
#define CLEW_DYNLIB_CLOSE dlclose
#define CLEW_DYNLIB_IMPORT dlsym
#endif
#include <stdlib.h>
//! \brief module handle
static CLEW_DYNLIB_HANDLE module = NULL;
// Variables holding function entry points
PFNCLGETPLATFORMIDS __clewGetPlatformIDs = NULL;
PFNCLGETPLATFORMINFO __clewGetPlatformInfo = NULL;
PFNCLGETDEVICEIDS __clewGetDeviceIDs = NULL;
PFNCLGETDEVICEINFO __clewGetDeviceInfo = NULL;
PFNCLCREATESUBDEVICES __clewCreateSubDevices = NULL;
PFNCLRETAINDEVICE __clewRetainDevice = NULL;
PFNCLRELEASEDEVICE __clewReleaseDevice = NULL;
PFNCLCREATECONTEXT __clewCreateContext = NULL;
PFNCLCREATECONTEXTFROMTYPE __clewCreateContextFromType = NULL;
PFNCLRETAINCONTEXT __clewRetainContext = NULL;
PFNCLRELEASECONTEXT __clewReleaseContext = NULL;
PFNCLGETCONTEXTINFO __clewGetContextInfo = NULL;
PFNCLCREATECOMMANDQUEUE __clewCreateCommandQueue = NULL;
PFNCLRETAINCOMMANDQUEUE __clewRetainCommandQueue = NULL;
PFNCLRELEASECOMMANDQUEUE __clewReleaseCommandQueue = NULL;
PFNCLGETCOMMANDQUEUEINFO __clewGetCommandQueueInfo = NULL;
#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
PFNCLSETCOMMANDQUEUEPROPERTY __clewSetCommandQueueProperty = NULL;
#endif
PFNCLCREATEBUFFER __clewCreateBuffer = NULL;
PFNCLCREATESUBBUFFER __clewCreateSubBuffer = NULL;
PFNCLCREATEIMAGE __clewCreateImage = NULL;
PFNCLRETAINMEMOBJECT __clewRetainMemObject = NULL;
PFNCLRELEASEMEMOBJECT __clewReleaseMemObject = NULL;
PFNCLGETSUPPORTEDIMAGEFORMATS __clewGetSupportedImageFormats = NULL;
PFNCLGETMEMOBJECTINFO __clewGetMemObjectInfo = NULL;
PFNCLGETIMAGEINFO __clewGetImageInfo = NULL;
PFNCLSETMEMOBJECTDESTRUCTORCALLBACK __clewSetMemObjectDestructorCallback = NULL;
PFNCLCREATESAMPLER __clewCreateSampler = NULL;
PFNCLRETAINSAMPLER __clewRetainSampler = NULL;
PFNCLRELEASESAMPLER __clewReleaseSampler = NULL;
PFNCLGETSAMPLERINFO __clewGetSamplerInfo = NULL;
PFNCLCREATEPROGRAMWITHSOURCE __clewCreateProgramWithSource = NULL;
PFNCLCREATEPROGRAMWITHBINARY __clewCreateProgramWithBinary = NULL;
PFNCLCREATEPROGRAMWITHBUILTINKERNELS __clewCreateProgramWithBuiltInKernels = NULL;
PFNCLRETAINPROGRAM __clewRetainProgram = NULL;
PFNCLRELEASEPROGRAM __clewReleaseProgram = NULL;
PFNCLBUILDPROGRAM __clewBuildProgram = NULL;
PFNCLGETPROGRAMINFO __clewGetProgramInfo = NULL;
PFNCLGETPROGRAMBUILDINFO __clewGetProgramBuildInfo = NULL;
PFNCLCREATEKERNEL __clewCreateKernel = NULL;
PFNCLCREATEKERNELSINPROGRAM __clewCreateKernelsInProgram = NULL;
PFNCLRETAINKERNEL __clewRetainKernel = NULL;
PFNCLRELEASEKERNEL __clewReleaseKernel = NULL;
PFNCLSETKERNELARG __clewSetKernelArg = NULL;
PFNCLGETKERNELINFO __clewGetKernelInfo = NULL;
PFNCLGETKERNELWORKGROUPINFO __clewGetKernelWorkGroupInfo = NULL;
PFNCLWAITFOREVENTS __clewWaitForEvents = NULL;
PFNCLGETEVENTINFO __clewGetEventInfo = NULL;
PFNCLCREATEUSEREVENT __clewCreateUserEvent = NULL;
PFNCLRETAINEVENT __clewRetainEvent = NULL;
PFNCLRELEASEEVENT __clewReleaseEvent = NULL;
PFNCLSETUSEREVENTSTATUS __clewSetUserEventStatus = NULL;
PFNCLSETEVENTCALLBACK __clewSetEventCallback = NULL;
PFNCLGETEVENTPROFILINGINFO __clewGetEventProfilingInfo = NULL;
PFNCLFLUSH __clewFlush = NULL;
PFNCLFINISH __clewFinish = NULL;
PFNCLENQUEUEREADBUFFER __clewEnqueueReadBuffer = NULL;
PFNCLENQUEUEREADBUFFERRECT __clewEnqueueReadBufferRect = NULL;
PFNCLENQUEUEWRITEBUFFER __clewEnqueueWriteBuffer = NULL;
PFNCLENQUEUEWRITEBUFFERRECT __clewEnqueueWriteBufferRect = NULL;
PFNCLENQUEUECOPYBUFFER __clewEnqueueCopyBuffer = NULL;
PFNCLENQUEUEREADIMAGE __clewEnqueueReadImage = NULL;
PFNCLENQUEUEWRITEIMAGE __clewEnqueueWriteImage = NULL;
PFNCLENQUEUECOPYIMAGE __clewEnqueueCopyImage = NULL;
PFNCLENQUEUECOPYBUFFERRECT __clewEnqueueCopyBufferRect = NULL;
PFNCLENQUEUECOPYIMAGETOBUFFER __clewEnqueueCopyImageToBuffer = NULL;
PFNCLENQUEUECOPYBUFFERTOIMAGE __clewEnqueueCopyBufferToImage = NULL;
PFNCLENQUEUEMAPBUFFER __clewEnqueueMapBuffer = NULL;
PFNCLENQUEUEMAPIMAGE __clewEnqueueMapImage = NULL;
PFNCLENQUEUEUNMAPMEMOBJECT __clewEnqueueUnmapMemObject = NULL;
PFNCLENQUEUENDRANGEKERNEL __clewEnqueueNDRangeKernel = NULL;
PFNCLENQUEUETASK __clewEnqueueTask = NULL;
PFNCLENQUEUENATIVEKERNEL __clewEnqueueNativeKernel = NULL;
PFNCLGETEXTENSIONFUNCTIONADDRESSFORPLATFORM __clewGetExtensionFunctionAddressForPlatform = NULL;
#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
PFNCLCREATEIMAGE2D __clewCreateImage2D = NULL;
PFNCLCREATEIMAGE3D __clewCreateImage3D = NULL;
PFNCLENQUEUEMARKER __clewEnqueueMarker = NULL;
PFNCLENQUEUEWAITFOREVENTS __clewEnqueueWaitForEvents = NULL;
PFNCLENQUEUEBARRIER __clewEnqueueBarrier = NULL;
PFNCLUNLOADCOMPILER __clewUnloadCompiler = NULL;
PFNCLGETEXTENSIONFUNCTIONADDRESS __clewGetExtensionFunctionAddress = NULL;
#endif
/* cl_gl */
PFNCLCREATEFROMGLBUFFER __clewCreateFromGLBuffer = NULL;
PFNCLCREATEFROMGLTEXTURE __clewCreateFromGLTexture = NULL;
PFNCLCREATEFROMGLRENDERBUFFER __clewCreateFromGLRenderbuffer = NULL;
PFNCLGETGLOBJECTINFO __clewGetGLObjectInfo = NULL;
PFNCLGETGLTEXTUREINFO __clewGetGLTextureInfo = NULL;
PFNCLENQUEUEACQUIREGLOBJECTS __clewEnqueueAcquireGLObjects = NULL;
PFNCLENQUEUERELEASEGLOBJECTS __clewEnqueueReleaseGLObjects = NULL;
#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
PFNCLCREATEFROMGLTEXTURE2D __clewCreateFromGLTexture2D = NULL;
PFNCLCREATEFROMGLTEXTURE3D __clewCreateFromGLTexture3D = NULL;
#endif
PFNCLGETGLCONTEXTINFOKHR __clewGetGLContextInfoKHR = NULL;
static void clewExit(void)
{
if (module != NULL)
{
// Ignore errors
CLEW_DYNLIB_CLOSE(module);
module = NULL;
}
}
int clewInit()
{
#ifdef _WIN32
const char *path = "OpenCL.dll";
#elif defined(__APPLE__)
const char *path = "/Library/Frameworks/OpenCL.framework/OpenCL";
#else
const char *path = "libOpenCL.so";
#endif
int error = 0;
// Check if already initialized
if (module != NULL)
{
return CLEW_SUCCESS;
}
// Load library
module = CLEW_DYNLIB_OPEN(path);
// Check for errors
if (module == NULL)
{
return CLEW_ERROR_OPEN_FAILED;
}
// Set unloading
error = atexit(clewExit);
if (error)
{
// Failure queuing atexit, shutdown with error
CLEW_DYNLIB_CLOSE(module);
module = NULL;
return CLEW_ERROR_ATEXIT_FAILED;
}
// Determine function entry-points
__clewGetPlatformIDs = (PFNCLGETPLATFORMIDS )CLEW_DYNLIB_IMPORT(module, "clGetPlatformIDs");
__clewGetPlatformInfo = (PFNCLGETPLATFORMINFO )CLEW_DYNLIB_IMPORT(module, "clGetPlatformInfo");
__clewGetDeviceIDs = (PFNCLGETDEVICEIDS )CLEW_DYNLIB_IMPORT(module, "clGetDeviceIDs");
__clewGetDeviceInfo = (PFNCLGETDEVICEINFO )CLEW_DYNLIB_IMPORT(module, "clGetDeviceInfo");
__clewCreateSubDevices = (PFNCLCREATESUBDEVICES )CLEW_DYNLIB_IMPORT(module, "clCreateSubDevices");
__clewRetainDevice = (PFNCLRETAINDEVICE )CLEW_DYNLIB_IMPORT(module, "clRetainDevice");
__clewReleaseDevice = (PFNCLRELEASEDEVICE )CLEW_DYNLIB_IMPORT(module, "clReleaseDevice");
__clewCreateContext = (PFNCLCREATECONTEXT )CLEW_DYNLIB_IMPORT(module, "clCreateContext");
__clewCreateContextFromType = (PFNCLCREATECONTEXTFROMTYPE )CLEW_DYNLIB_IMPORT(module, "clCreateContextFromType");
__clewRetainContext = (PFNCLRETAINCONTEXT )CLEW_DYNLIB_IMPORT(module, "clRetainContext");
__clewReleaseContext = (PFNCLRELEASECONTEXT )CLEW_DYNLIB_IMPORT(module, "clReleaseContext");
__clewGetContextInfo = (PFNCLGETCONTEXTINFO )CLEW_DYNLIB_IMPORT(module, "clGetContextInfo");
__clewCreateCommandQueue = (PFNCLCREATECOMMANDQUEUE )CLEW_DYNLIB_IMPORT(module, "clCreateCommandQueue");
__clewRetainCommandQueue = (PFNCLRETAINCOMMANDQUEUE )CLEW_DYNLIB_IMPORT(module, "clRetainCommandQueue");
__clewReleaseCommandQueue = (PFNCLRELEASECOMMANDQUEUE )CLEW_DYNLIB_IMPORT(module, "clReleaseCommandQueue");
__clewGetCommandQueueInfo = (PFNCLGETCOMMANDQUEUEINFO )CLEW_DYNLIB_IMPORT(module, "clGetCommandQueueInfo");
#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
__clewSetCommandQueueProperty = (PFNCLSETCOMMANDQUEUEPROPERTY )CLEW_DYNLIB_IMPORT(module, "clSetCommandQueueProperty");
#endif
__clewCreateBuffer = (PFNCLCREATEBUFFER )CLEW_DYNLIB_IMPORT(module, "clCreateBuffer");
__clewCreateSubBuffer = (PFNCLCREATESUBBUFFER )CLEW_DYNLIB_IMPORT(module, "clCreateBuffer");
__clewCreateImage = (PFNCLCREATEIMAGE )CLEW_DYNLIB_IMPORT(module, "clCreateImage");
__clewRetainMemObject = (PFNCLRETAINMEMOBJECT )CLEW_DYNLIB_IMPORT(module, "clRetainMemObject");
__clewReleaseMemObject = (PFNCLRELEASEMEMOBJECT )CLEW_DYNLIB_IMPORT(module, "clReleaseMemObject");
__clewGetSupportedImageFormats = (PFNCLGETSUPPORTEDIMAGEFORMATS )CLEW_DYNLIB_IMPORT(module, "clGetSupportedImageFormats");
__clewGetMemObjectInfo = (PFNCLGETMEMOBJECTINFO )CLEW_DYNLIB_IMPORT(module, "clGetMemObjectInfo");
__clewGetImageInfo = (PFNCLGETIMAGEINFO )CLEW_DYNLIB_IMPORT(module, "clGetImageInfo");
__clewSetMemObjectDestructorCallback = (PFNCLSETMEMOBJECTDESTRUCTORCALLBACK)CLEW_DYNLIB_IMPORT(module, "clSetMemObjectDestructorCallback");
__clewCreateSampler = (PFNCLCREATESAMPLER )CLEW_DYNLIB_IMPORT(module, "clCreateSampler");
__clewRetainSampler = (PFNCLRETAINSAMPLER )CLEW_DYNLIB_IMPORT(module, "clRetainSampler");
__clewReleaseSampler = (PFNCLRELEASESAMPLER )CLEW_DYNLIB_IMPORT(module, "clReleaseSampler");
__clewGetSamplerInfo = (PFNCLGETSAMPLERINFO )CLEW_DYNLIB_IMPORT(module, "clGetSamplerInfo");
__clewCreateProgramWithSource = (PFNCLCREATEPROGRAMWITHSOURCE )CLEW_DYNLIB_IMPORT(module, "clCreateProgramWithSource");
__clewCreateProgramWithBinary = (PFNCLCREATEPROGRAMWITHBINARY )CLEW_DYNLIB_IMPORT(module, "clCreateProgramWithBinary");
__clewCreateProgramWithBuiltInKernels =(PFNCLCREATEPROGRAMWITHBUILTINKERNELS)CLEW_DYNLIB_IMPORT(module, "clCreateProgramWithBuiltInKernels");
__clewRetainProgram = (PFNCLRETAINPROGRAM )CLEW_DYNLIB_IMPORT(module, "clRetainProgram");
__clewReleaseProgram = (PFNCLRELEASEPROGRAM )CLEW_DYNLIB_IMPORT(module, "clReleaseProgram");
__clewBuildProgram = (PFNCLBUILDPROGRAM )CLEW_DYNLIB_IMPORT(module, "clBuildProgram");
__clewGetProgramInfo = (PFNCLGETPROGRAMINFO )CLEW_DYNLIB_IMPORT(module, "clGetProgramInfo");
__clewGetProgramBuildInfo = (PFNCLGETPROGRAMBUILDINFO )CLEW_DYNLIB_IMPORT(module, "clGetProgramBuildInfo");
__clewCreateKernel = (PFNCLCREATEKERNEL )CLEW_DYNLIB_IMPORT(module, "clCreateKernel");
__clewCreateKernelsInProgram = (PFNCLCREATEKERNELSINPROGRAM )CLEW_DYNLIB_IMPORT(module, "clCreateKernelsInProgram");
__clewRetainKernel = (PFNCLRETAINKERNEL )CLEW_DYNLIB_IMPORT(module, "clRetainKernel");
__clewReleaseKernel = (PFNCLRELEASEKERNEL )CLEW_DYNLIB_IMPORT(module, "clReleaseKernel");
__clewSetKernelArg = (PFNCLSETKERNELARG )CLEW_DYNLIB_IMPORT(module, "clSetKernelArg");
__clewGetKernelInfo = (PFNCLGETKERNELINFO )CLEW_DYNLIB_IMPORT(module, "clGetKernelInfo");
__clewGetKernelWorkGroupInfo = (PFNCLGETKERNELWORKGROUPINFO )CLEW_DYNLIB_IMPORT(module, "clGetKernelWorkGroupInfo");
__clewWaitForEvents = (PFNCLWAITFOREVENTS )CLEW_DYNLIB_IMPORT(module, "clWaitForEvents");
__clewGetEventInfo = (PFNCLGETEVENTINFO )CLEW_DYNLIB_IMPORT(module, "clGetEventInfo");
__clewCreateUserEvent = (PFNCLCREATEUSEREVENT )CLEW_DYNLIB_IMPORT(module, "clCreateUserEvent");
__clewRetainEvent = (PFNCLRETAINEVENT )CLEW_DYNLIB_IMPORT(module, "clRetainEvent");
__clewReleaseEvent = (PFNCLRELEASEEVENT )CLEW_DYNLIB_IMPORT(module, "clReleaseEvent");
__clewSetUserEventStatus = (PFNCLSETUSEREVENTSTATUS )CLEW_DYNLIB_IMPORT(module, "clSetUserEventStatus");
__clewSetEventCallback = (PFNCLSETEVENTCALLBACK )CLEW_DYNLIB_IMPORT(module, "clSetEventCallback");
__clewGetEventProfilingInfo = (PFNCLGETEVENTPROFILINGINFO )CLEW_DYNLIB_IMPORT(module, "clGetEventProfilingInfo");
__clewFlush = (PFNCLFLUSH )CLEW_DYNLIB_IMPORT(module, "clFlush");
__clewFinish = (PFNCLFINISH )CLEW_DYNLIB_IMPORT(module, "clFinish");
__clewEnqueueReadBuffer = (PFNCLENQUEUEREADBUFFER )CLEW_DYNLIB_IMPORT(module, "clEnqueueReadBuffer");
__clewEnqueueReadBufferRect = (PFNCLENQUEUEREADBUFFERRECT )CLEW_DYNLIB_IMPORT(module, "clEnqueueReadBufferRect");
__clewEnqueueWriteBuffer = (PFNCLENQUEUEWRITEBUFFER )CLEW_DYNLIB_IMPORT(module, "clEnqueueWriteBuffer");
__clewEnqueueWriteBufferRect = (PFNCLENQUEUEWRITEBUFFERRECT )CLEW_DYNLIB_IMPORT(module, "clEnqueueWriteBufferRect");
__clewEnqueueCopyBuffer = (PFNCLENQUEUECOPYBUFFER )CLEW_DYNLIB_IMPORT(module, "clEnqueueCopyBuffer");
__clewEnqueueCopyBufferRect = (PFNCLENQUEUECOPYBUFFERRECT )CLEW_DYNLIB_IMPORT(module, "clEnqueueCopyBufferRect");
__clewEnqueueReadImage = (PFNCLENQUEUEREADIMAGE )CLEW_DYNLIB_IMPORT(module, "clEnqueueReadImage");
__clewEnqueueWriteImage = (PFNCLENQUEUEWRITEIMAGE )CLEW_DYNLIB_IMPORT(module, "clEnqueueWriteImage");
__clewEnqueueCopyImage = (PFNCLENQUEUECOPYIMAGE )CLEW_DYNLIB_IMPORT(module, "clEnqueueCopyImage");
__clewEnqueueCopyImageToBuffer = (PFNCLENQUEUECOPYIMAGETOBUFFER )CLEW_DYNLIB_IMPORT(module, "clEnqueueCopyImageToBuffer");
__clewEnqueueCopyBufferToImage = (PFNCLENQUEUECOPYBUFFERTOIMAGE )CLEW_DYNLIB_IMPORT(module, "clEnqueueCopyBufferToImage");
__clewEnqueueMapBuffer = (PFNCLENQUEUEMAPBUFFER )CLEW_DYNLIB_IMPORT(module, "clEnqueueMapBuffer");
__clewEnqueueMapImage = (PFNCLENQUEUEMAPIMAGE )CLEW_DYNLIB_IMPORT(module, "clEnqueueMapImage");
__clewEnqueueUnmapMemObject = (PFNCLENQUEUEUNMAPMEMOBJECT )CLEW_DYNLIB_IMPORT(module, "clEnqueueUnmapMemObject");
__clewEnqueueNDRangeKernel = (PFNCLENQUEUENDRANGEKERNEL )CLEW_DYNLIB_IMPORT(module, "clEnqueueNDRangeKernel");
__clewEnqueueTask = (PFNCLENQUEUETASK )CLEW_DYNLIB_IMPORT(module, "clEnqueueTask");
__clewEnqueueNativeKernel = (PFNCLENQUEUENATIVEKERNEL )CLEW_DYNLIB_IMPORT(module, "clEnqueueNativeKernel");
__clewGetExtensionFunctionAddressForPlatform = (PFNCLGETEXTENSIONFUNCTIONADDRESSFORPLATFORM)CLEW_DYNLIB_IMPORT(module, "clGetExtensionFunctionAddressForPlatform");
#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
__clewCreateImage2D = (PFNCLCREATEIMAGE2D )CLEW_DYNLIB_IMPORT(module, "clCreateImage2D");
__clewCreateImage3D = (PFNCLCREATEIMAGE3D )CLEW_DYNLIB_IMPORT(module, "clCreateImage3D");
__clewEnqueueMarker = (PFNCLENQUEUEMARKER )CLEW_DYNLIB_IMPORT(module, "clEnqueueMarker");
__clewEnqueueWaitForEvents = (PFNCLENQUEUEWAITFOREVENTS )CLEW_DYNLIB_IMPORT(module, "clEnqueueWaitForEvents");
__clewEnqueueBarrier = (PFNCLENQUEUEBARRIER )CLEW_DYNLIB_IMPORT(module, "clEnqueueBarrier");
__clewUnloadCompiler = (PFNCLUNLOADCOMPILER )CLEW_DYNLIB_IMPORT(module, "clUnloadCompiler");
__clewGetExtensionFunctionAddress = (PFNCLGETEXTENSIONFUNCTIONADDRESS )CLEW_DYNLIB_IMPORT(module, "clGetExtensionFunctionAddress");
#endif
/* cl_gl */
__clewCreateFromGLBuffer = (PFNCLCREATEFROMGLBUFFER )CLEW_DYNLIB_IMPORT(module, "clCreateFromGLBuffer");
__clewCreateFromGLTexture = (PFNCLCREATEFROMGLTEXTURE )CLEW_DYNLIB_IMPORT(module, "clCreateFromGLTexture");
__clewCreateFromGLRenderbuffer = (PFNCLCREATEFROMGLRENDERBUFFER )CLEW_DYNLIB_IMPORT(module, "clCreateFromGLRenderbuffer");
__clewGetGLObjectInfo = (PFNCLGETGLOBJECTINFO )CLEW_DYNLIB_IMPORT(module, "clGetGLObjectInfo");
__clewGetGLTextureInfo = (PFNCLGETGLTEXTUREINFO )CLEW_DYNLIB_IMPORT(module, "clGetGLTextureInfo");
__clewEnqueueAcquireGLObjects = (PFNCLENQUEUEACQUIREGLOBJECTS )CLEW_DYNLIB_IMPORT(module, "clEnqueueAcquireGLObjects");
__clewEnqueueReleaseGLObjects = (PFNCLENQUEUERELEASEGLOBJECTS )CLEW_DYNLIB_IMPORT(module, "clEnqueueReleaseGLObjects");
#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
__clewCreateFromGLTexture2D = (PFNCLCREATEFROMGLTEXTURE2D )CLEW_DYNLIB_IMPORT(module, "clCreateFromGLTexture2D");
__clewCreateFromGLTexture3D = (PFNCLCREATEFROMGLTEXTURE3D )CLEW_DYNLIB_IMPORT(module, "clCreateFromGLTexture3D");
#endif
__clewGetGLContextInfoKHR = (PFNCLGETGLCONTEXTINFOKHR )CLEW_DYNLIB_IMPORT(module, "clGetGLContextInfoKHR");
if(__clewGetPlatformIDs == NULL) return 0;
if(__clewGetPlatformInfo == NULL) return 0;
if(__clewGetDeviceIDs == NULL) return 0;
if(__clewGetDeviceInfo == NULL) return 0;
return CLEW_SUCCESS;
}
const char* clewErrorString(cl_int error)
{
static const char* strings[] =
{
// Error Codes
"CL_SUCCESS" // 0
, "CL_DEVICE_NOT_FOUND" // -1
, "CL_DEVICE_NOT_AVAILABLE" // -2
, "CL_COMPILER_NOT_AVAILABLE" // -3
, "CL_MEM_OBJECT_ALLOCATION_FAILURE" // -4
, "CL_OUT_OF_RESOURCES" // -5
, "CL_OUT_OF_HOST_MEMORY" // -6
, "CL_PROFILING_INFO_NOT_AVAILABLE" // -7
, "CL_MEM_COPY_OVERLAP" // -8
, "CL_IMAGE_FORMAT_MISMATCH" // -9
, "CL_IMAGE_FORMAT_NOT_SUPPORTED" // -10
, "CL_BUILD_PROGRAM_FAILURE" // -11
, "CL_MAP_FAILURE" // -12
, "CL_MISALIGNED_SUB_BUFFER_OFFSET" // -13
, "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"// -14
, "CL_COMPILE_PROGRAM_FAILURE" // -15
, "CL_LINKER_NOT_AVAILABLE" // -16
, "CL_LINK_PROGRAM_FAILURE" // -17
, "CL_DEVICE_PARTITION_FAILED" // -18
, "CL_KERNEL_ARG_INFO_NOT_AVAILABLE" // -19
, "" // -20
, "" // -21
, "" // -22
, "" // -23
, "" // -24
, "" // -25
, "" // -26
, "" // -27
, "" // -28
, "" // -29
, "CL_INVALID_VALUE" // -30
, "CL_INVALID_DEVICE_TYPE" // -31
, "CL_INVALID_PLATFORM" // -32
, "CL_INVALID_DEVICE" // -33
, "CL_INVALID_CONTEXT" // -34
, "CL_INVALID_QUEUE_PROPERTIES" // -35
, "CL_INVALID_COMMAND_QUEUE" // -36
, "CL_INVALID_HOST_PTR" // -37
, "CL_INVALID_MEM_OBJECT" // -38
, "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR" // -39
, "CL_INVALID_IMAGE_SIZE" // -40
, "CL_INVALID_SAMPLER" // -41
, "CL_INVALID_BINARY" // -42
, "CL_INVALID_BUILD_OPTIONS" // -43
, "CL_INVALID_PROGRAM" // -44
, "CL_INVALID_PROGRAM_EXECUTABLE" // -45
, "CL_INVALID_KERNEL_NAME" // -46
, "CL_INVALID_KERNEL_DEFINITION" // -47
, "CL_INVALID_KERNEL" // -48
, "CL_INVALID_ARG_INDEX" // -49
, "CL_INVALID_ARG_VALUE" // -50
, "CL_INVALID_ARG_SIZE" // -51
, "CL_INVALID_KERNEL_ARGS" // -52
, "CL_INVALID_WORK_DIMENSION" // -53
, "CL_INVALID_WORK_GROUP_SIZE" // -54
, "CL_INVALID_WORK_ITEM_SIZE" // -55
, "CL_INVALID_GLOBAL_OFFSET" // -56
, "CL_INVALID_EVENT_WAIT_LIST" // -57
, "CL_INVALID_EVENT" // -58
, "CL_INVALID_OPERATION" // -59
, "CL_INVALID_GL_OBJECT" // -60
, "CL_INVALID_BUFFER_SIZE" // -61
, "CL_INVALID_MIP_LEVEL" // -62
, "CL_INVALID_GLOBAL_WORK_SIZE" // -63
, "CL_INVALID_PROPERTY" // -64
, "CL_INVALID_IMAGE_DESCRIPTOR" // -65
, "CL_INVALID_COMPILER_OPTIONS" // -66
, "CL_INVALID_LINKER_OPTIONS" // -67
, "CL_INVALID_DEVICE_PARTITION_COUNT" // -68
};
return strings[-error];
}

View File

@ -25,6 +25,7 @@
set(INC
.
include
)
set(INC_SYS
@ -32,11 +33,8 @@ set(INC_SYS
)
set(SRC
OCL_opencl.h
intern/clew.h
intern/clew.c
intern/OCL_opencl.c
include/cuew.h
src/cuew.c
)
blender_add_lib(bf_intern_opencl "${SRC}" "${INC}" "${INC_SYS}")
blender_add_lib(extern_cuew "${SRC}" "${INC}" "${INC_SYS}")

174
extern/cuew/LICENSE vendored Normal file
View File

@ -0,0 +1,174 @@
Modified Apache 2.0 License
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor
and its affiliates, except as required to comply with Section 4(c) of
the License and to reproduce the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

12
extern/cuew/README vendored Normal file
View File

@ -0,0 +1,12 @@
The CUDA Extension Wrangler Library (CUEW) is a cross-platform open-source
C/C++ extension loading library. CUEW provides efficient run-time mechanisms
for determining which CUDA functions and extensions extensions are supported
on the target platform.
CUDA core and extension functionality is exposed in a single header file.
GUEW has been tested on a variety of operating systems, including Windows,
Linux, Mac OS X.
LICENSE
CUEW library is released under the Apache 2.0 license.

View File

@ -27,8 +27,9 @@
Import ('env')
sources = env.Glob('intern/*.c')
sources = env.Glob('src/cuew.c')
incs = '.'
incs = 'include'
defs = []
env.BlenderLib ( 'bf_intern_opencl', sources, Split(incs), libtype=['core','player'], priority = [192,192] )
env.BlenderLib ('extern_cuew', sources, Split(incs), defines=defs, libtype=['system'], priority = [0])

35
extern/cuew/auto/cuda_errors.py vendored Normal file
View File

@ -0,0 +1,35 @@
CUDA_ERRORS={
'CUDA_SUCCESS': "No errors",
'CUDA_ERROR_INVALID_VALUE': "Invalid value",
'CUDA_ERROR_OUT_OF_MEMORY': "Out of memory",
'CUDA_ERROR_NOT_INITIALIZED': "Driver not initialized",
'CUDA_ERROR_DEINITIALIZED': "Driver deinitialized",
'CUDA_ERROR_NO_DEVICE': "No CUDA-capable device available",
'CUDA_ERROR_INVALID_DEVICE': "Invalid device",
'CUDA_ERROR_INVALID_IMAGE': "Invalid kernel image",
'CUDA_ERROR_INVALID_CONTEXT': "Invalid context",
'CUDA_ERROR_CONTEXT_ALREADY_CURRENT': "Context already current",
'CUDA_ERROR_MAP_FAILED': "Map failed",
'CUDA_ERROR_UNMAP_FAILED': "Unmap failed",
'CUDA_ERROR_ARRAY_IS_MAPPED': "Array is mapped",
'CUDA_ERROR_ALREADY_MAPPED': "Already mapped",
'CUDA_ERROR_NO_BINARY_FOR_GPU': "No binary for GPU",
'CUDA_ERROR_ALREADY_ACQUIRED': "Already acquired",
'CUDA_ERROR_NOT_MAPPED': "Not mapped",
'CUDA_ERROR_NOT_MAPPED_AS_ARRAY': "Mapped resource not available for access as an array",
'CUDA_ERROR_NOT_MAPPED_AS_POINTER': "Mapped resource not available for access as a pointer",
'CUDA_ERROR_ECC_UNCORRECTABLE': "Uncorrectable ECC error detected",
'CUDA_ERROR_UNSUPPORTED_LIMIT': "CUlimit not supported by device",
'CUDA_ERROR_INVALID_SOURCE': "Invalid source",
'CUDA_ERROR_FILE_NOT_FOUND': "File not found",
'CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND': "Link to a shared object failed to resolve",
'CUDA_ERROR_SHARED_OBJECT_INIT_FAILED': "Shared object initialization failed",
'CUDA_ERROR_INVALID_HANDLE': "Invalid handle",
'CUDA_ERROR_NOT_FOUND': "Not found",
'CUDA_ERROR_NOT_READY': "CUDA not ready",
'CUDA_ERROR_LAUNCH_FAILED': "Launch failed",
'CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES': "Launch exceeded resources",
'CUDA_ERROR_LAUNCH_TIMEOUT': "Launch exceeded timeout",
'CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING': "Launch with incompatible texturing",
'CUDA_ERROR_UNKNOWN': "Unknown error",
}

125
extern/cuew/auto/cuda_extra.py vendored Normal file
View File

@ -0,0 +1,125 @@
extra_code = """
static void path_join(const char *path1,
const char *path2,
int maxlen,
char *result) {
#if defined(WIN32) || defined(_WIN32)
const char separator = '\\\\';
#else
const char separator = '/';
#endif
int n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
if (n != -1 && n < maxlen) {
result[n] = '\\0';
}
else {
result[maxlen - 1] = '\\0';
}
}
static int path_exists(const char *path) {
struct stat st;
if (stat(path, &st)) {
return 0;
}
return 1;
}
const char *cuewCompilerPath(void) {
#ifdef _WIN32
const char *defaultpaths[] = {"C:/CUDA/bin", NULL};
const char *executable = "nvcc.exe";
#else
const char *defaultpaths[] = {
"/Developer/NVIDIA/CUDA-5.0/bin",
"/usr/local/cuda-5.0/bin",
"/usr/local/cuda/bin",
"/Developer/NVIDIA/CUDA-6.0/bin",
"/usr/local/cuda-6.0/bin",
"/Developer/NVIDIA/CUDA-5.5/bin",
"/usr/local/cuda-5.5/bin",
NULL};
const char *executable = "nvcc";
#endif
int i;
const char *binpath = getenv("CUDA_BIN_PATH");
static char nvcc[65536];
if (binpath) {
path_join(binpath, executable, sizeof(nvcc), nvcc);
if (path_exists(nvcc))
return nvcc;
}
for (i = 0; defaultpaths[i]; ++i) {
path_join(defaultpaths[i], executable, sizeof(nvcc), nvcc);
if (path_exists(nvcc))
return nvcc;
}
#ifndef _WIN32
{
FILE *handle = popen("which nvcc", "r");
if (handle) {
char buffer[4096] = {0};
int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
buffer[len] = '\\0';
pclose(handle);
if (buffer[0])
return "nvcc";
}
}
#endif
return NULL;
}
int cuewCompilerVersion(void) {
const char *path = cuewCompilerPath();
const char *marker = "Cuda compilation tools, release ";
FILE *pipe;
int major, minor;
char *versionstr;
char buf[128];
char output[65536] = "\\0";
char command[65536] = "\\0";
if (path == NULL)
return 0;
/* get --version output */
strncpy(command, path, sizeof(command));
strncat(command, " --version", sizeof(command) - strlen(path));
pipe = popen(command, "r");
if (!pipe) {
fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
return 0;
}
while (!feof(pipe)) {
if (fgets(buf, sizeof(buf), pipe) != NULL) {
strncat(output, buf, sizeof(output) - strlen(output));
}
}
pclose(pipe);
/* parse version number */
versionstr = strstr(output, marker);
if (versionstr == NULL) {
fprintf(stderr, "CUDA: failed to find version number in:\\n\\n%s\\n", output);
return 0;
}
versionstr += strlen(marker);
if (sscanf(versionstr, "%d.%d", &major, &minor) < 2) {
fprintf(stderr, "CUDA: failed to parse version number from:\\n\\n%s\\n", output);
return 0;
}
return 10 * major + minor;
}
"""

591
extern/cuew/auto/cuew_gen.py vendored Normal file
View File

@ -0,0 +1,591 @@
#!/usr/bin/env python3
#
# Copyright 2014 Blender Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License
# This script generates either header or implementation file from
# a CUDA header files.
#
# Usage: cuew hdr|impl [/path/to/cuda/includes]
# - hdr means header file will be generated and printed to stdout.
# - impl means implementation file will be generated and printed to stdout.
# - /path/to/cuda/includes is a path to a folder with cuda.h and cudaGL.h
# for which wrangler will be generated.
import os
import sys
from cuda_errors import CUDA_ERRORS
from pycparser import c_parser, c_ast, parse_file
from subprocess import Popen, PIPE
INCLUDE_DIR = "/usr/include"
LIB = "CUEW"
REAL_LIB = "CUDA"
VERSION_MAJOR = "1"
VERSION_MINOR = "2"
COPYRIGHT = """/*
* Copyright 2011-2014 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/"""
FILES = ["cuda.h", "cudaGL.h"]
TYPEDEFS = []
FUNC_TYPEDEFS = []
SYMBOLS = []
DEFINES = []
DEFINES_V2 = []
ERRORS = []
class FuncDefVisitor(c_ast.NodeVisitor):
indent = 0
prev_complex = False
dummy_typedefs = ['size_t', 'CUdeviceptr']
def _get_quals_string(self, node):
if node.quals:
return ' '.join(node.quals) + ' '
return ''
def _get_ident_type(self, node):
if isinstance(node, c_ast.PtrDecl):
return self._get_ident_type(node.type.type) + '*'
if isinstance(node, c_ast.ArrayDecl):
return self._get_ident_type(node.type)
elif isinstance(node, c_ast.Struct):
if node.name:
return 'struct ' + node.name
else:
self.indent += 1
struct = self._stringify_struct(node)
self.indent -= 1
return "struct {\n" + \
struct + (" " * self.indent) + "}"
elif isinstance(node, c_ast.Union):
self.indent += 1
union = self._stringify_struct(node)
self.indent -= 1
return "union {\n" + union + (" " * self.indent) + "}"
elif isinstance(node, c_ast.Enum):
return 'enum ' + node.name
elif isinstance(node, c_ast.TypeDecl):
return self._get_ident_type(node.type)
else:
return node.names[0]
def _stringify_param(self, param):
param_type = param.type
result = self._get_quals_string(param)
result += self._get_ident_type(param_type)
if param.name:
result += ' ' + param.name
if isinstance(param_type, c_ast.ArrayDecl):
# TODO(sergey): Workaround to deal with the
# preprocessed file where array size got
# substituded.
dim = param_type.dim.value
if param.name == "reserved" and dim == "64":
dim = "CU_IPC_HANDLE_SIZE"
result += '[' + dim + ']'
return result
def _stringify_params(self, params):
result = []
for param in params:
result.append(self._stringify_param(param))
return ', '.join(result)
def _stringify_struct(self, node):
result = ""
children = node.children()
for child in children:
member = self._stringify_param(child[1])
result += (" " * self.indent) + member + ";\n"
return result
def _stringify_enum(self, node):
result = ""
children = node.children()
for child in children:
if isinstance(child[1], c_ast.EnumeratorList):
enumerators = child[1].enumerators
for enumerator in enumerators:
result += (" " * self.indent) + enumerator.name
if enumerator.value:
result += " = " + enumerator.value.value
result += ",\n"
if enumerator.name.startswith("CUDA_ERROR_"):
ERRORS.append(enumerator.name)
return result
def visit_Decl(self, node):
if node.type.__class__.__name__ == 'FuncDecl':
if isinstance(node.type, c_ast.FuncDecl):
func_decl = node.type
func_decl_type = func_decl.type
typedef = 'typedef '
symbol_name = None
if isinstance(func_decl_type, c_ast.TypeDecl):
symbol_name = func_decl_type.declname
typedef += self._get_quals_string(func_decl_type)
typedef += self._get_ident_type(func_decl_type.type)
typedef += ' CUDAAPI'
typedef += ' t' + symbol_name
elif isinstance(func_decl_type, c_ast.PtrDecl):
ptr_type = func_decl_type.type
symbol_name = ptr_type.declname
typedef += self._get_quals_string(ptr_type)
typedef += self._get_ident_type(func_decl_type)
typedef += ' CUDAAPI'
typedef += ' t' + symbol_name
typedef += '(' + \
self._stringify_params(func_decl.args.params) + \
');'
SYMBOLS.append(symbol_name)
FUNC_TYPEDEFS.append(typedef)
def visit_Typedef(self, node):
if node.name in self.dummy_typedefs:
return
complex = False
type = self._get_ident_type(node.type)
quals = self._get_quals_string(node)
if isinstance(node.type.type, c_ast.Struct):
self.indent += 1
struct = self._stringify_struct(node.type.type)
self.indent -= 1
typedef = quals + type + " {\n" + struct + "} " + node.name
complex = True
elif isinstance(node.type.type, c_ast.Enum):
self.indent += 1
enum = self._stringify_enum(node.type.type)
self.indent -= 1
typedef = quals + type + " {\n" + enum + "} " + node.name
complex = True
else:
typedef = quals + type + " " + node.name
if complex or self.prev_complex:
typedef = "\ntypedef " + typedef + ";"
else:
typedef = "typedef " + typedef + ";"
TYPEDEFS.append(typedef)
self.prev_complex = complex
def get_latest_cpp():
path_prefix = "/usr/bin"
for cpp_version in ["9", "8", "7", "6", "5", "4"]:
test_cpp = os.path.join(path_prefix, "cpp-4." + cpp_version)
if os.path.exists(test_cpp):
return test_cpp
return None
def preprocess_file(filename, cpp_path):
args = [cpp_path, "-I./"]
if filename.endswith("GL.h"):
args.append("-DCUDAAPI= ")
args.append(filename)
try:
pipe = Popen(args,
stdout=PIPE,
universal_newlines=True)
text = pipe.communicate()[0]
except OSError as e:
raise RuntimeError("Unable to invoke 'cpp'. " +
'Make sure its path was passed correctly\n' +
('Original error: %s' % e))
return text
def parse_files():
parser = c_parser.CParser()
cpp_path = get_latest_cpp()
for filename in FILES:
filepath = os.path.join(INCLUDE_DIR, filename)
dummy_typedefs = {}
text = preprocess_file(filepath, cpp_path)
if filepath.endswith("GL.h"):
dummy_typedefs = {
"CUresult": "int",
"CUgraphicsResource": "void *",
"CUdevice": "void *",
"CUcontext": "void *",
"CUdeviceptr": "void *",
"CUstream": "void *"
}
text = "typedef int GLint;\n" + text
text = "typedef unsigned int GLuint;\n" + text
text = "typedef unsigned int GLenum;\n" + text
text = "typedef long size_t;\n" + text
for typedef in sorted(dummy_typedefs):
text = "typedef " + dummy_typedefs[typedef] + " " + \
typedef + ";\n" + text
ast = parser.parse(text, filepath)
with open(filepath) as f:
lines = f.readlines()
for line in lines:
if line.startswith("#define"):
line = line[8:-1]
token = line.split()
if token[0] not in ("__cuda_cuda_h__",
"CUDA_CB",
"CUDAAPI"):
DEFINES.append(token)
for line in lines:
# TODO(sergey): Use better matching rule for _v2 symbols.
if line[0].isspace() and line.lstrip().startswith("#define"):
line = line[12:-1]
token = line.split()
if len(token) == 2 and token[1].endswith("_v2"):
DEFINES_V2.append(token)
v = FuncDefVisitor()
for typedef in dummy_typedefs:
v.dummy_typedefs.append(typedef)
v.visit(ast)
FUNC_TYPEDEFS.append('')
SYMBOLS.append('')
def print_copyright():
print(COPYRIGHT)
print("")
def open_header_guard():
print("#ifndef __%s_H__" % (LIB))
print("#define __%s_H__" % (LIB))
print("")
print("#ifdef __cplusplus")
print("extern \"C\" {")
print("#endif")
print("")
def close_header_guard():
print("")
print("#ifdef __cplusplus")
print("}")
print("#endif")
print("")
print("#endif /* __%s_H__ */" % (LIB))
def print_header():
print_copyright()
open_header_guard()
# Fot size_t.
print("#include <stdlib.h>")
print("")
print("/* Defines. */")
print("#define %s_VERSION_MAJOR %s" % (LIB, VERSION_MAJOR))
print("#define %s_VERSION_MINOR %s" % (LIB, VERSION_MINOR))
print("")
for define in DEFINES:
print('#define %s' % (' '.join(define)))
print("")
print("""/* Functions which changed 3.1 -> 3.2 for 64 bit stuff,
* the cuda library has both the old ones for compatibility and new
* ones with _v2 postfix,
*/""")
for define in DEFINES_V2:
print('#define %s' % (' '.join(define)))
print("")
print("/* Types. */")
# We handle this specially because of the file is
# getting preprocessed.
print("""#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned int CUdeviceptr;
#endif
""")
for typedef in TYPEDEFS:
print('%s' % (typedef))
# TDO(sergey): This is only specific to CUDA wrapper.
print("""
#ifdef _WIN32
# define CUDAAPI __stdcall
# define CUDA_CB __stdcall
#else
# define CUDAAPI
# define CUDA_CB
#endif
""")
print("/* Function types. */")
for func_typedef in FUNC_TYPEDEFS:
print('%s' % (func_typedef))
print("")
print("/* Function declarations. */")
for symbol in SYMBOLS:
if symbol:
print('extern t%s *%s;' % (symbol, symbol))
else:
print("")
print("")
print("enum {")
print(" CUEW_SUCCESS = 0,")
print(" CUEW_ERROR_OPEN_FAILED = -1,")
print(" CUEW_ERROR_ATEXIT_FAILED = -2,")
print("};")
print("")
print("int %sInit(void);" % (LIB.lower()))
# TODO(sergey): Get rid of hardcoded CUresult.
print("const char *%sErrorString(CUresult result);" % (LIB.lower()))
print("const char *cuewCompilerPath(void);")
print("int cuewCompilerVersion(void);")
close_header_guard()
def print_dl_wrapper():
print("""#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# define VC_EXTRALEAN
# include <windows.h>
/* Utility macros. */
typedef HMODULE DynamicLibrary;
# define dynamic_library_open(path) LoadLibrary(path)
# define dynamic_library_close(lib) FreeLibrary(lib)
# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
#else
# include <dlfcn.h>
typedef void* DynamicLibrary;
# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
# define dynamic_library_close(lib) dlclose(lib)
# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
#endif
""")
def print_dl_helper_macro():
print("""#define %s_LIBRARY_FIND_CHECKED(name) \\
name = (t##name *)dynamic_library_find(lib, #name);
#define %s_LIBRARY_FIND(name) \\
name = (t##name *)dynamic_library_find(lib, #name); \\
assert(name);
static DynamicLibrary lib;""" % (REAL_LIB, REAL_LIB))
print("")
def print_dl_close():
print("""static void %sExit(void) {
if(lib != NULL) {
/* Ignore errors. */
dynamic_library_close(lib);
lib = NULL;
}
}""" % (LIB.lower()))
print("")
def print_lib_path():
# TODO(sergey): get rid of hardcoded libraries.
print("""#ifdef _WIN32
/* Expected in c:/windows/system or similar, no path needed. */
const char *path = "nvcuda.dll";
#elif defined(__APPLE__)
/* Default installation path. */
const char *path = "/usr/local/cuda/lib/libcuda.dylib";
#else
const char *path = "libcuda.so";
#endif""")
def print_init_guard():
print(""" static int initialized = 0;
static int result = 0;
int error, driver_version;
if (initialized) {
return result;
}
initialized = 1;
error = atexit(cuewExit);
if (error) {
result = CUEW_ERROR_ATEXIT_FAILED;
return result;
}
/* Load library. */
lib = dynamic_library_open(path);
if (lib == NULL) {
result = CUEW_ERROR_OPEN_FAILED;
return result;
}""")
print("")
def print_driver_version_guard():
# TODO(sergey): Currently it's hardcoded for CUDA only.
print(""" /* Detect driver version. */
driver_version = 1000;
%s_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
if (cuDriverGetVersion) {
cuDriverGetVersion(&driver_version);
}
/* We require version 4.0. */
if (driver_version < 4000) {
result = CUEW_ERROR_OPEN_FAILED;
return result;
}""" % (REAL_LIB))
def print_dl_init():
print("int %sInit(void) {" % (LIB.lower()))
print(" /* Library paths. */")
print_lib_path()
print_init_guard()
print_driver_version_guard()
print(" /* Fetch all function pointers. */")
for symbol in SYMBOLS:
if symbol:
print(" %s_LIBRARY_FIND(%s);" % (REAL_LIB, symbol))
else:
print("")
print("")
print(" result = CUEW_SUCCESS;")
print(" return result;")
print("}")
def print_implementation():
print_copyright()
# TODO(sergey): Get rid of hardcoded header.
print("""#ifdef _MSC_VER
# define snprintf _snprintf
# define popen _popen
# define pclose _pclose
# define _CRT_SECURE_NO_WARNINGS
#endif
""")
print("#include <cuew.h>")
print("#include <assert.h>")
print("#include <stdio.h>")
print("#include <string.h>")
print("#include <sys/stat.h>")
print("")
print_dl_wrapper()
print_dl_helper_macro()
print("/* Function definitions. */")
for symbol in SYMBOLS:
if symbol:
print('t%s *%s;' % (symbol, symbol))
else:
print("")
print("")
print_dl_close()
print("/* Implementation function. */")
print_dl_init()
print("")
# TODO(sergey): Get rid of hardcoded CUresult.
print("const char *%sErrorString(CUresult result) {" % (LIB.lower()))
print(" switch(result) {")
print(" case CUDA_SUCCESS: return \"No errors\";")
for error in ERRORS:
if error in CUDA_ERRORS:
str = CUDA_ERRORS[error]
else:
str = error[11:]
print(" case %s: return \"%s\";" % (error, str))
print(" default: return \"Unknown CUDA error value\";")
print(" }")
print("}")
from cuda_extra import extra_code
print(extra_code)
if __name__ == "__main__":
if len(sys.argv) != 2 and len(sys.argv) != 3:
print("Usage: %s hdr|impl [/path/to/cuda/toolkit/include]" %
(sys.argv[0]))
exit(1)
if len(sys.argv) == 3:
INCLUDE_DIR = sys.argv[2]
parse_files()
if sys.argv[1] == "hdr":
print_header()
elif sys.argv[1] == "impl":
print_implementation()
else:
print("Unknown command %s" % (sys.argv[1]))
exit(1)

10
extern/cuew/auto/cuew_gen.sh vendored Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
# This script invokes cuew_gen.py and updates the
# header and source files in the repository.
SCRIPT=`realpath -s $0`
DIR=`dirname $SCRIPT`
python ${DIR}/cuew_gen.py hdr $@ > $DIR/../include/cuew.h
python ${DIR}/cuew_gen.py impl $@ > $DIR/../src/cuew.c

3
extern/cuew/auto/stdlib.h vendored Normal file
View File

@ -0,0 +1,3 @@
/* This file is needed to workaround issue with parsing system headers. */
typedef long size_t;

1138
extern/cuew/include/cuew.h vendored Normal file

File diff suppressed because it is too large Load Diff

710
extern/cuew/src/cuew.c vendored Normal file
View File

@ -0,0 +1,710 @@
/*
* Copyright 2011-2014 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
#ifdef _MSC_VER
# define snprintf _snprintf
# define popen _popen
# define pclose _pclose
# define _CRT_SECURE_NO_WARNINGS
#endif
#include <cuew.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# define VC_EXTRALEAN
# include <windows.h>
/* Utility macros. */
typedef HMODULE DynamicLibrary;
# define dynamic_library_open(path) LoadLibrary(path)
# define dynamic_library_close(lib) FreeLibrary(lib)
# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
#else
# include <dlfcn.h>
typedef void* DynamicLibrary;
# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
# define dynamic_library_close(lib) dlclose(lib)
# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
#endif
#define CUDA_LIBRARY_FIND_CHECKED(name) \
name = (t##name *)dynamic_library_find(lib, #name);
#define CUDA_LIBRARY_FIND(name) \
name = (t##name *)dynamic_library_find(lib, #name); \
assert(name);
static DynamicLibrary lib;
/* Function definitions. */
tcuGetErrorString *cuGetErrorString;
tcuGetErrorName *cuGetErrorName;
tcuInit *cuInit;
tcuDriverGetVersion *cuDriverGetVersion;
tcuDeviceGet *cuDeviceGet;
tcuDeviceGetCount *cuDeviceGetCount;
tcuDeviceGetName *cuDeviceGetName;
tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
tcuDeviceGetAttribute *cuDeviceGetAttribute;
tcuDeviceGetProperties *cuDeviceGetProperties;
tcuDeviceComputeCapability *cuDeviceComputeCapability;
tcuCtxCreate_v2 *cuCtxCreate_v2;
tcuCtxDestroy_v2 *cuCtxDestroy_v2;
tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
tcuCtxSetCurrent *cuCtxSetCurrent;
tcuCtxGetCurrent *cuCtxGetCurrent;
tcuCtxGetDevice *cuCtxGetDevice;
tcuCtxSynchronize *cuCtxSynchronize;
tcuCtxSetLimit *cuCtxSetLimit;
tcuCtxGetLimit *cuCtxGetLimit;
tcuCtxGetCacheConfig *cuCtxGetCacheConfig;
tcuCtxSetCacheConfig *cuCtxSetCacheConfig;
tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig;
tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig;
tcuCtxGetApiVersion *cuCtxGetApiVersion;
tcuCtxGetStreamPriorityRange *cuCtxGetStreamPriorityRange;
tcuCtxAttach *cuCtxAttach;
tcuCtxDetach *cuCtxDetach;
tcuModuleLoad *cuModuleLoad;
tcuModuleLoadData *cuModuleLoadData;
tcuModuleLoadDataEx *cuModuleLoadDataEx;
tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
tcuModuleUnload *cuModuleUnload;
tcuModuleGetFunction *cuModuleGetFunction;
tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
tcuModuleGetTexRef *cuModuleGetTexRef;
tcuModuleGetSurfRef *cuModuleGetSurfRef;
tcuLinkCreate *cuLinkCreate;
tcuLinkAddData *cuLinkAddData;
tcuLinkAddFile *cuLinkAddFile;
tcuLinkComplete *cuLinkComplete;
tcuLinkDestroy *cuLinkDestroy;
tcuMemGetInfo_v2 *cuMemGetInfo_v2;
tcuMemAlloc_v2 *cuMemAlloc_v2;
tcuMemAllocPitch_v2 *cuMemAllocPitch_v2;
tcuMemFree_v2 *cuMemFree_v2;
tcuMemGetAddressRange_v2 *cuMemGetAddressRange_v2;
tcuMemAllocHost_v2 *cuMemAllocHost_v2;
tcuMemFreeHost *cuMemFreeHost;
tcuMemHostAlloc *cuMemHostAlloc;
tcuMemHostGetDevicePointer_v2 *cuMemHostGetDevicePointer_v2;
tcuMemHostGetFlags *cuMemHostGetFlags;
tcuMemAllocManaged *cuMemAllocManaged;
tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId;
tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId;
tcuIpcGetEventHandle *cuIpcGetEventHandle;
tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
tcuIpcGetMemHandle *cuIpcGetMemHandle;
tcuIpcOpenMemHandle *cuIpcOpenMemHandle;
tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
tcuMemHostRegister *cuMemHostRegister;
tcuMemHostUnregister *cuMemHostUnregister;
tcuMemcpy *cuMemcpy;
tcuMemcpyPeer *cuMemcpyPeer;
tcuMemcpyHtoD_v2 *cuMemcpyHtoD_v2;
tcuMemcpyDtoH_v2 *cuMemcpyDtoH_v2;
tcuMemcpyDtoD_v2 *cuMemcpyDtoD_v2;
tcuMemcpyDtoA_v2 *cuMemcpyDtoA_v2;
tcuMemcpyAtoD_v2 *cuMemcpyAtoD_v2;
tcuMemcpyHtoA_v2 *cuMemcpyHtoA_v2;
tcuMemcpyAtoH_v2 *cuMemcpyAtoH_v2;
tcuMemcpyAtoA_v2 *cuMemcpyAtoA_v2;
tcuMemcpy2D_v2 *cuMemcpy2D_v2;
tcuMemcpy2DUnaligned_v2 *cuMemcpy2DUnaligned_v2;
tcuMemcpy3D_v2 *cuMemcpy3D_v2;
tcuMemcpy3DPeer *cuMemcpy3DPeer;
tcuMemcpyAsync *cuMemcpyAsync;
tcuMemcpyPeerAsync *cuMemcpyPeerAsync;
tcuMemcpyHtoDAsync_v2 *cuMemcpyHtoDAsync_v2;
tcuMemcpyDtoHAsync_v2 *cuMemcpyDtoHAsync_v2;
tcuMemcpyDtoDAsync_v2 *cuMemcpyDtoDAsync_v2;
tcuMemcpyHtoAAsync_v2 *cuMemcpyHtoAAsync_v2;
tcuMemcpyAtoHAsync_v2 *cuMemcpyAtoHAsync_v2;
tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync_v2;
tcuMemcpy3DAsync_v2 *cuMemcpy3DAsync_v2;
tcuMemcpy3DPeerAsync *cuMemcpy3DPeerAsync;
tcuMemsetD8_v2 *cuMemsetD8_v2;
tcuMemsetD16_v2 *cuMemsetD16_v2;
tcuMemsetD32_v2 *cuMemsetD32_v2;
tcuMemsetD2D8_v2 *cuMemsetD2D8_v2;
tcuMemsetD2D16_v2 *cuMemsetD2D16_v2;
tcuMemsetD2D32_v2 *cuMemsetD2D32_v2;
tcuMemsetD8Async *cuMemsetD8Async;
tcuMemsetD16Async *cuMemsetD16Async;
tcuMemsetD32Async *cuMemsetD32Async;
tcuMemsetD2D8Async *cuMemsetD2D8Async;
tcuMemsetD2D16Async *cuMemsetD2D16Async;
tcuMemsetD2D32Async *cuMemsetD2D32Async;
tcuArrayCreate_v2 *cuArrayCreate_v2;
tcuArrayGetDescriptor_v2 *cuArrayGetDescriptor_v2;
tcuArrayDestroy *cuArrayDestroy;
tcuArray3DCreate_v2 *cuArray3DCreate_v2;
tcuArray3DGetDescriptor_v2 *cuArray3DGetDescriptor_v2;
tcuMipmappedArrayCreate *cuMipmappedArrayCreate;
tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
tcuPointerGetAttribute *cuPointerGetAttribute;
tcuPointerSetAttribute *cuPointerSetAttribute;
tcuStreamCreate *cuStreamCreate;
tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
tcuStreamGetPriority *cuStreamGetPriority;
tcuStreamGetFlags *cuStreamGetFlags;
tcuStreamWaitEvent *cuStreamWaitEvent;
tcuStreamAddCallback *cuStreamAddCallback;
tcuStreamAttachMemAsync *cuStreamAttachMemAsync;
tcuStreamQuery *cuStreamQuery;
tcuStreamSynchronize *cuStreamSynchronize;
tcuStreamDestroy_v2 *cuStreamDestroy_v2;
tcuEventCreate *cuEventCreate;
tcuEventRecord *cuEventRecord;
tcuEventQuery *cuEventQuery;
tcuEventSynchronize *cuEventSynchronize;
tcuEventDestroy_v2 *cuEventDestroy_v2;
tcuEventElapsedTime *cuEventElapsedTime;
tcuFuncGetAttribute *cuFuncGetAttribute;
tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
tcuLaunchKernel *cuLaunchKernel;
tcuFuncSetBlockShape *cuFuncSetBlockShape;
tcuFuncSetSharedSize *cuFuncSetSharedSize;
tcuParamSetSize *cuParamSetSize;
tcuParamSeti *cuParamSeti;
tcuParamSetf *cuParamSetf;
tcuParamSetv *cuParamSetv;
tcuLaunch *cuLaunch;
tcuLaunchGrid *cuLaunchGrid;
tcuLaunchGridAsync *cuLaunchGridAsync;
tcuParamSetTexRef *cuParamSetTexRef;
tcuTexRefSetArray *cuTexRefSetArray;
tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
tcuTexRefSetAddress2D_v3 *cuTexRefSetAddress2D_v3;
tcuTexRefSetFormat *cuTexRefSetFormat;
tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
tcuTexRefSetMipmapFilterMode *cuTexRefSetMipmapFilterMode;
tcuTexRefSetMipmapLevelBias *cuTexRefSetMipmapLevelBias;
tcuTexRefSetMipmapLevelClamp *cuTexRefSetMipmapLevelClamp;
tcuTexRefSetMaxAnisotropy *cuTexRefSetMaxAnisotropy;
tcuTexRefSetFlags *cuTexRefSetFlags;
tcuTexRefGetAddress_v2 *cuTexRefGetAddress_v2;
tcuTexRefGetArray *cuTexRefGetArray;
tcuTexRefGetMipmappedArray *cuTexRefGetMipmappedArray;
tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
tcuTexRefGetFormat *cuTexRefGetFormat;
tcuTexRefGetMipmapFilterMode *cuTexRefGetMipmapFilterMode;
tcuTexRefGetMipmapLevelBias *cuTexRefGetMipmapLevelBias;
tcuTexRefGetMipmapLevelClamp *cuTexRefGetMipmapLevelClamp;
tcuTexRefGetMaxAnisotropy *cuTexRefGetMaxAnisotropy;
tcuTexRefGetFlags *cuTexRefGetFlags;
tcuTexRefCreate *cuTexRefCreate;
tcuTexRefDestroy *cuTexRefDestroy;
tcuSurfRefSetArray *cuSurfRefSetArray;
tcuSurfRefGetArray *cuSurfRefGetArray;
tcuTexObjectCreate *cuTexObjectCreate;
tcuTexObjectDestroy *cuTexObjectDestroy;
tcuTexObjectGetResourceDesc *cuTexObjectGetResourceDesc;
tcuTexObjectGetTextureDesc *cuTexObjectGetTextureDesc;
tcuTexObjectGetResourceViewDesc *cuTexObjectGetResourceViewDesc;
tcuSurfObjectCreate *cuSurfObjectCreate;
tcuSurfObjectDestroy *cuSurfObjectDestroy;
tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc;
tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer;
tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess;
tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess;
tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
tcuGraphicsMapResources *cuGraphicsMapResources;
tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
tcuGetExportTable *cuGetExportTable;
tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
tcuGLGetDevices *cuGLGetDevices;
tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
tcuGLInit *cuGLInit;
tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
tcuGLMapBufferObject_v2 *cuGLMapBufferObject_v2;
tcuGLUnmapBufferObject *cuGLUnmapBufferObject;
tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject;
tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags;
tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2;
tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync;
static void cuewExit(void) {
if(lib != NULL) {
/* Ignore errors. */
dynamic_library_close(lib);
lib = NULL;
}
}
/* Implementation function. */
int cuewInit(void) {
/* Library paths. */
#ifdef _WIN32
/* Expected in c:/windows/system or similar, no path needed. */
const char *path = "nvcuda.dll";
#elif defined(__APPLE__)
/* Default installation path. */
const char *path = "/usr/local/cuda/lib/libcuda.dylib";
#else
const char *path = "libcuda.so";
#endif
static int initialized = 0;
static int result = 0;
int error, driver_version;
if (initialized) {
return result;
}
initialized = 1;
error = atexit(cuewExit);
if (error) {
result = CUEW_ERROR_ATEXIT_FAILED;
return result;
}
/* Load library. */
lib = dynamic_library_open(path);
if (lib == NULL) {
result = CUEW_ERROR_OPEN_FAILED;
return result;
}
/* Detect driver version. */
driver_version = 1000;
CUDA_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
if (cuDriverGetVersion) {
cuDriverGetVersion(&driver_version);
}
/* We require version 4.0. */
if (driver_version < 4000) {
result = CUEW_ERROR_OPEN_FAILED;
return result;
}
/* Fetch all function pointers. */
CUDA_LIBRARY_FIND(cuGetErrorString);
CUDA_LIBRARY_FIND(cuGetErrorName);
CUDA_LIBRARY_FIND(cuInit);
CUDA_LIBRARY_FIND(cuDriverGetVersion);
CUDA_LIBRARY_FIND(cuDeviceGet);
CUDA_LIBRARY_FIND(cuDeviceGetCount);
CUDA_LIBRARY_FIND(cuDeviceGetName);
CUDA_LIBRARY_FIND(cuDeviceTotalMem_v2);
CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
CUDA_LIBRARY_FIND(cuDeviceGetProperties);
CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
CUDA_LIBRARY_FIND(cuCtxCreate_v2);
CUDA_LIBRARY_FIND(cuCtxDestroy_v2);
CUDA_LIBRARY_FIND(cuCtxPushCurrent_v2);
CUDA_LIBRARY_FIND(cuCtxPopCurrent_v2);
CUDA_LIBRARY_FIND(cuCtxSetCurrent);
CUDA_LIBRARY_FIND(cuCtxGetCurrent);
CUDA_LIBRARY_FIND(cuCtxGetDevice);
CUDA_LIBRARY_FIND(cuCtxSynchronize);
CUDA_LIBRARY_FIND(cuCtxSetLimit);
CUDA_LIBRARY_FIND(cuCtxGetLimit);
CUDA_LIBRARY_FIND(cuCtxGetCacheConfig);
CUDA_LIBRARY_FIND(cuCtxSetCacheConfig);
CUDA_LIBRARY_FIND(cuCtxGetSharedMemConfig);
CUDA_LIBRARY_FIND(cuCtxSetSharedMemConfig);
CUDA_LIBRARY_FIND(cuCtxGetApiVersion);
CUDA_LIBRARY_FIND(cuCtxGetStreamPriorityRange);
CUDA_LIBRARY_FIND(cuCtxAttach);
CUDA_LIBRARY_FIND(cuCtxDetach);
CUDA_LIBRARY_FIND(cuModuleLoad);
CUDA_LIBRARY_FIND(cuModuleLoadData);
CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
CUDA_LIBRARY_FIND(cuModuleUnload);
CUDA_LIBRARY_FIND(cuModuleGetFunction);
CUDA_LIBRARY_FIND(cuModuleGetGlobal_v2);
CUDA_LIBRARY_FIND(cuModuleGetTexRef);
CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
CUDA_LIBRARY_FIND(cuLinkCreate);
CUDA_LIBRARY_FIND(cuLinkAddData);
CUDA_LIBRARY_FIND(cuLinkAddFile);
CUDA_LIBRARY_FIND(cuLinkComplete);
CUDA_LIBRARY_FIND(cuLinkDestroy);
CUDA_LIBRARY_FIND(cuMemGetInfo_v2);
CUDA_LIBRARY_FIND(cuMemAlloc_v2);
CUDA_LIBRARY_FIND(cuMemAllocPitch_v2);
CUDA_LIBRARY_FIND(cuMemFree_v2);
CUDA_LIBRARY_FIND(cuMemGetAddressRange_v2);
CUDA_LIBRARY_FIND(cuMemAllocHost_v2);
CUDA_LIBRARY_FIND(cuMemFreeHost);
CUDA_LIBRARY_FIND(cuMemHostAlloc);
CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer_v2);
CUDA_LIBRARY_FIND(cuMemHostGetFlags);
CUDA_LIBRARY_FIND(cuMemAllocManaged);
CUDA_LIBRARY_FIND(cuDeviceGetByPCIBusId);
CUDA_LIBRARY_FIND(cuDeviceGetPCIBusId);
CUDA_LIBRARY_FIND(cuIpcGetEventHandle);
CUDA_LIBRARY_FIND(cuIpcOpenEventHandle);
CUDA_LIBRARY_FIND(cuIpcGetMemHandle);
CUDA_LIBRARY_FIND(cuIpcOpenMemHandle);
CUDA_LIBRARY_FIND(cuIpcCloseMemHandle);
CUDA_LIBRARY_FIND(cuMemHostRegister);
CUDA_LIBRARY_FIND(cuMemHostUnregister);
CUDA_LIBRARY_FIND(cuMemcpy);
CUDA_LIBRARY_FIND(cuMemcpyPeer);
CUDA_LIBRARY_FIND(cuMemcpyHtoD_v2);
CUDA_LIBRARY_FIND(cuMemcpyDtoH_v2);
CUDA_LIBRARY_FIND(cuMemcpyDtoD_v2);
CUDA_LIBRARY_FIND(cuMemcpyDtoA_v2);
CUDA_LIBRARY_FIND(cuMemcpyAtoD_v2);
CUDA_LIBRARY_FIND(cuMemcpyHtoA_v2);
CUDA_LIBRARY_FIND(cuMemcpyAtoH_v2);
CUDA_LIBRARY_FIND(cuMemcpyAtoA_v2);
CUDA_LIBRARY_FIND(cuMemcpy2D_v2);
CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned_v2);
CUDA_LIBRARY_FIND(cuMemcpy3D_v2);
CUDA_LIBRARY_FIND(cuMemcpy3DPeer);
CUDA_LIBRARY_FIND(cuMemcpyAsync);
CUDA_LIBRARY_FIND(cuMemcpyPeerAsync);
CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync_v2);
CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync_v2);
CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync_v2);
CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync_v2);
CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync_v2);
CUDA_LIBRARY_FIND(cuMemcpy2DAsync_v2);
CUDA_LIBRARY_FIND(cuMemcpy3DAsync_v2);
CUDA_LIBRARY_FIND(cuMemcpy3DPeerAsync);
CUDA_LIBRARY_FIND(cuMemsetD8_v2);
CUDA_LIBRARY_FIND(cuMemsetD16_v2);
CUDA_LIBRARY_FIND(cuMemsetD32_v2);
CUDA_LIBRARY_FIND(cuMemsetD2D8_v2);
CUDA_LIBRARY_FIND(cuMemsetD2D16_v2);
CUDA_LIBRARY_FIND(cuMemsetD2D32_v2);
CUDA_LIBRARY_FIND(cuMemsetD8Async);
CUDA_LIBRARY_FIND(cuMemsetD16Async);
CUDA_LIBRARY_FIND(cuMemsetD32Async);
CUDA_LIBRARY_FIND(cuMemsetD2D8Async);
CUDA_LIBRARY_FIND(cuMemsetD2D16Async);
CUDA_LIBRARY_FIND(cuMemsetD2D32Async);
CUDA_LIBRARY_FIND(cuArrayCreate_v2);
CUDA_LIBRARY_FIND(cuArrayGetDescriptor_v2);
CUDA_LIBRARY_FIND(cuArrayDestroy);
CUDA_LIBRARY_FIND(cuArray3DCreate_v2);
CUDA_LIBRARY_FIND(cuArray3DGetDescriptor_v2);
CUDA_LIBRARY_FIND(cuMipmappedArrayCreate);
CUDA_LIBRARY_FIND(cuMipmappedArrayGetLevel);
CUDA_LIBRARY_FIND(cuMipmappedArrayDestroy);
CUDA_LIBRARY_FIND(cuPointerGetAttribute);
CUDA_LIBRARY_FIND(cuPointerSetAttribute);
CUDA_LIBRARY_FIND(cuStreamCreate);
CUDA_LIBRARY_FIND(cuStreamCreateWithPriority);
CUDA_LIBRARY_FIND(cuStreamGetPriority);
CUDA_LIBRARY_FIND(cuStreamGetFlags);
CUDA_LIBRARY_FIND(cuStreamWaitEvent);
CUDA_LIBRARY_FIND(cuStreamAddCallback);
CUDA_LIBRARY_FIND(cuStreamAttachMemAsync);
CUDA_LIBRARY_FIND(cuStreamQuery);
CUDA_LIBRARY_FIND(cuStreamSynchronize);
CUDA_LIBRARY_FIND(cuStreamDestroy_v2);
CUDA_LIBRARY_FIND(cuEventCreate);
CUDA_LIBRARY_FIND(cuEventRecord);
CUDA_LIBRARY_FIND(cuEventQuery);
CUDA_LIBRARY_FIND(cuEventSynchronize);
CUDA_LIBRARY_FIND(cuEventDestroy_v2);
CUDA_LIBRARY_FIND(cuEventElapsedTime);
CUDA_LIBRARY_FIND(cuFuncGetAttribute);
CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
CUDA_LIBRARY_FIND(cuFuncSetSharedMemConfig);
CUDA_LIBRARY_FIND(cuLaunchKernel);
CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
CUDA_LIBRARY_FIND(cuParamSetSize);
CUDA_LIBRARY_FIND(cuParamSeti);
CUDA_LIBRARY_FIND(cuParamSetf);
CUDA_LIBRARY_FIND(cuParamSetv);
CUDA_LIBRARY_FIND(cuLaunch);
CUDA_LIBRARY_FIND(cuLaunchGrid);
CUDA_LIBRARY_FIND(cuLaunchGridAsync);
CUDA_LIBRARY_FIND(cuParamSetTexRef);
CUDA_LIBRARY_FIND(cuTexRefSetArray);
CUDA_LIBRARY_FIND(cuTexRefSetMipmappedArray);
CUDA_LIBRARY_FIND(cuTexRefSetAddress_v2);
CUDA_LIBRARY_FIND(cuTexRefSetAddress2D_v3);
CUDA_LIBRARY_FIND(cuTexRefSetFormat);
CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
CUDA_LIBRARY_FIND(cuTexRefSetMipmapFilterMode);
CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelBias);
CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelClamp);
CUDA_LIBRARY_FIND(cuTexRefSetMaxAnisotropy);
CUDA_LIBRARY_FIND(cuTexRefSetFlags);
CUDA_LIBRARY_FIND(cuTexRefGetAddress_v2);
CUDA_LIBRARY_FIND(cuTexRefGetArray);
CUDA_LIBRARY_FIND(cuTexRefGetMipmappedArray);
CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
CUDA_LIBRARY_FIND(cuTexRefGetFormat);
CUDA_LIBRARY_FIND(cuTexRefGetMipmapFilterMode);
CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelBias);
CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelClamp);
CUDA_LIBRARY_FIND(cuTexRefGetMaxAnisotropy);
CUDA_LIBRARY_FIND(cuTexRefGetFlags);
CUDA_LIBRARY_FIND(cuTexRefCreate);
CUDA_LIBRARY_FIND(cuTexRefDestroy);
CUDA_LIBRARY_FIND(cuSurfRefSetArray);
CUDA_LIBRARY_FIND(cuSurfRefGetArray);
CUDA_LIBRARY_FIND(cuTexObjectCreate);
CUDA_LIBRARY_FIND(cuTexObjectDestroy);
CUDA_LIBRARY_FIND(cuTexObjectGetResourceDesc);
CUDA_LIBRARY_FIND(cuTexObjectGetTextureDesc);
CUDA_LIBRARY_FIND(cuTexObjectGetResourceViewDesc);
CUDA_LIBRARY_FIND(cuSurfObjectCreate);
CUDA_LIBRARY_FIND(cuSurfObjectDestroy);
CUDA_LIBRARY_FIND(cuSurfObjectGetResourceDesc);
CUDA_LIBRARY_FIND(cuDeviceCanAccessPeer);
CUDA_LIBRARY_FIND(cuCtxEnablePeerAccess);
CUDA_LIBRARY_FIND(cuCtxDisablePeerAccess);
CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedMipmappedArray);
CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer_v2);
CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags);
CUDA_LIBRARY_FIND(cuGraphicsMapResources);
CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
CUDA_LIBRARY_FIND(cuGetExportTable);
CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
CUDA_LIBRARY_FIND(cuGLGetDevices);
CUDA_LIBRARY_FIND(cuGLCtxCreate_v2);
CUDA_LIBRARY_FIND(cuGLInit);
CUDA_LIBRARY_FIND(cuGLRegisterBufferObject);
CUDA_LIBRARY_FIND(cuGLMapBufferObject_v2);
CUDA_LIBRARY_FIND(cuGLUnmapBufferObject);
CUDA_LIBRARY_FIND(cuGLUnregisterBufferObject);
CUDA_LIBRARY_FIND(cuGLSetBufferObjectMapFlags);
CUDA_LIBRARY_FIND(cuGLMapBufferObjectAsync_v2);
CUDA_LIBRARY_FIND(cuGLUnmapBufferObjectAsync);
result = CUEW_SUCCESS;
return result;
}
const char *cuewErrorString(CUresult result) {
switch(result) {
case CUDA_SUCCESS: return "No errors";
case CUDA_ERROR_INVALID_VALUE: return "Invalid value";
case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory";
case CUDA_ERROR_NOT_INITIALIZED: return "Driver not initialized";
case CUDA_ERROR_DEINITIALIZED: return "Driver deinitialized";
case CUDA_ERROR_PROFILER_DISABLED: return "PROFILER_DISABLED";
case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "PROFILER_NOT_INITIALIZED";
case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "PROFILER_ALREADY_STARTED";
case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "PROFILER_ALREADY_STOPPED";
case CUDA_ERROR_NO_DEVICE: return "No CUDA-capable device available";
case CUDA_ERROR_INVALID_DEVICE: return "Invalid device";
case CUDA_ERROR_INVALID_IMAGE: return "Invalid kernel image";
case CUDA_ERROR_INVALID_CONTEXT: return "Invalid context";
case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: return "Context already current";
case CUDA_ERROR_MAP_FAILED: return "Map failed";
case CUDA_ERROR_UNMAP_FAILED: return "Unmap failed";
case CUDA_ERROR_ARRAY_IS_MAPPED: return "Array is mapped";
case CUDA_ERROR_ALREADY_MAPPED: return "Already mapped";
case CUDA_ERROR_NO_BINARY_FOR_GPU: return "No binary for GPU";
case CUDA_ERROR_ALREADY_ACQUIRED: return "Already acquired";
case CUDA_ERROR_NOT_MAPPED: return "Not mapped";
case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: return "Mapped resource not available for access as an array";
case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "Mapped resource not available for access as a pointer";
case CUDA_ERROR_ECC_UNCORRECTABLE: return "Uncorrectable ECC error detected";
case CUDA_ERROR_UNSUPPORTED_LIMIT: return "CUlimit not supported by device";
case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "CONTEXT_ALREADY_IN_USE";
case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "PEER_ACCESS_UNSUPPORTED";
case CUDA_ERROR_INVALID_PTX: return "INVALID_PTX";
case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve";
case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: return "Shared object initialization failed";
case CUDA_ERROR_OPERATING_SYSTEM: return "OPERATING_SYSTEM";
case CUDA_ERROR_INVALID_HANDLE: return "Invalid handle";
case CUDA_ERROR_NOT_FOUND: return "Not found";
case CUDA_ERROR_NOT_READY: return "CUDA not ready";
case CUDA_ERROR_ILLEGAL_ADDRESS: return "ILLEGAL_ADDRESS";
case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "Launch exceeded resources";
case CUDA_ERROR_LAUNCH_TIMEOUT: return "Launch exceeded timeout";
case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "Launch with incompatible texturing";
case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "PEER_ACCESS_ALREADY_ENABLED";
case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "PEER_ACCESS_NOT_ENABLED";
case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "PRIMARY_CONTEXT_ACTIVE";
case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "CONTEXT_IS_DESTROYED";
case CUDA_ERROR_ASSERT: return "ASSERT";
case CUDA_ERROR_TOO_MANY_PEERS: return "TOO_MANY_PEERS";
case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "HOST_MEMORY_ALREADY_REGISTERED";
case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: return "HOST_MEMORY_NOT_REGISTERED";
case CUDA_ERROR_HARDWARE_STACK_ERROR: return "HARDWARE_STACK_ERROR";
case CUDA_ERROR_ILLEGAL_INSTRUCTION: return "ILLEGAL_INSTRUCTION";
case CUDA_ERROR_MISALIGNED_ADDRESS: return "MISALIGNED_ADDRESS";
case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "INVALID_ADDRESS_SPACE";
case CUDA_ERROR_INVALID_PC: return "INVALID_PC";
case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed";
case CUDA_ERROR_NOT_PERMITTED: return "NOT_PERMITTED";
case CUDA_ERROR_NOT_SUPPORTED: return "NOT_SUPPORTED";
case CUDA_ERROR_UNKNOWN: return "Unknown error";
default: return "Unknown CUDA error value";
}
}
static void path_join(const char *path1,
const char *path2,
int maxlen,
char *result) {
#if defined(WIN32) || defined(_WIN32)
const char separator = '\\';
#else
const char separator = '/';
#endif
int n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
if (n != -1 && n < maxlen) {
result[n] = '\0';
}
else {
result[maxlen - 1] = '\0';
}
}
static int path_exists(const char *path) {
struct stat st;
if (stat(path, &st)) {
return 0;
}
return 1;
}
const char *cuewCompilerPath(void) {
#ifdef _WIN32
const char *defaultpaths[] = {"C:/CUDA/bin", NULL};
const char *executable = "nvcc.exe";
#else
const char *defaultpaths[] = {
"/Developer/NVIDIA/CUDA-5.0/bin",
"/usr/local/cuda-5.0/bin",
"/usr/local/cuda/bin",
"/Developer/NVIDIA/CUDA-6.0/bin",
"/usr/local/cuda-6.0/bin",
"/Developer/NVIDIA/CUDA-5.5/bin",
"/usr/local/cuda-5.5/bin",
NULL};
const char *executable = "nvcc";
#endif
int i;
const char *binpath = getenv("CUDA_BIN_PATH");
static char nvcc[65536];
if (binpath) {
path_join(binpath, executable, sizeof(nvcc), nvcc);
if (path_exists(nvcc))
return nvcc;
}
for (i = 0; defaultpaths[i]; ++i) {
path_join(defaultpaths[i], executable, sizeof(nvcc), nvcc);
if (path_exists(nvcc))
return nvcc;
}
#ifndef _WIN32
{
FILE *handle = popen("which nvcc", "r");
if (handle) {
char buffer[4096] = {0};
int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
buffer[len] = '\0';
pclose(handle);
if (buffer[0])
return "nvcc";
}
}
#endif
return NULL;
}
int cuewCompilerVersion(void) {
const char *path = cuewCompilerPath();
const char *marker = "Cuda compilation tools, release ";
FILE *pipe;
int major, minor;
char *versionstr;
char buf[128];
char output[65536] = "\0";
char command[65536] = "\0";
if (path == NULL)
return 0;
/* get --version output */
strncpy(command, path, sizeof(command));
strncat(command, " --version", sizeof(command) - strlen(path));
pipe = popen(command, "r");
if (!pipe) {
fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
return 0;
}
while (!feof(pipe)) {
if (fgets(buf, sizeof(buf), pipe) != NULL) {
strncat(output, buf, sizeof(output) - strlen(output));
}
}
pclose(pipe);
/* parse version number */
versionstr = strstr(output, marker);
if (versionstr == NULL) {
fprintf(stderr, "CUDA: failed to find version number in:\n\n%s\n", output);
return 0;
}
versionstr += strlen(marker);
if (sscanf(versionstr, "%d.%d", &major, &minor) < 2) {
fprintf(stderr, "CUDA: failed to parse version number from:\n\n%s\n", output);
return 0;
}
return 10 * major + minor;
}

View File

@ -72,10 +72,6 @@ if(WITH_BULLET)
add_subdirectory(rigidbody)
endif()
if(WITH_COMPOSITOR)
add_subdirectory(opencl)
endif()
if(WITH_OPENNL)
add_subdirectory(opennl)
endif()

View File

@ -59,9 +59,6 @@ if env['WITH_BF_INTERNATIONAL']:
if env['WITH_BF_BULLET']:
SConscript (['rigidbody/SConscript'])
if env['WITH_BF_COMPOSITOR']:
SConscript (['opencl/SConscript'])
if env['OURPLATFORM'] in ('win32-vc', 'win32-mingw', 'win64-mingw', 'linuxcross', 'win64-vc'):
SConscript(['utfconv/SConscript'])

View File

@ -62,7 +62,7 @@ if env['WITH_BF_CYCLES_OSL']:
incs.extend('. bvh render device kernel kernel/osl kernel/svm util subd'.split())
incs.extend('#intern/guardedalloc #source/blender/makesrna #source/blender/makesdna #source/blender/blenlib'.split())
incs.extend('#source/blender/blenloader ../../source/blender/makesrna/intern'.split())
incs.extend('#extern/glew/include #intern/mikktspace'.split())
incs.extend('#extern/glew/include #extern/clew/include #extern/cuew/include #intern/mikktspace'.split())
incs.append(cycles['BF_OIIO_INC'])
incs.append(cycles['BF_BOOST_INC'])
incs.append(cycles['BF_OPENEXR_INC'].split())

View File

@ -28,6 +28,8 @@ set(LIBRARIES
${JPEG_LIBRARIES}
${ZLIB_LIBRARIES}
${TIFF_LIBRARY}
extern_clew
extern_cuew
)
if(WIN32)

View File

@ -11,6 +11,8 @@ set(INC
set(INC_SYS
${OPENGL_INCLUDE_DIR}
${GLEW_INCLUDE_PATH}
../../../extern/cuew/include
../../../extern/clew/include
)
set(SRC

View File

@ -20,12 +20,13 @@
#include "device.h"
#include "device_intern.h"
#include "util_cuda.h"
#include "cuew.h"
#include "clew.h"
#include "util_debug.h"
#include "util_foreach.h"
#include "util_half.h"
#include "util_math.h"
#include "util_opencl.h"
#include "util_opengl.h"
#include "util_time.h"
#include "util_types.h"
@ -141,7 +142,7 @@ Device *Device::create(DeviceInfo& info, Stats &stats, bool background)
break;
#ifdef WITH_CUDA
case DEVICE_CUDA:
if(cuLibraryInit())
if(device_cuda_init())
device = device_cuda_create(info, stats, background);
else
device = NULL;
@ -159,7 +160,7 @@ Device *Device::create(DeviceInfo& info, Stats &stats, bool background)
#endif
#ifdef WITH_OPENCL
case DEVICE_OPENCL:
if(clLibraryInit())
if(device_opencl_init())
device = device_opencl_create(info, stats, background);
else
device = NULL;
@ -213,12 +214,12 @@ vector<DeviceType>& Device::available_types()
types.push_back(DEVICE_CPU);
#ifdef WITH_CUDA
if(cuLibraryInit())
if(device_cuda_init())
types.push_back(DEVICE_CUDA);
#endif
#ifdef WITH_OPENCL
if(clLibraryInit())
if(device_opencl_init())
types.push_back(DEVICE_OPENCL);
#endif
@ -242,12 +243,12 @@ vector<DeviceInfo>& Device::available_devices()
if(!devices_init) {
#ifdef WITH_CUDA
if(cuLibraryInit())
if(device_cuda_init())
device_cuda_info(devices);
#endif
#ifdef WITH_OPENCL
if(clLibraryInit())
if(device_opencl_init())
device_opencl_info(devices);
#endif

View File

@ -23,7 +23,7 @@
#include "buffers.h"
#include "util_cuda.h"
#include "cuew.h"
#include "util_debug.h"
#include "util_map.h"
#include "util_opengl.h"
@ -61,65 +61,10 @@ public:
return (CUdeviceptr)mem;
}
static const char *cuda_error_string(CUresult result)
static bool have_precompiled_kernels()
{
switch(result) {
case CUDA_SUCCESS: return "No errors";
case CUDA_ERROR_INVALID_VALUE: return "Invalid value";
case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory";
case CUDA_ERROR_NOT_INITIALIZED: return "Driver not initialized";
case CUDA_ERROR_DEINITIALIZED: return "Driver deinitialized";
case CUDA_ERROR_NO_DEVICE: return "No CUDA-capable device available";
case CUDA_ERROR_INVALID_DEVICE: return "Invalid device";
case CUDA_ERROR_INVALID_IMAGE: return "Invalid kernel image";
case CUDA_ERROR_INVALID_CONTEXT: return "Invalid context";
case CUDA_ERROR_MAP_FAILED: return "Map failed";
case CUDA_ERROR_UNMAP_FAILED: return "Unmap failed";
case CUDA_ERROR_ARRAY_IS_MAPPED: return "Array is mapped";
case CUDA_ERROR_ALREADY_MAPPED: return "Already mapped";
case CUDA_ERROR_NO_BINARY_FOR_GPU: return "No binary for GPU";
case CUDA_ERROR_ALREADY_ACQUIRED: return "Already acquired";
case CUDA_ERROR_NOT_MAPPED: return "Not mapped";
case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: return "Mapped resource not available for access as an array";
case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "Mapped resource not available for access as a pointer";
case CUDA_ERROR_ECC_UNCORRECTABLE: return "Uncorrectable ECC error detected";
case CUDA_ERROR_UNSUPPORTED_LIMIT: return "CUlimit not supported by device";
case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "Context already in use";
case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "Peer access unsupported";
case CUDA_ERROR_INVALID_PTX: return "Invalid PTX code";
case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve";
case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: return "Shared object initialization failed";
case CUDA_ERROR_OPERATING_SYSTEM: return "OS call failed";
case CUDA_ERROR_INVALID_HANDLE: return "Invalid handle";
case CUDA_ERROR_NOT_FOUND: return "Not found";
case CUDA_ERROR_NOT_READY: return "CUDA not ready";
case CUDA_ERROR_ILLEGAL_ADDRESS: return "Illegal address";
case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "Launch exceeded resources";
case CUDA_ERROR_LAUNCH_TIMEOUT: return "Launch exceeded time out";
case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "Launch with incompatible texturing";
case CUDA_ERROR_HARDWARE_STACK_ERROR: return "Stack error";
case CUDA_ERROR_ILLEGAL_INSTRUCTION: return "Illegal instruction";
case CUDA_ERROR_MISALIGNED_ADDRESS: return "Misaligned address";
case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "Invalid address space";
case CUDA_ERROR_INVALID_PC: return "Invalid program counter";
case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed";
case CUDA_ERROR_NOT_PERMITTED: return "Operation not permitted";
case CUDA_ERROR_NOT_SUPPORTED: return "Operation not supported";
case CUDA_ERROR_UNKNOWN: return "Unknown error";
default: return "Unknown CUDA error value";
}
string cubins_path = path_get("lib");
return path_exists(cubins_path);
}
/*#ifdef NDEBUG
@ -141,7 +86,7 @@ public:
CUresult result = stmt; \
\
if(result != CUDA_SUCCESS) { \
string message = string_printf("CUDA error: %s in %s", cuda_error_string(result), #stmt); \
string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \
if(error_msg == "") \
error_msg = message; \
fprintf(stderr, "%s\n", message.c_str()); \
@ -155,7 +100,7 @@ public:
if(result == CUDA_SUCCESS)
return false;
string message = string_printf("CUDA error at %s: %s", stmt.c_str(), cuda_error_string(result));
string message = string_printf("CUDA error at %s: %s", stmt.c_str(), cuewErrorString(result));
if(error_msg == "")
error_msg = message;
fprintf(stderr, "%s\n", message.c_str());
@ -275,7 +220,7 @@ public:
return cubin;
#ifdef _WIN32
if(cuHavePrecompiledKernels()) {
if(have_precompiled_kernels()) {
if(major < 2)
cuda_error_message(string_printf("CUDA device requires compute capability 2.0 or up, found %d.%d. Your GPU is not supported.", major, minor));
else
@ -285,14 +230,14 @@ public:
#endif
/* if not, find CUDA compiler */
string nvcc = cuCompilerPath();
const char *nvcc = cuewCompilerPath();
if(nvcc == "") {
if(nvcc == NULL) {
cuda_error_message("CUDA nvcc compiler not found. Install CUDA toolkit in default location.");
return "";
}
int cuda_version = cuCompilerVersion();
int cuda_version = cuewCompilerVersion();
if(cuda_version == 0) {
cuda_error_message("CUDA nvcc compiler version could not be parsed.");
@ -317,7 +262,7 @@ public:
string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" "
"-o \"%s\" --ptxas-options=\"-v\" -I\"%s\" -DNVCC -D__KERNEL_CUDA_VERSION__=%d",
nvcc.c_str(), major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version);
nvcc, major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version);
printf("%s\n", command.c_str());
@ -1050,6 +995,28 @@ public:
}
};
bool device_cuda_init(void)
{
static bool initialized = false;
static bool result = false;
if (initialized)
return result;
initialized = true;
if (cuewInit() == CUEW_SUCCESS) {
if(CUDADevice::have_precompiled_kernels())
result = true;
#ifndef _WIN32
else if(cuewCompilerPath() != NULL)
result = true;
#endif
}
return result;
}
Device *device_cuda_create(DeviceInfo& info, Stats &stats, bool background)
{
return new CUDADevice(info, stats, background);
@ -1063,13 +1030,13 @@ void device_cuda_info(vector<DeviceInfo>& devices)
result = cuInit(0);
if(result != CUDA_SUCCESS) {
if(result != CUDA_ERROR_NO_DEVICE)
fprintf(stderr, "CUDA cuInit: %s\n", CUDADevice::cuda_error_string(result));
fprintf(stderr, "CUDA cuInit: %s\n", cuewErrorString(result));
return;
}
result = cuDeviceGetCount(&count);
if(result != CUDA_SUCCESS) {
fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", CUDADevice::cuda_error_string(result));
fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", cuewErrorString(result));
return;
}

View File

@ -22,7 +22,9 @@ CCL_NAMESPACE_BEGIN
class Device;
Device *device_cpu_create(DeviceInfo& info, Stats &stats, bool background);
bool device_opencl_init(void);
Device *device_opencl_create(DeviceInfo& info, Stats &stats, bool background);
bool device_cuda_init(void);
Device *device_cuda_create(DeviceInfo& info, Stats &stats, bool background);
Device *device_network_create(DeviceInfo& info, Stats &stats, const char *address);
Device *device_multi_create(DeviceInfo& info, Stats &stats, bool background);

View File

@ -25,11 +25,12 @@
#include "buffers.h"
#include "clew.h"
#include "util_foreach.h"
#include "util_map.h"
#include "util_math.h"
#include "util_md5.h"
#include "util_opencl.h"
#include "util_opengl.h"
#include "util_path.h"
#include "util_time.h"
@ -552,7 +553,7 @@ public:
device_initialized = true;
}
static void context_notify_callback(const char *err_info,
static void CL_CALLBACK context_notify_callback(const char *err_info,
const void *private_info, size_t cb, void *user_data)
{
char name[256];
@ -1162,6 +1163,26 @@ Device *device_opencl_create(DeviceInfo& info, Stats &stats, bool background)
return new OpenCLDevice(info, stats, background);
}
bool device_opencl_init(void) {
static bool initialized = false;
static bool result = false;
if (initialized)
return result;
initialized = true;
// OpenCL disabled for now, only works with this environment variable set
if(!getenv("CYCLES_OPENCL_TEST")) {
result = false;
}
else {
result = clewInit() == CLEW_SUCCESS;
}
return result;
}
void device_opencl_info(vector<DeviceInfo>& devices)
{
vector<cl_device_id> device_ids;

View File

@ -10,10 +10,8 @@ set(INC_SYS
set(SRC
util_cache.cpp
util_cuda.cpp
util_dynlib.cpp
util_md5.cpp
util_opencl.cpp
util_path.cpp
util_string.cpp
util_simd.cpp
@ -34,7 +32,6 @@ set(SRC_HEADERS
util_args.h
util_boundbox.h
util_cache.h
util_cuda.h
util_debug.h
util_dynlib.h
util_foreach.h
@ -46,7 +43,6 @@ set(SRC_HEADERS
util_map.h
util_math.h
util_md5.h
util_opencl.h
util_opengl.h
util_optimization.h
util_param.h

View File

@ -1,503 +0,0 @@
/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
#include <iostream>
#include <stdlib.h>
#include <stdio.h>
#include "util_cuda.h"
#include "util_debug.h"
#include "util_dynlib.h"
#include "util_path.h"
#include "util_string.h"
#ifdef _WIN32
#define popen _popen
#define pclose _pclose
#endif
/* function defininitions */
tcuInit *cuInit;
tcuDriverGetVersion *cuDriverGetVersion;
tcuDeviceGet *cuDeviceGet;
tcuDeviceGetCount *cuDeviceGetCount;
tcuDeviceGetName *cuDeviceGetName;
tcuDeviceComputeCapability *cuDeviceComputeCapability;
tcuDeviceTotalMem *cuDeviceTotalMem;
tcuDeviceGetProperties *cuDeviceGetProperties;
tcuDeviceGetAttribute *cuDeviceGetAttribute;
tcuCtxCreate *cuCtxCreate;
tcuCtxDestroy *cuCtxDestroy;
tcuCtxAttach *cuCtxAttach;
tcuCtxDetach *cuCtxDetach;
tcuCtxPushCurrent *cuCtxPushCurrent;
tcuCtxPopCurrent *cuCtxPopCurrent;
tcuCtxGetDevice *cuCtxGetDevice;
tcuCtxSynchronize *cuCtxSynchronize;
tcuModuleLoad *cuModuleLoad;
tcuModuleLoadData *cuModuleLoadData;
tcuModuleLoadDataEx *cuModuleLoadDataEx;
tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
tcuModuleUnload *cuModuleUnload;
tcuModuleGetFunction *cuModuleGetFunction;
tcuModuleGetGlobal *cuModuleGetGlobal;
tcuModuleGetTexRef *cuModuleGetTexRef;
tcuModuleGetSurfRef *cuModuleGetSurfRef;
tcuMemGetInfo *cuMemGetInfo;
tcuMemAlloc *cuMemAlloc;
tcuMemAllocPitch *cuMemAllocPitch;
tcuMemFree *cuMemFree;
tcuMemGetAddressRange *cuMemGetAddressRange;
tcuMemAllocHost *cuMemAllocHost;
tcuMemFreeHost *cuMemFreeHost;
tcuMemHostAlloc *cuMemHostAlloc;
tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer;
tcuMemHostGetFlags *cuMemHostGetFlags;
tcuMemcpyHtoD *cuMemcpyHtoD;
tcuMemcpyDtoH *cuMemcpyDtoH;
tcuMemcpyDtoD *cuMemcpyDtoD;
tcuMemcpyDtoA *cuMemcpyDtoA;
tcuMemcpyAtoD *cuMemcpyAtoD;
tcuMemcpyHtoA *cuMemcpyHtoA;
tcuMemcpyAtoH *cuMemcpyAtoH;
tcuMemcpyAtoA *cuMemcpyAtoA;
tcuMemcpy2D *cuMemcpy2D;
tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned;
tcuMemcpy3D *cuMemcpy3D;
tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync;
tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync;
tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync;
tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync;
tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync;
tcuMemcpy2DAsync *cuMemcpy2DAsync;
tcuMemcpy3DAsync *cuMemcpy3DAsync;
tcuMemsetD8 *cuMemsetD8;
tcuMemsetD16 *cuMemsetD16;
tcuMemsetD32 *cuMemsetD32;
tcuMemsetD2D8 *cuMemsetD2D8;
tcuMemsetD2D16 *cuMemsetD2D16;
tcuMemsetD2D32 *cuMemsetD2D32;
tcuFuncSetBlockShape *cuFuncSetBlockShape;
tcuFuncSetSharedSize *cuFuncSetSharedSize;
tcuFuncGetAttribute *cuFuncGetAttribute;
tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
tcuArrayCreate *cuArrayCreate;
tcuArrayGetDescriptor *cuArrayGetDescriptor;
tcuArrayDestroy *cuArrayDestroy;
tcuArray3DCreate *cuArray3DCreate;
tcuArray3DGetDescriptor *cuArray3DGetDescriptor;
tcuTexRefCreate *cuTexRefCreate;
tcuTexRefDestroy *cuTexRefDestroy;
tcuTexRefSetArray *cuTexRefSetArray;
tcuTexRefSetAddress *cuTexRefSetAddress;
tcuTexRefSetAddress2D *cuTexRefSetAddress2D;
tcuTexRefSetFormat *cuTexRefSetFormat;
tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
tcuTexRefSetFlags *cuTexRefSetFlags;
tcuTexRefGetAddress *cuTexRefGetAddress;
tcuTexRefGetArray *cuTexRefGetArray;
tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
tcuTexRefGetFormat *cuTexRefGetFormat;
tcuTexRefGetFlags *cuTexRefGetFlags;
tcuSurfRefSetArray *cuSurfRefSetArray;
tcuSurfRefGetArray *cuSurfRefGetArray;
tcuParamSetSize *cuParamSetSize;
tcuParamSeti *cuParamSeti;
tcuParamSetf *cuParamSetf;
tcuParamSetv *cuParamSetv;
tcuParamSetTexRef *cuParamSetTexRef;
tcuLaunch *cuLaunch;
tcuLaunchGrid *cuLaunchGrid;
tcuLaunchGridAsync *cuLaunchGridAsync;
tcuEventCreate *cuEventCreate;
tcuEventRecord *cuEventRecord;
tcuEventQuery *cuEventQuery;
tcuEventSynchronize *cuEventSynchronize;
tcuEventDestroy *cuEventDestroy;
tcuEventElapsedTime *cuEventElapsedTime;
tcuStreamCreate *cuStreamCreate;
tcuStreamQuery *cuStreamQuery;
tcuStreamSynchronize *cuStreamSynchronize;
tcuStreamDestroy *cuStreamDestroy;
tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer;
tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
tcuGraphicsMapResources *cuGraphicsMapResources;
tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
tcuGetExportTable *cuGetExportTable;
tcuCtxSetLimit *cuCtxSetLimit;
tcuCtxGetLimit *cuCtxGetLimit;
tcuGLCtxCreate *cuGLCtxCreate;
tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
tcuCtxSetCurrent *cuCtxSetCurrent;
tcuLaunchKernel *cuLaunchKernel;
CCL_NAMESPACE_BEGIN
/* utility macros */
#define CUDA_LIBRARY_FIND_CHECKED(name) \
name = (t##name*)dynamic_library_find(lib, #name);
#define CUDA_LIBRARY_FIND(name) \
name = (t##name*)dynamic_library_find(lib, #name); \
assert(name);
#define CUDA_LIBRARY_FIND_V2(name) \
name = (t##name*)dynamic_library_find(lib, #name "_v2"); \
assert(name);
/* initialization function */
bool cuLibraryInit()
{
static bool initialized = false;
static bool result = false;
if(initialized)
return result;
initialized = true;
/* library paths */
#ifdef _WIN32
/* expected in c:/windows/system or similar, no path needed */
const char *path = "nvcuda.dll";
const char *alternative_path = NULL;
#elif defined(__APPLE__)
/* default installation path */
const char *path = "/usr/local/cuda/lib/libcuda.dylib";
const char *alternative_path = NULL;
#else
const char *path = "libcuda.so";
const char *alternative_path = "libcuda.so.1";
#endif
/* load library */
DynamicLibrary *lib = dynamic_library_open(path);
if(lib == NULL && alternative_path)
lib = dynamic_library_open(alternative_path);
if(lib == NULL)
return false;
/* detect driver version */
int driver_version = 1000;
CUDA_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
if(cuDriverGetVersion)
cuDriverGetVersion(&driver_version);
/* we require version 4.0 */
if(driver_version < 4000)
return false;
/* fetch all function pointers */
CUDA_LIBRARY_FIND(cuInit);
CUDA_LIBRARY_FIND(cuDeviceGet);
CUDA_LIBRARY_FIND(cuDeviceGetCount);
CUDA_LIBRARY_FIND(cuDeviceGetName);
CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
CUDA_LIBRARY_FIND(cuDeviceTotalMem);
CUDA_LIBRARY_FIND(cuDeviceGetProperties);
CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
CUDA_LIBRARY_FIND(cuCtxCreate);
CUDA_LIBRARY_FIND(cuCtxDestroy);
CUDA_LIBRARY_FIND(cuCtxAttach);
CUDA_LIBRARY_FIND(cuCtxDetach);
CUDA_LIBRARY_FIND(cuCtxPushCurrent);
CUDA_LIBRARY_FIND(cuCtxPopCurrent);
CUDA_LIBRARY_FIND(cuCtxGetDevice);
CUDA_LIBRARY_FIND(cuCtxSynchronize);
CUDA_LIBRARY_FIND(cuModuleLoad);
CUDA_LIBRARY_FIND(cuModuleLoadData);
CUDA_LIBRARY_FIND(cuModuleUnload);
CUDA_LIBRARY_FIND(cuModuleGetFunction);
CUDA_LIBRARY_FIND(cuModuleGetGlobal);
CUDA_LIBRARY_FIND(cuModuleGetTexRef);
CUDA_LIBRARY_FIND(cuMemGetInfo);
CUDA_LIBRARY_FIND(cuMemAlloc);
CUDA_LIBRARY_FIND(cuMemAllocPitch);
CUDA_LIBRARY_FIND(cuMemFree);
CUDA_LIBRARY_FIND(cuMemGetAddressRange);
CUDA_LIBRARY_FIND(cuMemAllocHost);
CUDA_LIBRARY_FIND(cuMemFreeHost);
CUDA_LIBRARY_FIND(cuMemHostAlloc);
CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer);
CUDA_LIBRARY_FIND(cuMemcpyHtoD);
CUDA_LIBRARY_FIND(cuMemcpyDtoH);
CUDA_LIBRARY_FIND(cuMemcpyDtoD);
CUDA_LIBRARY_FIND(cuMemcpyDtoA);
CUDA_LIBRARY_FIND(cuMemcpyAtoD);
CUDA_LIBRARY_FIND(cuMemcpyHtoA);
CUDA_LIBRARY_FIND(cuMemcpyAtoH);
CUDA_LIBRARY_FIND(cuMemcpyAtoA);
CUDA_LIBRARY_FIND(cuMemcpy2D);
CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned);
CUDA_LIBRARY_FIND(cuMemcpy3D);
CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync);
CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync);
CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync);
CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync);
CUDA_LIBRARY_FIND(cuMemcpy2DAsync);
CUDA_LIBRARY_FIND(cuMemcpy3DAsync);
CUDA_LIBRARY_FIND(cuMemsetD8);
CUDA_LIBRARY_FIND(cuMemsetD16);
CUDA_LIBRARY_FIND(cuMemsetD32);
CUDA_LIBRARY_FIND(cuMemsetD2D8);
CUDA_LIBRARY_FIND(cuMemsetD2D16);
CUDA_LIBRARY_FIND(cuMemsetD2D32);
CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
CUDA_LIBRARY_FIND(cuFuncGetAttribute);
CUDA_LIBRARY_FIND(cuArrayCreate);
CUDA_LIBRARY_FIND(cuArrayGetDescriptor);
CUDA_LIBRARY_FIND(cuArrayDestroy);
CUDA_LIBRARY_FIND(cuArray3DCreate);
CUDA_LIBRARY_FIND(cuArray3DGetDescriptor);
CUDA_LIBRARY_FIND(cuTexRefCreate);
CUDA_LIBRARY_FIND(cuTexRefDestroy);
CUDA_LIBRARY_FIND(cuTexRefSetArray);
CUDA_LIBRARY_FIND(cuTexRefSetAddress);
CUDA_LIBRARY_FIND(cuTexRefSetAddress2D);
CUDA_LIBRARY_FIND(cuTexRefSetFormat);
CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
CUDA_LIBRARY_FIND(cuTexRefSetFlags);
CUDA_LIBRARY_FIND(cuTexRefGetAddress);
CUDA_LIBRARY_FIND(cuTexRefGetArray);
CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
CUDA_LIBRARY_FIND(cuTexRefGetFormat);
CUDA_LIBRARY_FIND(cuTexRefGetFlags);
CUDA_LIBRARY_FIND(cuParamSetSize);
CUDA_LIBRARY_FIND(cuParamSeti);
CUDA_LIBRARY_FIND(cuParamSetf);
CUDA_LIBRARY_FIND(cuParamSetv);
CUDA_LIBRARY_FIND(cuParamSetTexRef);
CUDA_LIBRARY_FIND(cuLaunch);
CUDA_LIBRARY_FIND(cuLaunchGrid);
CUDA_LIBRARY_FIND(cuLaunchGridAsync);
CUDA_LIBRARY_FIND(cuEventCreate);
CUDA_LIBRARY_FIND(cuEventRecord);
CUDA_LIBRARY_FIND(cuEventQuery);
CUDA_LIBRARY_FIND(cuEventSynchronize);
CUDA_LIBRARY_FIND(cuEventDestroy);
CUDA_LIBRARY_FIND(cuEventElapsedTime);
CUDA_LIBRARY_FIND(cuStreamCreate);
CUDA_LIBRARY_FIND(cuStreamQuery);
CUDA_LIBRARY_FIND(cuStreamSynchronize);
CUDA_LIBRARY_FIND(cuStreamDestroy);
/* cuda 2.1 */
CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
CUDA_LIBRARY_FIND(cuGLCtxCreate);
CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
/* cuda 2.3 */
CUDA_LIBRARY_FIND(cuMemHostGetFlags);
CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
/* cuda 3.0 */
CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync);
CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer);
CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags);
CUDA_LIBRARY_FIND(cuGraphicsMapResources);
CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
CUDA_LIBRARY_FIND(cuGetExportTable);
/* cuda 3.1 */
CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
CUDA_LIBRARY_FIND(cuSurfRefSetArray);
CUDA_LIBRARY_FIND(cuSurfRefGetArray);
CUDA_LIBRARY_FIND(cuCtxSetLimit);
CUDA_LIBRARY_FIND(cuCtxGetLimit);
/* functions which changed 3.1 -> 3.2 for 64 bit stuff, the cuda library
* has both the old ones for compatibility and new ones with _v2 postfix,
* we load the _v2 ones here. */
CUDA_LIBRARY_FIND_V2(cuDeviceTotalMem);
CUDA_LIBRARY_FIND_V2(cuCtxCreate);
CUDA_LIBRARY_FIND_V2(cuModuleGetGlobal);
CUDA_LIBRARY_FIND_V2(cuMemGetInfo);
CUDA_LIBRARY_FIND_V2(cuMemAlloc);
CUDA_LIBRARY_FIND_V2(cuMemAllocPitch);
CUDA_LIBRARY_FIND_V2(cuMemFree);
CUDA_LIBRARY_FIND_V2(cuMemGetAddressRange);
CUDA_LIBRARY_FIND_V2(cuMemAllocHost);
CUDA_LIBRARY_FIND_V2(cuMemHostGetDevicePointer);
CUDA_LIBRARY_FIND_V2(cuMemcpyHtoD);
CUDA_LIBRARY_FIND_V2(cuMemcpyDtoH);
CUDA_LIBRARY_FIND_V2(cuMemcpyDtoD);
CUDA_LIBRARY_FIND_V2(cuMemcpyDtoA);
CUDA_LIBRARY_FIND_V2(cuMemcpyAtoD);
CUDA_LIBRARY_FIND_V2(cuMemcpyHtoA);
CUDA_LIBRARY_FIND_V2(cuMemcpyAtoH);
CUDA_LIBRARY_FIND_V2(cuMemcpyAtoA);
CUDA_LIBRARY_FIND_V2(cuMemcpyHtoAAsync);
CUDA_LIBRARY_FIND_V2(cuMemcpyAtoHAsync);
CUDA_LIBRARY_FIND_V2(cuMemcpy2D);
CUDA_LIBRARY_FIND_V2(cuMemcpy2DUnaligned);
CUDA_LIBRARY_FIND_V2(cuMemcpy3D);
CUDA_LIBRARY_FIND_V2(cuMemcpyHtoDAsync);
CUDA_LIBRARY_FIND_V2(cuMemcpyDtoHAsync);
CUDA_LIBRARY_FIND_V2(cuMemcpyDtoDAsync);
CUDA_LIBRARY_FIND_V2(cuMemcpy2DAsync);
CUDA_LIBRARY_FIND_V2(cuMemcpy3DAsync);
CUDA_LIBRARY_FIND_V2(cuMemsetD8);
CUDA_LIBRARY_FIND_V2(cuMemsetD16);
CUDA_LIBRARY_FIND_V2(cuMemsetD32);
CUDA_LIBRARY_FIND_V2(cuMemsetD2D8);
CUDA_LIBRARY_FIND_V2(cuMemsetD2D16);
CUDA_LIBRARY_FIND_V2(cuMemsetD2D32);
CUDA_LIBRARY_FIND_V2(cuArrayCreate);
CUDA_LIBRARY_FIND_V2(cuArrayGetDescriptor);
CUDA_LIBRARY_FIND_V2(cuArray3DCreate);
CUDA_LIBRARY_FIND_V2(cuArray3DGetDescriptor);
CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress);
CUDA_LIBRARY_FIND_V2(cuTexRefSetAddress2D);
CUDA_LIBRARY_FIND_V2(cuTexRefGetAddress);
CUDA_LIBRARY_FIND_V2(cuGraphicsResourceGetMappedPointer);
CUDA_LIBRARY_FIND_V2(cuGLCtxCreate);
/* cuda 4.0 */
CUDA_LIBRARY_FIND(cuCtxSetCurrent);
CUDA_LIBRARY_FIND(cuLaunchKernel);
if(cuHavePrecompiledKernels())
result = true;
#ifndef _WIN32
else if(cuCompilerPath() != "")
result = true;
#endif
return result;
}
bool cuHavePrecompiledKernels()
{
string cubins_path = path_get("lib");
return path_exists(cubins_path);
}
string cuCompilerPath()
{
#ifdef _WIN32
const char *defaultpaths[] = {"C:/CUDA/bin", NULL};
const char *executable = "nvcc.exe";
#else
const char *defaultpaths[] = {
"/Developer/NVIDIA/CUDA-5.0/bin",
"/usr/local/cuda-5.0/bin",
"/usr/local/cuda/bin",
"/Developer/NVIDIA/CUDA-6.0/bin",
"/usr/local/cuda-6.0/bin",
"/Developer/NVIDIA/CUDA-5.5/bin",
"/usr/local/cuda-5.5/bin",
NULL};
const char *executable = "nvcc";
#endif
const char *binpath = getenv("CUDA_BIN_PATH");
string nvcc;
if(binpath) {
nvcc = path_join(binpath, executable);
if(path_exists(nvcc))
return nvcc;
}
for(int i = 0; defaultpaths[i]; i++) {
nvcc = path_join(defaultpaths[i], executable);
if(path_exists(nvcc))
return nvcc;
}
#ifndef _WIN32
{
FILE *handle = popen("which nvcc", "r");
if(handle) {
char buffer[4096] = {0};
int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
buffer[len] = '\0';
pclose(handle);
if(buffer[0])
return "nvcc";
}
}
#endif
return "";
}
int cuCompilerVersion()
{
string path = cuCompilerPath();
if(path == "")
return 0;
/* get --version output */
FILE *pipe = popen((path + " --version").c_str(), "r");
if(!pipe) {
fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
return 0;
}
char buf[128];
string output = "";
while(!feof(pipe))
if(fgets(buf, 128, pipe) != NULL)
output += buf;
pclose(pipe);
/* parse version number */
string marker = "Cuda compilation tools, release ";
size_t offset = output.find(marker);
if(offset == string::npos) {
fprintf(stderr, "CUDA: failed to find version number in:\n\n%s\n", output.c_str());
return 0;
}
string versionstr = output.substr(offset + marker.size(), string::npos);
int major, minor;
if(sscanf(versionstr.c_str(), "%d.%d", &major, &minor) < 2) {
fprintf(stderr, "CUDA: failed to parse version number from:\n\n%s\n", output.c_str());
return 0;
}
return 10*major + minor;
}
CCL_NAMESPACE_END

View File

@ -1,636 +0,0 @@
/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
#ifndef __UTIL_CUDA_H__
#define __UTIL_CUDA_H__
#include <stdlib.h>
#include "util_opengl.h"
#include "util_string.h"
CCL_NAMESPACE_BEGIN
/* CUDA is linked in dynamically at runtime, so we can start the application
* without requiring a CUDA installation. Code adapted from the example
* matrixMulDynlinkJIT in the CUDA SDK. */
bool cuLibraryInit();
bool cuHavePrecompiledKernels();
string cuCompilerPath();
int cuCompilerVersion();
CCL_NAMESPACE_END
/* defines, structs, enums */
#define CUDA_VERSION 3020
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined(__LP64__)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned int CUdeviceptr;
#endif
typedef int CUdevice;
typedef struct CUctx_st *CUcontext;
typedef struct CUmod_st *CUmodule;
typedef struct CUfunc_st *CUfunction;
typedef struct CUarray_st *CUarray;
typedef struct CUtexref_st *CUtexref;
typedef struct CUsurfref_st *CUsurfref;
typedef struct CUevent_st *CUevent;
typedef struct CUstream_st *CUstream;
typedef struct CUgraphicsResource_st *CUgraphicsResource;
typedef struct CUuuid_st {
char bytes[16];
} CUuuid;
typedef enum CUctx_flags_enum {
CU_CTX_SCHED_AUTO = 0,
CU_CTX_SCHED_SPIN = 1,
CU_CTX_SCHED_YIELD = 2,
CU_CTX_SCHED_MASK = 0x3,
CU_CTX_BLOCKING_SYNC = 4,
CU_CTX_MAP_HOST = 8,
CU_CTX_LMEM_RESIZE_TO_MAX = 16,
CU_CTX_FLAGS_MASK = 0x1f
} CUctx_flags;
typedef enum CUevent_flags_enum {
CU_EVENT_DEFAULT = 0,
CU_EVENT_BLOCKING_SYNC = 1,
CU_EVENT_DISABLE_TIMING = 2
} CUevent_flags;
typedef enum CUarray_format_enum {
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
CU_AD_FORMAT_SIGNED_INT8 = 0x08,
CU_AD_FORMAT_SIGNED_INT16 = 0x09,
CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
CU_AD_FORMAT_HALF = 0x10,
CU_AD_FORMAT_FLOAT = 0x20
} CUarray_format;
typedef enum CUaddress_mode_enum {
CU_TR_ADDRESS_MODE_WRAP = 0,
CU_TR_ADDRESS_MODE_CLAMP = 1,
CU_TR_ADDRESS_MODE_MIRROR = 2,
CU_TR_ADDRESS_MODE_BORDER = 3
} CUaddress_mode;
typedef enum CUfilter_mode_enum {
CU_TR_FILTER_MODE_POINT = 0,
CU_TR_FILTER_MODE_LINEAR = 1
} CUfilter_mode;
typedef enum CUdevice_attribute_enum {
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35
} CUdevice_attribute;
typedef struct CUdevprop_st {
int maxThreadsPerBlock;
int maxThreadsDim[3];
int maxGridSize[3];
int sharedMemPerBlock;
int totalConstantMemory;
int SIMDWidth;
int memPitch;
int regsPerBlock;
int clockRate;
int textureAlign;
} CUdevprop;
typedef enum CUfunction_attribute_enum {
CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
CU_FUNC_ATTRIBUTE_MAX
} CUfunction_attribute;
typedef enum CUfunc_cache_enum {
CU_FUNC_CACHE_PREFER_NONE = 0x00,
CU_FUNC_CACHE_PREFER_SHARED = 0x01,
CU_FUNC_CACHE_PREFER_L1 = 0x02
} CUfunc_cache;
typedef enum CUmemorytype_enum {
CU_MEMORYTYPE_HOST = 0x01,
CU_MEMORYTYPE_DEVICE = 0x02,
CU_MEMORYTYPE_ARRAY = 0x03
} CUmemorytype;
typedef enum CUcomputemode_enum {
CU_COMPUTEMODE_DEFAULT = 0,
CU_COMPUTEMODE_EXCLUSIVE = 1,
CU_COMPUTEMODE_PROHIBITED = 2
} CUcomputemode;
typedef enum CUjit_option_enum
{
CU_JIT_MAX_REGISTERS = 0,
CU_JIT_THREADS_PER_BLOCK,
CU_JIT_WALL_TIME,
CU_JIT_INFO_LOG_BUFFER,
CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
CU_JIT_ERROR_LOG_BUFFER,
CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
CU_JIT_OPTIMIZATION_LEVEL,
CU_JIT_TARGET_FROM_CUCONTEXT,
CU_JIT_TARGET,
CU_JIT_FALLBACK_STRATEGY
} CUjit_option;
typedef enum CUjit_target_enum
{
CU_TARGET_COMPUTE_10 = 0,
CU_TARGET_COMPUTE_11,
CU_TARGET_COMPUTE_12,
CU_TARGET_COMPUTE_13,
CU_TARGET_COMPUTE_20,
CU_TARGET_COMPUTE_21,
CU_TARGET_COMPUTE_30,
CU_TARGET_COMPUTE_35,
CU_TARGET_COMPUTE_50
} CUjit_target;
typedef enum CUjit_fallback_enum
{
CU_PREFER_PTX = 0,
CU_PREFER_BINARY
} CUjit_fallback;
typedef enum CUgraphicsRegisterFlags_enum {
CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00
} CUgraphicsRegisterFlags;
typedef enum CUgraphicsMapResourceFlags_enum {
CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02
} CUgraphicsMapResourceFlags;
typedef enum CUarray_cubemap_face_enum {
CU_CUBEMAP_FACE_POSITIVE_X = 0x00,
CU_CUBEMAP_FACE_NEGATIVE_X = 0x01,
CU_CUBEMAP_FACE_POSITIVE_Y = 0x02,
CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03,
CU_CUBEMAP_FACE_POSITIVE_Z = 0x04,
CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05
} CUarray_cubemap_face;
typedef enum CUlimit_enum {
CU_LIMIT_STACK_SIZE = 0x00,
CU_LIMIT_PRINTF_FIFO_SIZE = 0x01,
CU_LIMIT_MALLOC_HEAP_SIZE = 0x02
} CUlimit;
typedef enum cudaError_enum {
CUDA_SUCCESS = 0,
CUDA_ERROR_INVALID_VALUE = 1,
CUDA_ERROR_OUT_OF_MEMORY = 2,
CUDA_ERROR_NOT_INITIALIZED = 3,
CUDA_ERROR_DEINITIALIZED = 4,
CUDA_ERROR_NO_DEVICE = 100,
CUDA_ERROR_INVALID_DEVICE = 101,
CUDA_ERROR_INVALID_IMAGE = 200,
CUDA_ERROR_INVALID_CONTEXT = 201,
CUDA_ERROR_MAP_FAILED = 205,
CUDA_ERROR_UNMAP_FAILED = 206,
CUDA_ERROR_ARRAY_IS_MAPPED = 207,
CUDA_ERROR_ALREADY_MAPPED = 208,
CUDA_ERROR_NO_BINARY_FOR_GPU = 209,
CUDA_ERROR_ALREADY_ACQUIRED = 210,
CUDA_ERROR_NOT_MAPPED = 211,
CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212,
CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213,
CUDA_ERROR_ECC_UNCORRECTABLE = 214,
CUDA_ERROR_UNSUPPORTED_LIMIT = 215,
CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216,
CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217,
CUDA_ERROR_INVALID_PTX = 218,
CUDA_ERROR_INVALID_SOURCE = 300,
CUDA_ERROR_FILE_NOT_FOUND = 301,
CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303,
CUDA_ERROR_OPERATING_SYSTEM = 304,
CUDA_ERROR_INVALID_HANDLE = 400,
CUDA_ERROR_NOT_FOUND = 500,
CUDA_ERROR_NOT_READY = 600,
CUDA_ERROR_ILLEGAL_ADDRESS = 700,
CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701,
CUDA_ERROR_LAUNCH_TIMEOUT = 702,
CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703,
CUDA_ERROR_HARDWARE_STACK_ERROR = 714,
CUDA_ERROR_ILLEGAL_INSTRUCTION = 715,
CUDA_ERROR_MISALIGNED_ADDRESS = 716,
CUDA_ERROR_INVALID_ADDRESS_SPACE = 717,
CUDA_ERROR_INVALID_PC = 718,
CUDA_ERROR_LAUNCH_FAILED = 719,
CUDA_ERROR_NOT_PERMITTED = 800,
CUDA_ERROR_NOT_SUPPORTED = 801,
CUDA_ERROR_UNKNOWN = 999
} CUresult;
#define CU_MEMHOSTALLOC_PORTABLE 0x01
#define CU_MEMHOSTALLOC_DEVICEMAP 0x02
#define CU_MEMHOSTALLOC_WRITECOMBINED 0x04
typedef struct CUDA_MEMCPY2D_st {
size_t srcXInBytes;
size_t srcY;
CUmemorytype srcMemoryType;
const void *srcHost;
CUdeviceptr srcDevice;
CUarray srcArray;
size_t srcPitch;
size_t dstXInBytes;
size_t dstY;
CUmemorytype dstMemoryType;
void *dstHost;
CUdeviceptr dstDevice;
CUarray dstArray;
size_t dstPitch;
size_t WidthInBytes;
size_t Height;
} CUDA_MEMCPY2D;
typedef struct CUDA_MEMCPY3D_st {
size_t srcXInBytes;
size_t srcY;
size_t srcZ;
size_t srcLOD;
CUmemorytype srcMemoryType;
const void *srcHost;
CUdeviceptr srcDevice;
CUarray srcArray;
void *reserved0;
size_t srcPitch;
size_t srcHeight;
size_t dstXInBytes;
size_t dstY;
size_t dstZ;
size_t dstLOD;
CUmemorytype dstMemoryType;
void *dstHost;
CUdeviceptr dstDevice;
CUarray dstArray;
void *reserved1;
size_t dstPitch;
size_t dstHeight;
size_t WidthInBytes;
size_t Height;
size_t Depth;
} CUDA_MEMCPY3D;
typedef struct CUDA_ARRAY_DESCRIPTOR_st
{
size_t Width;
size_t Height;
CUarray_format Format;
unsigned int NumChannels;
} CUDA_ARRAY_DESCRIPTOR;
typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
{
size_t Width;
size_t Height;
size_t Depth;
CUarray_format Format;
unsigned int NumChannels;
unsigned int Flags;
} CUDA_ARRAY3D_DESCRIPTOR;
#define CUDA_ARRAY3D_2DARRAY 0x01
#define CUDA_ARRAY3D_SURFACE_LDST 0x02
#define CU_TRSA_OVERRIDE_FORMAT 0x01
#define CU_TRSF_READ_AS_INTEGER 0x01
#define CU_TRSF_NORMALIZED_COORDINATES 0x02
#define CU_TRSF_SRGB 0x10
#define CU_PARAM_TR_DEFAULT -1
#ifdef _WIN32
#define CUDAAPI __stdcall
#else
#define CUDAAPI
#endif
/* function types */
typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
typedef CUresult CUDAAPI tcuDriverGetVersion(int *driverVersion);
typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal);
typedef CUresult CUDAAPI tcuDeviceGetCount(int *count);
typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceTotalMem(size_t *bytes, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
typedef CUresult CUDAAPI tcuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev);
typedef CUresult CUDAAPI tcuCtxDestroy(CUcontext ctx);
typedef CUresult CUDAAPI tcuCtxAttach(CUcontext *pctx, unsigned int flags);
typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx);
typedef CUresult CUDAAPI tcuCtxPushCurrent(CUcontext ctx );
typedef CUresult CUDAAPI tcuCtxPopCurrent(CUcontext *pctx);
typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice *device);
typedef CUresult CUDAAPI tcuCtxSynchronize(void);
typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
typedef CUresult CUDAAPI tcuCtxGetLimit(size_t *pvalue, CUlimit limit);
typedef CUresult CUDAAPI tcuCtxGetCacheConfig(CUfunc_cache *pconfig);
typedef CUresult CUDAAPI tcuCtxSetCacheConfig(CUfunc_cache config);
typedef CUresult CUDAAPI tcuCtxGetApiVersion(CUcontext ctx, unsigned int *version);
typedef CUresult CUDAAPI tcuModuleLoad(CUmodule *module, const char *fname);
typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule *module, const void *image);
typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule *module, const void *fatCubin);
typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod);
typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
typedef CUresult CUDAAPI tcuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name);
typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name);
typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name);
typedef CUresult CUDAAPI tcuMemGetInfo(size_t *free, size_t *total);
typedef CUresult CUDAAPI tcuMemAlloc(CUdeviceptr *dptr, size_t bytesize);
typedef CUresult CUDAAPI tcuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
typedef CUresult CUDAAPI tcuMemFree(CUdeviceptr dptr);
typedef CUresult CUDAAPI tcuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, CUdeviceptr dptr);
typedef CUresult CUDAAPI tcuMemAllocHost(void **pp, size_t bytesize);
typedef CUresult CUDAAPI tcuMemFreeHost(void *p);
typedef CUresult CUDAAPI tcuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags);
typedef CUresult CUDAAPI tcuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags);
typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned int *pFlags, void *p);
typedef CUresult CUDAAPI tcuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpy2D(const CUDA_MEMCPY2D *pCopy);
typedef CUresult CUDAAPI tcuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy);
typedef CUresult CUDAAPI tcuMemcpy3D(const CUDA_MEMCPY3D *pCopy);
typedef CUresult CUDAAPI tcuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N);
typedef CUresult CUDAAPI tcuMemsetD16(CUdeviceptr dstDevice, unsigned short us, size_t N);
typedef CUresult CUDAAPI tcuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N);
typedef CUresult CUDAAPI tcuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height);
typedef CUresult CUDAAPI tcuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height);
typedef CUresult CUDAAPI tcuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height);
typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream);
typedef CUresult CUDAAPI tcuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray);
typedef CUresult CUDAAPI tcuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
typedef CUresult CUDAAPI tcuArrayDestroy(CUarray hArray);
typedef CUresult CUDAAPI tcuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
typedef CUresult CUDAAPI tcuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
typedef CUresult CUDAAPI tcuStreamCreate(CUstream *phStream, unsigned int Flags);
typedef CUresult CUDAAPI tcuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags);
typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream);
typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream);
typedef CUresult CUDAAPI tcuStreamDestroy(CUstream hStream);
typedef CUresult CUDAAPI tcuEventCreate(CUevent *phEvent, unsigned int Flags);
typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream);
typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent);
typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent);
typedef CUresult CUDAAPI tcuEventDestroy(CUevent hEvent);
typedef CUresult CUDAAPI tcuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
typedef CUresult CUDAAPI tcuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunction hfunc);
typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned int numbytes);
typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned int value);
typedef CUresult CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value);
typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void *ptr, unsigned int numbytes);
typedef CUresult CUDAAPI tcuLaunch(CUfunction f);
typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height);
typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags);
typedef CUresult CUDAAPI tcuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
typedef CUresult CUDAAPI tcuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch);
typedef CUresult CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
typedef CUresult CUDAAPI tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);
typedef CUresult CUDAAPI tcuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetArray(CUarray *phArray, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, int dim);
typedef CUresult CUDAAPI tcuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefCreate(CUtexref *pTexRef);
typedef CUresult CUDAAPI tcuTexRefDestroy(CUtexref hTexRef);
typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags);
typedef CUresult CUDAAPI tcuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef);
typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource);
typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int flags);
typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
typedef CUresult CUDAAPI tcuGetExportTable(const void **ppExportTable, const CUuuid *pExportTableId);
typedef CUresult CUDAAPI tcuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device );
typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags);
typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource *pCudaResource, GLuint image, GLenum target, unsigned int Flags);
typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned gridDimX, unsigned gridDimY, unsigned gridDimZ, unsigned blockDimX, unsigned blockDimY, unsigned blockDimZ, unsigned sharedMemBytes, CUstream hStream, void* kernelParams, void* extra);
/* function declarations */
extern tcuInit *cuInit;
extern tcuDriverGetVersion *cuDriverGetVersion;
extern tcuDeviceGet *cuDeviceGet;
extern tcuDeviceGetCount *cuDeviceGetCount;
extern tcuDeviceGetName *cuDeviceGetName;
extern tcuDeviceComputeCapability *cuDeviceComputeCapability;
extern tcuDeviceTotalMem *cuDeviceTotalMem;
extern tcuDeviceGetProperties *cuDeviceGetProperties;
extern tcuDeviceGetAttribute *cuDeviceGetAttribute;
extern tcuCtxCreate *cuCtxCreate;
extern tcuCtxDestroy *cuCtxDestroy;
extern tcuCtxAttach *cuCtxAttach;
extern tcuCtxDetach *cuCtxDetach;
extern tcuCtxPushCurrent *cuCtxPushCurrent;
extern tcuCtxPopCurrent *cuCtxPopCurrent;
extern tcuCtxGetDevice *cuCtxGetDevice;
extern tcuCtxSynchronize *cuCtxSynchronize;
extern tcuModuleLoad *cuModuleLoad;
extern tcuModuleLoadData *cuModuleLoadData;
extern tcuModuleLoadDataEx *cuModuleLoadDataEx;
extern tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
extern tcuModuleUnload *cuModuleUnload;
extern tcuModuleGetFunction *cuModuleGetFunction;
extern tcuModuleGetGlobal *cuModuleGetGlobal;
extern tcuModuleGetTexRef *cuModuleGetTexRef;
extern tcuModuleGetSurfRef *cuModuleGetSurfRef;
extern tcuMemGetInfo *cuMemGetInfo;
extern tcuMemAlloc *cuMemAlloc;
extern tcuMemAllocPitch *cuMemAllocPitch;
extern tcuMemFree *cuMemFree;
extern tcuMemGetAddressRange *cuMemGetAddressRange;
extern tcuMemAllocHost *cuMemAllocHost;
extern tcuMemFreeHost *cuMemFreeHost;
extern tcuMemHostAlloc *cuMemHostAlloc;
extern tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer;
extern tcuMemHostGetFlags *cuMemHostGetFlags;
extern tcuMemcpyHtoD *cuMemcpyHtoD;
extern tcuMemcpyDtoH *cuMemcpyDtoH;
extern tcuMemcpyDtoD *cuMemcpyDtoD;
extern tcuMemcpyDtoA *cuMemcpyDtoA;
extern tcuMemcpyAtoD *cuMemcpyAtoD;
extern tcuMemcpyHtoA *cuMemcpyHtoA;
extern tcuMemcpyAtoH *cuMemcpyAtoH;
extern tcuMemcpyAtoA *cuMemcpyAtoA;
extern tcuMemcpy2D *cuMemcpy2D;
extern tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned;
extern tcuMemcpy3D *cuMemcpy3D;
extern tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync;
extern tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync;
extern tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync;
extern tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync;
extern tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync;
extern tcuMemcpy2DAsync *cuMemcpy2DAsync;
extern tcuMemcpy3DAsync *cuMemcpy3DAsync;
extern tcuMemsetD8 *cuMemsetD8;
extern tcuMemsetD16 *cuMemsetD16;
extern tcuMemsetD32 *cuMemsetD32;
extern tcuMemsetD2D8 *cuMemsetD2D8;
extern tcuMemsetD2D16 *cuMemsetD2D16;
extern tcuMemsetD2D32 *cuMemsetD2D32;
extern tcuFuncSetBlockShape *cuFuncSetBlockShape;
extern tcuFuncSetSharedSize *cuFuncSetSharedSize;
extern tcuFuncGetAttribute *cuFuncGetAttribute;
extern tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
extern tcuArrayCreate *cuArrayCreate;
extern tcuArrayGetDescriptor *cuArrayGetDescriptor;
extern tcuArrayDestroy *cuArrayDestroy;
extern tcuArray3DCreate *cuArray3DCreate;
extern tcuArray3DGetDescriptor *cuArray3DGetDescriptor;
extern tcuTexRefCreate *cuTexRefCreate;
extern tcuTexRefDestroy *cuTexRefDestroy;
extern tcuTexRefSetArray *cuTexRefSetArray;
extern tcuTexRefSetAddress *cuTexRefSetAddress;
extern tcuTexRefSetAddress2D *cuTexRefSetAddress2D;
extern tcuTexRefSetFormat *cuTexRefSetFormat;
extern tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
extern tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
extern tcuTexRefSetFlags *cuTexRefSetFlags;
extern tcuTexRefGetAddress *cuTexRefGetAddress;
extern tcuTexRefGetArray *cuTexRefGetArray;
extern tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
extern tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
extern tcuTexRefGetFormat *cuTexRefGetFormat;
extern tcuTexRefGetFlags *cuTexRefGetFlags;
extern tcuSurfRefSetArray *cuSurfRefSetArray;
extern tcuSurfRefGetArray *cuSurfRefGetArray;
extern tcuParamSetSize *cuParamSetSize;
extern tcuParamSeti *cuParamSeti;
extern tcuParamSetf *cuParamSetf;
extern tcuParamSetv *cuParamSetv;
extern tcuParamSetTexRef *cuParamSetTexRef;
extern tcuLaunch *cuLaunch;
extern tcuLaunchGrid *cuLaunchGrid;
extern tcuLaunchGridAsync *cuLaunchGridAsync;
extern tcuEventCreate *cuEventCreate;
extern tcuEventRecord *cuEventRecord;
extern tcuEventQuery *cuEventQuery;
extern tcuEventSynchronize *cuEventSynchronize;
extern tcuEventDestroy *cuEventDestroy;
extern tcuEventElapsedTime *cuEventElapsedTime;
extern tcuStreamCreate *cuStreamCreate;
extern tcuStreamQuery *cuStreamQuery;
extern tcuStreamSynchronize *cuStreamSynchronize;
extern tcuStreamDestroy *cuStreamDestroy;
extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
extern tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer;
extern tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
extern tcuGraphicsMapResources *cuGraphicsMapResources;
extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
extern tcuGetExportTable *cuGetExportTable;
extern tcuCtxSetLimit *cuCtxSetLimit;
extern tcuCtxGetLimit *cuCtxGetLimit;
extern tcuGLCtxCreate *cuGLCtxCreate;
extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
extern tcuCtxSetCurrent *cuCtxSetCurrent;
extern tcuLaunchKernel *cuLaunchKernel;
#endif /* __UTIL_CUDA_H__ */

View File

@ -1,337 +0,0 @@
//////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009 Organic Vectory B.V.
// Written by George van Venrooij
//
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file doc/license/Boost.txt)
// Extracted from the CLCC project - http://clcc.sourceforge.net/
//////////////////////////////////////////////////////////////////////////
#include <stdlib.h>
#include "util_opencl.h"
#ifndef CLCC_GENERATE_DOCUMENTATION
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# define VC_EXTRALEAN
# include <windows.h>
typedef HMODULE CLCC_DYNLIB_HANDLE;
# define CLCC_DYNLIB_OPEN LoadLibrary
# define CLCC_DYNLIB_CLOSE FreeLibrary
# define CLCC_DYNLIB_IMPORT GetProcAddress
#else
# include <dlfcn.h>
typedef void* CLCC_DYNLIB_HANDLE;
# define CLCC_DYNLIB_OPEN(path) dlopen(path, RTLD_NOW | RTLD_GLOBAL)
# define CLCC_DYNLIB_CLOSE dlclose
# define CLCC_DYNLIB_IMPORT dlsym
#endif
#else
// typedef implementation_defined CLCC_DYNLIB_HANDLE;
//# define CLCC_DYNLIB_OPEN(path) implementation_defined
//# define CLCC_DYNLIB_CLOSE implementation_defined
//# define CLCC_DYNLIB_IMPORT implementation_defined
#endif
CCL_NAMESPACE_BEGIN
//! \brief module handle
static CLCC_DYNLIB_HANDLE module = NULL;
// Variables holding function entry points
#ifndef CLCC_GENERATE_DOCUMENTATION
PFNCLGETPLATFORMIDS __clewGetPlatformIDs = NULL;
PFNCLGETPLATFORMINFO __clewGetPlatformInfo = NULL;
PFNCLGETDEVICEIDS __clewGetDeviceIDs = NULL;
PFNCLGETDEVICEINFO __clewGetDeviceInfo = NULL;
PFNCLCREATECONTEXT __clewCreateContext = NULL;
PFNCLCREATECONTEXTFROMTYPE __clewCreateContextFromType = NULL;
PFNCLRETAINCONTEXT __clewRetainContext = NULL;
PFNCLRELEASECONTEXT __clewReleaseContext = NULL;
PFNCLGETCONTEXTINFO __clewGetContextInfo = NULL;
PFNCLCREATECOMMANDQUEUE __clewCreateCommandQueue = NULL;
PFNCLRETAINCOMMANDQUEUE __clewRetainCommandQueue = NULL;
PFNCLRELEASECOMMANDQUEUE __clewReleaseCommandQueue = NULL;
PFNCLGETCOMMANDQUEUEINFO __clewGetCommandQueueInfo = NULL;
PFNCLSETCOMMANDQUEUEPROPERTY __clewSetCommandQueueProperty = NULL;
PFNCLCREATEBUFFER __clewCreateBuffer = NULL;
PFNCLCREATEIMAGE2D __clewCreateImage2D = NULL;
PFNCLCREATEIMAGE3D __clewCreateImage3D = NULL;
PFNCLRETAINMEMOBJECT __clewRetainMemObject = NULL;
PFNCLRELEASEMEMOBJECT __clewReleaseMemObject = NULL;
PFNCLGETSUPPORTEDIMAGEFORMATS __clewGetSupportedImageFormats = NULL;
PFNCLGETMEMOBJECTINFO __clewGetMemObjectInfo = NULL;
PFNCLGETIMAGEINFO __clewGetImageInfo = NULL;
PFNCLCREATESAMPLER __clewCreateSampler = NULL;
PFNCLRETAINSAMPLER __clewRetainSampler = NULL;
PFNCLRELEASESAMPLER __clewReleaseSampler = NULL;
PFNCLGETSAMPLERINFO __clewGetSamplerInfo = NULL;
PFNCLCREATEPROGRAMWITHSOURCE __clewCreateProgramWithSource = NULL;
PFNCLCREATEPROGRAMWITHBINARY __clewCreateProgramWithBinary = NULL;
PFNCLRETAINPROGRAM __clewRetainProgram = NULL;
PFNCLRELEASEPROGRAM __clewReleaseProgram = NULL;
PFNCLBUILDPROGRAM __clewBuildProgram = NULL;
PFNCLUNLOADCOMPILER __clewUnloadCompiler = NULL;
PFNCLGETPROGRAMINFO __clewGetProgramInfo = NULL;
PFNCLGETPROGRAMBUILDINFO __clewGetProgramBuildInfo = NULL;
PFNCLCREATEKERNEL __clewCreateKernel = NULL;
PFNCLCREATEKERNELSINPROGRAM __clewCreateKernelsInProgram = NULL;
PFNCLRETAINKERNEL __clewRetainKernel = NULL;
PFNCLRELEASEKERNEL __clewReleaseKernel = NULL;
PFNCLSETKERNELARG __clewSetKernelArg = NULL;
PFNCLGETKERNELINFO __clewGetKernelInfo = NULL;
PFNCLGETKERNELWORKGROUPINFO __clewGetKernelWorkGroupInfo = NULL;
PFNCLWAITFOREVENTS __clewWaitForEvents = NULL;
PFNCLGETEVENTINFO __clewGetEventInfo = NULL;
PFNCLRETAINEVENT __clewRetainEvent = NULL;
PFNCLRELEASEEVENT __clewReleaseEvent = NULL;
PFNCLGETEVENTPROFILINGINFO __clewGetEventProfilingInfo = NULL;
PFNCLFLUSH __clewFlush = NULL;
PFNCLFINISH __clewFinish = NULL;
PFNCLENQUEUEREADBUFFER __clewEnqueueReadBuffer = NULL;
PFNCLENQUEUEWRITEBUFFER __clewEnqueueWriteBuffer = NULL;
PFNCLENQUEUECOPYBUFFER __clewEnqueueCopyBuffer = NULL;
PFNCLENQUEUEREADIMAGE __clewEnqueueReadImage = NULL;
PFNCLENQUEUEWRITEIMAGE __clewEnqueueWriteImage = NULL;
PFNCLENQUEUECOPYIMAGE __clewEnqueueCopyImage = NULL;
PFNCLENQUEUECOPYIMAGETOBUFFER __clewEnqueueCopyImageToBuffer = NULL;
PFNCLENQUEUECOPYBUFFERTOIMAGE __clewEnqueueCopyBufferToImage = NULL;
PFNCLENQUEUEMAPBUFFER __clewEnqueueMapBuffer = NULL;
PFNCLENQUEUEMAPIMAGE __clewEnqueueMapImage = NULL;
PFNCLENQUEUEUNMAPMEMOBJECT __clewEnqueueUnmapMemObject = NULL;
PFNCLENQUEUENDRANGEKERNEL __clewEnqueueNDRangeKernel = NULL;
PFNCLENQUEUETASK __clewEnqueueTask = NULL;
PFNCLENQUEUENATIVEKERNEL __clewEnqueueNativeKernel = NULL;
PFNCLENQUEUEMARKER __clewEnqueueMarker = NULL;
PFNCLENQUEUEWAITFOREVENTS __clewEnqueueWaitForEvents = NULL;
PFNCLENQUEUEBARRIER __clewEnqueueBarrier = NULL;
PFNCLGETEXTENSIONFUNCTIONADDRESS __clewGetExtensionFunctionAddress = NULL;
#endif // CLCC_GENERATE_DOCUMENTATION
#if 0
//! \brief Unloads OpenCL dynamic library, should not be called directly
static void clewExit(void)
{
if (module != NULL)
{
// Ignore errors
CLCC_DYNLIB_CLOSE(module);
module = NULL;
}
}
#endif
//! \param path path to dynamic library to load
//! \return CLEW_ERROR_OPEN_FAILED if the library could not be opened
//! CLEW_ERROR_ATEXIT_FAILED if atexit(clewExit) failed
//! CLEW_SUCCESS when the library was succesfully loaded
int clLibraryInit()
{
#ifdef _WIN32
const char *path = "OpenCL.dll";
#elif defined(__APPLE__)
const char *path = "/Library/Frameworks/OpenCL.framework/OpenCL";
#else
const char *path = "libOpenCL.so";
#endif
// OpenCL disabled for now, only works with this environment variable set
if(!getenv("CYCLES_OPENCL_TEST"))
return 0;
// Check if already initialized
if (module != NULL)
{
return 1;
}
// Load library
module = CLCC_DYNLIB_OPEN(path);
// Check for errors
if (module == NULL)
{
return 0;
}
// Disabled because we retain OpenCL context and it's difficult to ensure
// this will exit after releasing the context
#if 0
// Set unloading
int error = atexit(clewExit);
if (error)
{
// Failure queing atexit, shutdown with error
CLCC_DYNLIB_CLOSE(module);
module = NULL;
return 0;
}
#endif
// Determine function entry-points
__clewGetPlatformIDs = (PFNCLGETPLATFORMIDS )CLCC_DYNLIB_IMPORT(module, "clGetPlatformIDs");
__clewGetPlatformInfo = (PFNCLGETPLATFORMINFO )CLCC_DYNLIB_IMPORT(module, "clGetPlatformInfo");
__clewGetDeviceIDs = (PFNCLGETDEVICEIDS )CLCC_DYNLIB_IMPORT(module, "clGetDeviceIDs");
__clewGetDeviceInfo = (PFNCLGETDEVICEINFO )CLCC_DYNLIB_IMPORT(module, "clGetDeviceInfo");
__clewCreateContext = (PFNCLCREATECONTEXT )CLCC_DYNLIB_IMPORT(module, "clCreateContext");
__clewCreateContextFromType = (PFNCLCREATECONTEXTFROMTYPE )CLCC_DYNLIB_IMPORT(module, "clCreateContextFromType");
__clewRetainContext = (PFNCLRETAINCONTEXT )CLCC_DYNLIB_IMPORT(module, "clRetainContext");
__clewReleaseContext = (PFNCLRELEASECONTEXT )CLCC_DYNLIB_IMPORT(module, "clReleaseContext");
__clewGetContextInfo = (PFNCLGETCONTEXTINFO )CLCC_DYNLIB_IMPORT(module, "clGetContextInfo");
__clewCreateCommandQueue = (PFNCLCREATECOMMANDQUEUE )CLCC_DYNLIB_IMPORT(module, "clCreateCommandQueue");
__clewRetainCommandQueue = (PFNCLRETAINCOMMANDQUEUE )CLCC_DYNLIB_IMPORT(module, "clRetainCommandQueue");
__clewReleaseCommandQueue = (PFNCLRELEASECOMMANDQUEUE )CLCC_DYNLIB_IMPORT(module, "clReleaseCommandQueue");
__clewGetCommandQueueInfo = (PFNCLGETCOMMANDQUEUEINFO )CLCC_DYNLIB_IMPORT(module, "clGetCommandQueueInfo");
__clewSetCommandQueueProperty = (PFNCLSETCOMMANDQUEUEPROPERTY )CLCC_DYNLIB_IMPORT(module, "clSetCommandQueueProperty");
__clewCreateBuffer = (PFNCLCREATEBUFFER )CLCC_DYNLIB_IMPORT(module, "clCreateBuffer");
__clewCreateImage2D = (PFNCLCREATEIMAGE2D )CLCC_DYNLIB_IMPORT(module, "clCreateImage2D");
__clewCreateImage3D = (PFNCLCREATEIMAGE3D )CLCC_DYNLIB_IMPORT(module, "clCreateImage3D");
__clewRetainMemObject = (PFNCLRETAINMEMOBJECT )CLCC_DYNLIB_IMPORT(module, "clRetainMemObject");
__clewReleaseMemObject = (PFNCLRELEASEMEMOBJECT )CLCC_DYNLIB_IMPORT(module, "clReleaseMemObject");
__clewGetSupportedImageFormats = (PFNCLGETSUPPORTEDIMAGEFORMATS )CLCC_DYNLIB_IMPORT(module, "clGetSupportedImageFormats");
__clewGetMemObjectInfo = (PFNCLGETMEMOBJECTINFO )CLCC_DYNLIB_IMPORT(module, "clGetMemObjectInfo");
__clewGetImageInfo = (PFNCLGETIMAGEINFO )CLCC_DYNLIB_IMPORT(module, "clGetImageInfo");
__clewCreateSampler = (PFNCLCREATESAMPLER )CLCC_DYNLIB_IMPORT(module, "clCreateSampler");
__clewRetainSampler = (PFNCLRETAINSAMPLER )CLCC_DYNLIB_IMPORT(module, "clRetainSampler");
__clewReleaseSampler = (PFNCLRELEASESAMPLER )CLCC_DYNLIB_IMPORT(module, "clReleaseSampler");
__clewGetSamplerInfo = (PFNCLGETSAMPLERINFO )CLCC_DYNLIB_IMPORT(module, "clGetSamplerInfo");
__clewCreateProgramWithSource = (PFNCLCREATEPROGRAMWITHSOURCE )CLCC_DYNLIB_IMPORT(module, "clCreateProgramWithSource");
__clewCreateProgramWithBinary = (PFNCLCREATEPROGRAMWITHBINARY )CLCC_DYNLIB_IMPORT(module, "clCreateProgramWithBinary");
__clewRetainProgram = (PFNCLRETAINPROGRAM )CLCC_DYNLIB_IMPORT(module, "clRetainProgram");
__clewReleaseProgram = (PFNCLRELEASEPROGRAM )CLCC_DYNLIB_IMPORT(module, "clReleaseProgram");
__clewBuildProgram = (PFNCLBUILDPROGRAM )CLCC_DYNLIB_IMPORT(module, "clBuildProgram");
__clewUnloadCompiler = (PFNCLUNLOADCOMPILER )CLCC_DYNLIB_IMPORT(module, "clUnloadCompiler");
__clewGetProgramInfo = (PFNCLGETPROGRAMINFO )CLCC_DYNLIB_IMPORT(module, "clGetProgramInfo");
__clewGetProgramBuildInfo = (PFNCLGETPROGRAMBUILDINFO )CLCC_DYNLIB_IMPORT(module, "clGetProgramBuildInfo");
__clewCreateKernel = (PFNCLCREATEKERNEL )CLCC_DYNLIB_IMPORT(module, "clCreateKernel");
__clewCreateKernelsInProgram = (PFNCLCREATEKERNELSINPROGRAM )CLCC_DYNLIB_IMPORT(module, "clCreateKernelsInProgram");
__clewRetainKernel = (PFNCLRETAINKERNEL )CLCC_DYNLIB_IMPORT(module, "clRetainKernel");
__clewReleaseKernel = (PFNCLRELEASEKERNEL )CLCC_DYNLIB_IMPORT(module, "clReleaseKernel");
__clewSetKernelArg = (PFNCLSETKERNELARG )CLCC_DYNLIB_IMPORT(module, "clSetKernelArg");
__clewGetKernelInfo = (PFNCLGETKERNELINFO )CLCC_DYNLIB_IMPORT(module, "clGetKernelInfo");
__clewGetKernelWorkGroupInfo = (PFNCLGETKERNELWORKGROUPINFO )CLCC_DYNLIB_IMPORT(module, "clGetKernelWorkGroupInfo");
__clewWaitForEvents = (PFNCLWAITFOREVENTS )CLCC_DYNLIB_IMPORT(module, "clWaitForEvents");
__clewGetEventInfo = (PFNCLGETEVENTINFO )CLCC_DYNLIB_IMPORT(module, "clGetEventInfo");
__clewRetainEvent = (PFNCLRETAINEVENT )CLCC_DYNLIB_IMPORT(module, "clRetainEvent");
__clewReleaseEvent = (PFNCLRELEASEEVENT )CLCC_DYNLIB_IMPORT(module, "clReleaseEvent");
__clewGetEventProfilingInfo = (PFNCLGETEVENTPROFILINGINFO )CLCC_DYNLIB_IMPORT(module, "clGetEventProfilingInfo");
__clewFlush = (PFNCLFLUSH )CLCC_DYNLIB_IMPORT(module, "clFlush");
__clewFinish = (PFNCLFINISH )CLCC_DYNLIB_IMPORT(module, "clFinish");
__clewEnqueueReadBuffer = (PFNCLENQUEUEREADBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueReadBuffer");
__clewEnqueueWriteBuffer = (PFNCLENQUEUEWRITEBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueWriteBuffer");
__clewEnqueueCopyBuffer = (PFNCLENQUEUECOPYBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyBuffer");
__clewEnqueueReadImage = (PFNCLENQUEUEREADIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueReadImage");
__clewEnqueueWriteImage = (PFNCLENQUEUEWRITEIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueWriteImage");
__clewEnqueueCopyImage = (PFNCLENQUEUECOPYIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyImage");
__clewEnqueueCopyImageToBuffer = (PFNCLENQUEUECOPYIMAGETOBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyImageToBuffer");
__clewEnqueueCopyBufferToImage = (PFNCLENQUEUECOPYBUFFERTOIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyBufferToImage");
__clewEnqueueMapBuffer = (PFNCLENQUEUEMAPBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueMapBuffer");
__clewEnqueueMapImage = (PFNCLENQUEUEMAPIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueMapImage");
__clewEnqueueUnmapMemObject = (PFNCLENQUEUEUNMAPMEMOBJECT )CLCC_DYNLIB_IMPORT(module, "clEnqueueUnmapMemObject");
__clewEnqueueNDRangeKernel = (PFNCLENQUEUENDRANGEKERNEL )CLCC_DYNLIB_IMPORT(module, "clEnqueueNDRangeKernel");
__clewEnqueueTask = (PFNCLENQUEUETASK )CLCC_DYNLIB_IMPORT(module, "clEnqueueTask");
__clewEnqueueNativeKernel = (PFNCLENQUEUENATIVEKERNEL )CLCC_DYNLIB_IMPORT(module, "clEnqueueNativeKernel");
__clewEnqueueMarker = (PFNCLENQUEUEMARKER )CLCC_DYNLIB_IMPORT(module, "clEnqueueMarker");
__clewEnqueueWaitForEvents = (PFNCLENQUEUEWAITFOREVENTS )CLCC_DYNLIB_IMPORT(module, "clEnqueueWaitForEvents");
__clewEnqueueBarrier = (PFNCLENQUEUEBARRIER )CLCC_DYNLIB_IMPORT(module, "clEnqueueBarrier");
__clewGetExtensionFunctionAddress = (PFNCLGETEXTENSIONFUNCTIONADDRESS )CLCC_DYNLIB_IMPORT(module, "clGetExtensionFunctionAddress");
if(__clewGetPlatformIDs == NULL) return 0;
if(__clewGetPlatformInfo == NULL) return 0;
if(__clewGetDeviceIDs == NULL) return 0;
if(__clewGetDeviceInfo == NULL) return 0;
return 1;
}
//! \param error CL error code
//! \return a string representation of the error code
const char *clErrorString(cl_int error)
{
static const char* strings[] =
{
// Error Codes
"CL_SUCCESS" // 0
, "CL_DEVICE_NOT_FOUND" // -1
, "CL_DEVICE_NOT_AVAILABLE" // -2
, "CL_COMPILER_NOT_AVAILABLE" // -3
, "CL_MEM_OBJECT_ALLOCATION_FAILURE" // -4
, "CL_OUT_OF_RESOURCES" // -5
, "CL_OUT_OF_HOST_MEMORY" // -6
, "CL_PROFILING_INFO_NOT_AVAILABLE" // -7
, "CL_MEM_COPY_OVERLAP" // -8
, "CL_IMAGE_FORMAT_MISMATCH" // -9
, "CL_IMAGE_FORMAT_NOT_SUPPORTED" // -10
, "CL_BUILD_PROGRAM_FAILURE" // -11
, "CL_MAP_FAILURE" // -12
, "" // -13
, "" // -14
, "" // -15
, "" // -16
, "" // -17
, "" // -18
, "" // -19
, "" // -20
, "" // -21
, "" // -22
, "" // -23
, "" // -24
, "" // -25
, "" // -26
, "" // -27
, "" // -28
, "" // -29
, "CL_INVALID_VALUE" // -30
, "CL_INVALID_DEVICE_TYPE" // -31
, "CL_INVALID_PLATFORM" // -32
, "CL_INVALID_DEVICE" // -33
, "CL_INVALID_CONTEXT" // -34
, "CL_INVALID_QUEUE_PROPERTIES" // -35
, "CL_INVALID_COMMAND_QUEUE" // -36
, "CL_INVALID_HOST_PTR" // -37
, "CL_INVALID_MEM_OBJECT" // -38
, "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR" // -39
, "CL_INVALID_IMAGE_SIZE" // -40
, "CL_INVALID_SAMPLER" // -41
, "CL_INVALID_BINARY" // -42
, "CL_INVALID_BUILD_OPTIONS" // -43
, "CL_INVALID_PROGRAM" // -44
, "CL_INVALID_PROGRAM_EXECUTABLE" // -45
, "CL_INVALID_KERNEL_NAME" // -46
, "CL_INVALID_KERNEL_DEFINITION" // -47
, "CL_INVALID_KERNEL" // -48
, "CL_INVALID_ARG_INDEX" // -49
, "CL_INVALID_ARG_VALUE" // -50
, "CL_INVALID_ARG_SIZE" // -51
, "CL_INVALID_KERNEL_ARGS" // -52
, "CL_INVALID_WORK_DIMENSION" // -53
, "CL_INVALID_WORK_GROUP_SIZE" // -54
, "CL_INVALID_WORK_ITEM_SIZE" // -55
, "CL_INVALID_GLOBAL_OFFSET" // -56
, "CL_INVALID_EVENT_WAIT_LIST" // -57
, "CL_INVALID_EVENT" // -58
, "CL_INVALID_OPERATION" // -59
, "CL_INVALID_GL_OBJECT" // -60
, "CL_INVALID_BUFFER_SIZE" // -61
, "CL_INVALID_MIP_LEVEL" // -62
, "CL_INVALID_GLOBAL_WORK_SIZE" // -63
};
return strings[-error];
}
CCL_NAMESPACE_END
#ifdef CLCC_DYNLIB_CLOSE
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,37 +0,0 @@
/*
* Copyright 2011, Blender Foundation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Contributor:
* Jeroen Bakker
* Monique Dewanchand
*/
#ifndef OCL_OPENCL_H
#define OCL_OPENCL_H
#ifdef __cplusplus
extern "C" {
#endif
#include "intern/clew.h"
int OCL_init(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,37 +0,0 @@
/*
* Copyright 2011, Blender Foundation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Contributor:
* Jeroen Bakker
* Monique Dewanchand
*/
#include "OCL_opencl.h"
int OCL_init(void)
{
#ifdef _WIN32
const char *path = "OpenCL.dll";
#elif defined(__APPLE__)
const char *path = "/Library/Frameworks/OpenCL.framework/OpenCL";
#else
const char *path = "libOpenCL.so";
#endif
return (clewInit(path) == CLEW_SUCCESS);
}

View File

@ -1,316 +0,0 @@
//////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009 Organic Vectory B.V.
// Written by George van Venrooij
//
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file license.txt)
//////////////////////////////////////////////////////////////////////////
#include "clew.h"
//! \file clew.c
//! \brief OpenCL run-time loader source
#ifndef CLCC_GENERATE_DOCUMENTATION
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#define VC_EXTRALEAN
#include <windows.h>
typedef HMODULE CLCC_DYNLIB_HANDLE;
#define CLCC_DYNLIB_OPEN LoadLibrary
#define CLCC_DYNLIB_CLOSE FreeLibrary
#define CLCC_DYNLIB_IMPORT GetProcAddress
#else
#include <dlfcn.h>
typedef void* CLCC_DYNLIB_HANDLE;
#define CLCC_DYNLIB_OPEN(path) dlopen(path, RTLD_NOW | RTLD_GLOBAL)
#define CLCC_DYNLIB_CLOSE dlclose
#define CLCC_DYNLIB_IMPORT dlsym
#endif
#else
//typedef implementation_defined CLCC_DYNLIB_HANDLE;
//#define CLCC_DYNLIB_OPEN(path) implementation_defined
//#define CLCC_DYNLIB_CLOSE implementation_defined
//#define CLCC_DYNLIB_IMPORT implementation_defined
#endif
#include <stdlib.h>
//! \brief module handle
static CLCC_DYNLIB_HANDLE module = NULL;
// Variables holding function entry points
#ifndef CLCC_GENERATE_DOCUMENTATION
PFNCLGETPLATFORMIDS __oclGetPlatformIDs = NULL;
PFNCLGETPLATFORMINFO __oclGetPlatformInfo = NULL;
PFNCLGETDEVICEIDS __oclGetDeviceIDs = NULL;
PFNCLGETDEVICEINFO __oclGetDeviceInfo = NULL;
PFNCLCREATECONTEXT __oclCreateContext = NULL;
PFNCLCREATECONTEXTFROMTYPE __oclCreateContextFromType = NULL;
PFNCLRETAINCONTEXT __oclRetainContext = NULL;
PFNCLRELEASECONTEXT __oclReleaseContext = NULL;
PFNCLGETCONTEXTINFO __oclGetContextInfo = NULL;
PFNCLCREATECOMMANDQUEUE __oclCreateCommandQueue = NULL;
PFNCLRETAINCOMMANDQUEUE __oclRetainCommandQueue = NULL;
PFNCLRELEASECOMMANDQUEUE __oclReleaseCommandQueue = NULL;
PFNCLGETCOMMANDQUEUEINFO __oclGetCommandQueueInfo = NULL;
PFNCLSETCOMMANDQUEUEPROPERTY __oclSetCommandQueueProperty = NULL;
PFNCLCREATEBUFFER __oclCreateBuffer = NULL;
PFNCLCREATEIMAGE2D __oclCreateImage2D = NULL;
PFNCLCREATEIMAGE3D __oclCreateImage3D = NULL;
PFNCLRETAINMEMOBJECT __oclRetainMemObject = NULL;
PFNCLRELEASEMEMOBJECT __oclReleaseMemObject = NULL;
PFNCLGETSUPPORTEDIMAGEFORMATS __oclGetSupportedImageFormats = NULL;
PFNCLGETMEMOBJECTINFO __oclGetMemObjectInfo = NULL;
PFNCLGETIMAGEINFO __oclGetImageInfo = NULL;
PFNCLCREATESAMPLER __oclCreateSampler = NULL;
PFNCLRETAINSAMPLER __oclRetainSampler = NULL;
PFNCLRELEASESAMPLER __oclReleaseSampler = NULL;
PFNCLGETSAMPLERINFO __oclGetSamplerInfo = NULL;
PFNCLCREATEPROGRAMWITHSOURCE __oclCreateProgramWithSource = NULL;
PFNCLCREATEPROGRAMWITHBINARY __oclCreateProgramWithBinary = NULL;
PFNCLRETAINPROGRAM __oclRetainProgram = NULL;
PFNCLRELEASEPROGRAM __oclReleaseProgram = NULL;
PFNCLBUILDPROGRAM __oclBuildProgram = NULL;
PFNCLUNLOADCOMPILER __oclUnloadCompiler = NULL;
PFNCLGETPROGRAMINFO __oclGetProgramInfo = NULL;
PFNCLGETPROGRAMBUILDINFO __oclGetProgramBuildInfo = NULL;
PFNCLCREATEKERNEL __oclCreateKernel = NULL;
PFNCLCREATEKERNELSINPROGRAM __oclCreateKernelsInProgram = NULL;
PFNCLRETAINKERNEL __oclRetainKernel = NULL;
PFNCLRELEASEKERNEL __oclReleaseKernel = NULL;
PFNCLSETKERNELARG __oclSetKernelArg = NULL;
PFNCLGETKERNELINFO __oclGetKernelInfo = NULL;
PFNCLGETKERNELWORKGROUPINFO __oclGetKernelWorkGroupInfo = NULL;
PFNCLWAITFOREVENTS __oclWaitForEvents = NULL;
PFNCLGETEVENTINFO __oclGetEventInfo = NULL;
PFNCLRETAINEVENT __oclRetainEvent = NULL;
PFNCLRELEASEEVENT __oclReleaseEvent = NULL;
PFNCLGETEVENTPROFILINGINFO __oclGetEventProfilingInfo = NULL;
PFNCLFLUSH __oclFlush = NULL;
PFNCLFINISH __oclFinish = NULL;
PFNCLENQUEUEREADBUFFER __oclEnqueueReadBuffer = NULL;
PFNCLENQUEUEWRITEBUFFER __oclEnqueueWriteBuffer = NULL;
PFNCLENQUEUECOPYBUFFER __oclEnqueueCopyBuffer = NULL;
PFNCLENQUEUEREADIMAGE __oclEnqueueReadImage = NULL;
PFNCLENQUEUEWRITEIMAGE __oclEnqueueWriteImage = NULL;
PFNCLENQUEUECOPYIMAGE __oclEnqueueCopyImage = NULL;
PFNCLENQUEUECOPYIMAGETOBUFFER __oclEnqueueCopyImageToBuffer = NULL;
PFNCLENQUEUECOPYBUFFERTOIMAGE __oclEnqueueCopyBufferToImage = NULL;
PFNCLENQUEUEMAPBUFFER __oclEnqueueMapBuffer = NULL;
PFNCLENQUEUEMAPIMAGE __oclEnqueueMapImage = NULL;
PFNCLENQUEUEUNMAPMEMOBJECT __oclEnqueueUnmapMemObject = NULL;
PFNCLENQUEUENDRANGEKERNEL __oclEnqueueNDRangeKernel = NULL;
PFNCLENQUEUETASK __oclEnqueueTask = NULL;
PFNCLENQUEUENATIVEKERNEL __oclEnqueueNativeKernel = NULL;
PFNCLENQUEUEMARKER __oclEnqueueMarker = NULL;
PFNCLENQUEUEWAITFOREVENTS __oclEnqueueWaitForEvents = NULL;
PFNCLENQUEUEBARRIER __oclEnqueueBarrier = NULL;
PFNCLGETEXTENSIONFUNCTIONADDRESS __oclGetExtensionFunctionAddress = NULL;
#endif // CLCC_GENERATE_DOCUMENTATION
//! \brief Unloads OpenCL dynamic library, should not be called directly
static void clewExit(void)
{
if (module != NULL)
{
// Ignore errors
CLCC_DYNLIB_CLOSE(module);
module = NULL;
}
}
//! \param path path to dynamic library to load
//! \return CLEW_ERROR_OPEN_FAILED if the library could not be opened
//! CLEW_ERROR_ATEXIT_FAILED if atexit(clewExit) failed
//! CLEW_SUCCESS when the library was succesfully loaded
int clewInit(const char* path)
{
int error = 0;
// Check if already initialized
if (module != NULL)
{
return CLEW_SUCCESS;
}
// Load library
module = CLCC_DYNLIB_OPEN(path);
// Check for errors
if (module == NULL)
{
return CLEW_ERROR_OPEN_FAILED;
}
// Set unloading
error = atexit(clewExit);
if (error)
{
// Failure queing atexit, shutdown with error
CLCC_DYNLIB_CLOSE(module);
module = NULL;
return CLEW_ERROR_ATEXIT_FAILED;
}
// Determine function entry-points
__oclGetPlatformIDs = (PFNCLGETPLATFORMIDS )CLCC_DYNLIB_IMPORT(module, "clGetPlatformIDs");
__oclGetPlatformInfo = (PFNCLGETPLATFORMINFO )CLCC_DYNLIB_IMPORT(module, "clGetPlatformInfo");
__oclGetDeviceIDs = (PFNCLGETDEVICEIDS )CLCC_DYNLIB_IMPORT(module, "clGetDeviceIDs");
__oclGetDeviceInfo = (PFNCLGETDEVICEINFO )CLCC_DYNLIB_IMPORT(module, "clGetDeviceInfo");
__oclCreateContext = (PFNCLCREATECONTEXT )CLCC_DYNLIB_IMPORT(module, "clCreateContext");
__oclCreateContextFromType = (PFNCLCREATECONTEXTFROMTYPE )CLCC_DYNLIB_IMPORT(module, "clCreateContextFromType");
__oclRetainContext = (PFNCLRETAINCONTEXT )CLCC_DYNLIB_IMPORT(module, "clRetainContext");
__oclReleaseContext = (PFNCLRELEASECONTEXT )CLCC_DYNLIB_IMPORT(module, "clReleaseContext");
__oclGetContextInfo = (PFNCLGETCONTEXTINFO )CLCC_DYNLIB_IMPORT(module, "clGetContextInfo");
__oclCreateCommandQueue = (PFNCLCREATECOMMANDQUEUE )CLCC_DYNLIB_IMPORT(module, "clCreateCommandQueue");
__oclRetainCommandQueue = (PFNCLRETAINCOMMANDQUEUE )CLCC_DYNLIB_IMPORT(module, "clRetainCommandQueue");
__oclReleaseCommandQueue = (PFNCLRELEASECOMMANDQUEUE )CLCC_DYNLIB_IMPORT(module, "clReleaseCommandQueue");
__oclGetCommandQueueInfo = (PFNCLGETCOMMANDQUEUEINFO )CLCC_DYNLIB_IMPORT(module, "clGetCommandQueueInfo");
__oclSetCommandQueueProperty = (PFNCLSETCOMMANDQUEUEPROPERTY )CLCC_DYNLIB_IMPORT(module, "clSetCommandQueueProperty");
__oclCreateBuffer = (PFNCLCREATEBUFFER )CLCC_DYNLIB_IMPORT(module, "clCreateBuffer");
__oclCreateImage2D = (PFNCLCREATEIMAGE2D )CLCC_DYNLIB_IMPORT(module, "clCreateImage2D");
__oclCreateImage3D = (PFNCLCREATEIMAGE3D )CLCC_DYNLIB_IMPORT(module, "clCreateImage3D");
__oclRetainMemObject = (PFNCLRETAINMEMOBJECT )CLCC_DYNLIB_IMPORT(module, "clRetainMemObject");
__oclReleaseMemObject = (PFNCLRELEASEMEMOBJECT )CLCC_DYNLIB_IMPORT(module, "clReleaseMemObject");
__oclGetSupportedImageFormats = (PFNCLGETSUPPORTEDIMAGEFORMATS )CLCC_DYNLIB_IMPORT(module, "clGetSupportedImageFormats");
__oclGetMemObjectInfo = (PFNCLGETMEMOBJECTINFO )CLCC_DYNLIB_IMPORT(module, "clGetMemObjectInfo");
__oclGetImageInfo = (PFNCLGETIMAGEINFO )CLCC_DYNLIB_IMPORT(module, "clGetImageInfo");
__oclCreateSampler = (PFNCLCREATESAMPLER )CLCC_DYNLIB_IMPORT(module, "clCreateSampler");
__oclRetainSampler = (PFNCLRETAINSAMPLER )CLCC_DYNLIB_IMPORT(module, "clRetainSampler");
__oclReleaseSampler = (PFNCLRELEASESAMPLER )CLCC_DYNLIB_IMPORT(module, "clReleaseSampler");
__oclGetSamplerInfo = (PFNCLGETSAMPLERINFO )CLCC_DYNLIB_IMPORT(module, "clGetSamplerInfo");
__oclCreateProgramWithSource = (PFNCLCREATEPROGRAMWITHSOURCE )CLCC_DYNLIB_IMPORT(module, "clCreateProgramWithSource");
__oclCreateProgramWithBinary = (PFNCLCREATEPROGRAMWITHBINARY )CLCC_DYNLIB_IMPORT(module, "clCreateProgramWithBinary");
__oclRetainProgram = (PFNCLRETAINPROGRAM )CLCC_DYNLIB_IMPORT(module, "clRetainProgram");
__oclReleaseProgram = (PFNCLRELEASEPROGRAM )CLCC_DYNLIB_IMPORT(module, "clReleaseProgram");
__oclBuildProgram = (PFNCLBUILDPROGRAM )CLCC_DYNLIB_IMPORT(module, "clBuildProgram");
__oclUnloadCompiler = (PFNCLUNLOADCOMPILER )CLCC_DYNLIB_IMPORT(module, "clUnloadCompiler");
__oclGetProgramInfo = (PFNCLGETPROGRAMINFO )CLCC_DYNLIB_IMPORT(module, "clGetProgramInfo");
__oclGetProgramBuildInfo = (PFNCLGETPROGRAMBUILDINFO )CLCC_DYNLIB_IMPORT(module, "clGetProgramBuildInfo");
__oclCreateKernel = (PFNCLCREATEKERNEL )CLCC_DYNLIB_IMPORT(module, "clCreateKernel");
__oclCreateKernelsInProgram = (PFNCLCREATEKERNELSINPROGRAM )CLCC_DYNLIB_IMPORT(module, "clCreateKernelsInProgram");
__oclRetainKernel = (PFNCLRETAINKERNEL )CLCC_DYNLIB_IMPORT(module, "clRetainKernel");
__oclReleaseKernel = (PFNCLRELEASEKERNEL )CLCC_DYNLIB_IMPORT(module, "clReleaseKernel");
__oclSetKernelArg = (PFNCLSETKERNELARG )CLCC_DYNLIB_IMPORT(module, "clSetKernelArg");
__oclGetKernelInfo = (PFNCLGETKERNELINFO )CLCC_DYNLIB_IMPORT(module, "clGetKernelInfo");
__oclGetKernelWorkGroupInfo = (PFNCLGETKERNELWORKGROUPINFO )CLCC_DYNLIB_IMPORT(module, "clGetKernelWorkGroupInfo");
__oclWaitForEvents = (PFNCLWAITFOREVENTS )CLCC_DYNLIB_IMPORT(module, "clWaitForEvents");
__oclGetEventInfo = (PFNCLGETEVENTINFO )CLCC_DYNLIB_IMPORT(module, "clGetEventInfo");
__oclRetainEvent = (PFNCLRETAINEVENT )CLCC_DYNLIB_IMPORT(module, "clRetainEvent");
__oclReleaseEvent = (PFNCLRELEASEEVENT )CLCC_DYNLIB_IMPORT(module, "clReleaseEvent");
__oclGetEventProfilingInfo = (PFNCLGETEVENTPROFILINGINFO )CLCC_DYNLIB_IMPORT(module, "clGetEventProfilingInfo");
__oclFlush = (PFNCLFLUSH )CLCC_DYNLIB_IMPORT(module, "clFlush");
__oclFinish = (PFNCLFINISH )CLCC_DYNLIB_IMPORT(module, "clFinish");
__oclEnqueueReadBuffer = (PFNCLENQUEUEREADBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueReadBuffer");
__oclEnqueueWriteBuffer = (PFNCLENQUEUEWRITEBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueWriteBuffer");
__oclEnqueueCopyBuffer = (PFNCLENQUEUECOPYBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyBuffer");
__oclEnqueueReadImage = (PFNCLENQUEUEREADIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueReadImage");
__oclEnqueueWriteImage = (PFNCLENQUEUEWRITEIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueWriteImage");
__oclEnqueueCopyImage = (PFNCLENQUEUECOPYIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyImage");
__oclEnqueueCopyImageToBuffer = (PFNCLENQUEUECOPYIMAGETOBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyImageToBuffer");
__oclEnqueueCopyBufferToImage = (PFNCLENQUEUECOPYBUFFERTOIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyBufferToImage");
__oclEnqueueMapBuffer = (PFNCLENQUEUEMAPBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueMapBuffer");
__oclEnqueueMapImage = (PFNCLENQUEUEMAPIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueMapImage");
__oclEnqueueUnmapMemObject = (PFNCLENQUEUEUNMAPMEMOBJECT )CLCC_DYNLIB_IMPORT(module, "clEnqueueUnmapMemObject");
__oclEnqueueNDRangeKernel = (PFNCLENQUEUENDRANGEKERNEL )CLCC_DYNLIB_IMPORT(module, "clEnqueueNDRangeKernel");
__oclEnqueueTask = (PFNCLENQUEUETASK )CLCC_DYNLIB_IMPORT(module, "clEnqueueTask");
__oclEnqueueNativeKernel = (PFNCLENQUEUENATIVEKERNEL )CLCC_DYNLIB_IMPORT(module, "clEnqueueNativeKernel");
__oclEnqueueMarker = (PFNCLENQUEUEMARKER )CLCC_DYNLIB_IMPORT(module, "clEnqueueMarker");
__oclEnqueueWaitForEvents = (PFNCLENQUEUEWAITFOREVENTS )CLCC_DYNLIB_IMPORT(module, "clEnqueueWaitForEvents");
__oclEnqueueBarrier = (PFNCLENQUEUEBARRIER )CLCC_DYNLIB_IMPORT(module, "clEnqueueBarrier");
__oclGetExtensionFunctionAddress = (PFNCLGETEXTENSIONFUNCTIONADDRESS )CLCC_DYNLIB_IMPORT(module, "clGetExtensionFunctionAddress");
if(__oclGetPlatformIDs == NULL) return CLEW_ERROR_OPEN_FAILED;
if(__oclGetPlatformInfo == NULL) return CLEW_ERROR_OPEN_FAILED;
if(__oclGetDeviceIDs == NULL) return CLEW_ERROR_OPEN_FAILED;
if(__oclGetDeviceInfo == NULL) return CLEW_ERROR_OPEN_FAILED;
return CLEW_SUCCESS;
}
//! \param error CL error code
//! \return a string representation of the error code
const char* clewErrorString(cl_int error)
{
static const char* strings[] =
{
// Error Codes
"CL_SUCCESS" // 0
, "CL_DEVICE_NOT_FOUND" // -1
, "CL_DEVICE_NOT_AVAILABLE" // -2
, "CL_COMPILER_NOT_AVAILABLE" // -3
, "CL_MEM_OBJECT_ALLOCATION_FAILURE" // -4
, "CL_OUT_OF_RESOURCES" // -5
, "CL_OUT_OF_HOST_MEMORY" // -6
, "CL_PROFILING_INFO_NOT_AVAILABLE" // -7
, "CL_MEM_COPY_OVERLAP" // -8
, "CL_IMAGE_FORMAT_MISMATCH" // -9
, "CL_IMAGE_FORMAT_NOT_SUPPORTED" // -10
, "CL_BUILD_PROGRAM_FAILURE" // -11
, "CL_MAP_FAILURE" // -12
, "" // -13
, "" // -14
, "" // -15
, "" // -16
, "" // -17
, "" // -18
, "" // -19
, "" // -20
, "" // -21
, "" // -22
, "" // -23
, "" // -24
, "" // -25
, "" // -26
, "" // -27
, "" // -28
, "" // -29
, "CL_INVALID_VALUE" // -30
, "CL_INVALID_DEVICE_TYPE" // -31
, "CL_INVALID_PLATFORM" // -32
, "CL_INVALID_DEVICE" // -33
, "CL_INVALID_CONTEXT" // -34
, "CL_INVALID_QUEUE_PROPERTIES" // -35
, "CL_INVALID_COMMAND_QUEUE" // -36
, "CL_INVALID_HOST_PTR" // -37
, "CL_INVALID_MEM_OBJECT" // -38
, "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR" // -39
, "CL_INVALID_IMAGE_SIZE" // -40
, "CL_INVALID_SAMPLER" // -41
, "CL_INVALID_BINARY" // -42
, "CL_INVALID_BUILD_OPTIONS" // -43
, "CL_INVALID_PROGRAM" // -44
, "CL_INVALID_PROGRAM_EXECUTABLE" // -45
, "CL_INVALID_KERNEL_NAME" // -46
, "CL_INVALID_KERNEL_DEFINITION" // -47
, "CL_INVALID_KERNEL" // -48
, "CL_INVALID_ARG_INDEX" // -49
, "CL_INVALID_ARG_VALUE" // -50
, "CL_INVALID_ARG_SIZE" // -51
, "CL_INVALID_KERNEL_ARGS" // -52
, "CL_INVALID_WORK_DIMENSION" // -53
, "CL_INVALID_WORK_GROUP_SIZE" // -54
, "CL_INVALID_WORK_ITEM_SIZE" // -55
, "CL_INVALID_GLOBAL_OFFSET" // -56
, "CL_INVALID_EVENT_WAIT_LIST" // -57
, "CL_INVALID_EVENT" // -58
, "CL_INVALID_OPERATION" // -59
, "CL_INVALID_GL_OBJECT" // -60
, "CL_INVALID_BUFFER_SIZE" // -61
, "CL_INVALID_MIP_LEVEL" // -62
, "CL_INVALID_GLOBAL_WORK_SIZE" // -63
};
return strings[-error];
}

File diff suppressed because it is too large Load Diff

View File

@ -39,7 +39,7 @@ set(INC
../nodes/intern
../render/extern/include
../render/intern/include
../../../intern/opencl
../../../extern/clew/include
../../../intern/guardedalloc
)
@ -540,4 +540,6 @@ list(APPEND INC
data_to_c(${CMAKE_CURRENT_SOURCE_DIR}/operations/COM_OpenCLKernels.cl
${CMAKE_CURRENT_BINARY_DIR}/operations/COM_OpenCLKernels.cl.h SRC)
add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_1_APIS)
blender_add_lib(bf_compositor "${SRC}" "${INC}" "${INC_SYS}")

View File

@ -26,7 +26,7 @@
# ***** END GPL LICENSE BLOCK *****
Import ('env')
defs = ['GLEW_STATIC']
defs = ['GLEW_STATIC', 'CL_USE_DEPRECATED_OPENCL_1_1_APIS']
sources_intern = env.Glob('intern/*.cpp')
sources_nodes = env.Glob('nodes/*.cpp')
@ -37,7 +37,7 @@ incs = [
'intern',
'nodes',
'operations',
'#/intern/opencl',
'#/extern/clew/include',
'../blenkernel',
'../blenlib',
'../imbuf',

View File

@ -38,7 +38,7 @@ extern "C" {
#include "COM_MemoryProxy.h"
#include "COM_SocketReader.h"
#include "OCL_opencl.h"
#include "clew.h"
using std::list;
using std::min;

View File

@ -103,7 +103,7 @@ void OpenCLDevice::COM_clAttachMemoryBufferOffsetToKernelParameter(cl_kernel ker
if (offsetIndex != -1) {
cl_int error;
rcti *rect = memoryBuffer->getRect();
cl_int2 offset = {rect->xmin, rect->ymin};
cl_int2 offset = {{rect->xmin, rect->ymin}};
error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
@ -114,7 +114,7 @@ void OpenCLDevice::COM_clAttachSizeToKernelParameter(cl_kernel kernel, int offse
{
if (offsetIndex != -1) {
cl_int error;
cl_int2 offset = {(cl_int)operation->getWidth(), (cl_int)operation->getHeight()};
cl_int2 offset = {{(cl_int)operation->getWidth(), (cl_int)operation->getHeight()}};
error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }
@ -154,7 +154,7 @@ void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemo
bool breaked = false;
for (offsety = 0; offsety < height && (!breaked); offsety += localSize) {
offset[1] = offsety;
offset.y = offsety;
if (offsety + localSize < height) {
size[1] = localSize;
}
@ -169,7 +169,7 @@ void OpenCLDevice::COM_clEnqueueRange(cl_kernel kernel, MemoryBuffer *outputMemo
else {
size[0] = width - offsetx;
}
offset[0] = offsetx;
offset.x = offsetx;
error = clSetKernelArg(kernel, offsetIndex, sizeof(cl_int2), &offset);
if (error != CL_SUCCESS) { printf("CLERROR[%d]: %s\n", error, clewErrorString(error)); }

View File

@ -26,7 +26,7 @@ class OpenCLDevice;
#define _COM_OpenCLDevice_h
#include "COM_Device.h"
#include "OCL_opencl.h"
#include "clew.h"
#include "COM_WorkScheduler.h"
#include "COM_ReadBufferOperation.h"

View File

@ -28,7 +28,7 @@
#include "COM_CPUDevice.h"
#include "COM_OpenCLDevice.h"
#include "COM_OpenCLKernels.cl.h"
#include "OCL_opencl.h"
#include "clew.h"
#include "COM_WriteBufferOperation.h"
#include "MEM_guardedalloc.h"
@ -274,7 +274,7 @@ bool WorkScheduler::hasGPUDevices()
#endif
}
static void clContextError(const char *errinfo, const void *private_info, size_t cb, void *user_data)
static void CL_CALLBACK clContextError(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
printf("OPENCL error: %s\n", errinfo);
}
@ -326,7 +326,7 @@ void WorkScheduler::initialize(bool use_opencl, int num_cpu_threads)
g_context = NULL;
g_program = NULL;
if (!OCL_init()) /* this will check for errors and skip if already initialized */
if (clewInit() != CLEW_SUCCESS) /* this will check for errors and skip if already initialized */
return;
if (clCreateContextFromType) {

View File

@ -32,7 +32,7 @@ extern "C" {
#include "COM_compositor.h"
#include "COM_ExecutionSystem.h"
#include "COM_WorkScheduler.h"
#include "OCL_opencl.h"
#include "clew.h"
#include "COM_MovieDistortionOperation.h"
static ThreadMutex s_compositorMutex;

View File

@ -109,8 +109,8 @@ void DirectionalBlurOperation::executeOpenCL(OpenCLDevice *device,
cl_kernel directionalBlurKernel = device->COM_clCreateKernel("directionalBlurKernel", NULL);
cl_int iterations = pow(2.0f, this->m_data->iter);
cl_float2 ltxy = {this->m_tx, this->m_ty};
cl_float2 centerpix = {this->m_center_x_pix, this->m_center_y_pix};
cl_float2 ltxy = {{this->m_tx, this->m_ty}};
cl_float2 centerpix = {{this->m_center_x_pix, this->m_center_y_pix}};
cl_float lsc = this->m_sc;
cl_float lrot = this->m_rot;

View File

@ -228,3 +228,9 @@ if(WITH_PLAYER)
endif()
setup_liblinks(blenderplayer)
# We put CLEW and CUEW here because OPENSUBDIV_LIBRARIES dpeends on them..
if(WITH_CYCLES OR WITH_COMPOSITOR OR WITH_OPENSUBDIV)
target_link_libraries(blenderplayer "extern_clew")
target_link_libraries(blenderplayer "extern_cuew")
endif()