Blender V2.61 - r43446

util_cuda.h

Go to the documentation of this file.
00001 /*
00002  * Copyright 2011, Blender Foundation.
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU General Public License
00006  * as published by the Free Software Foundation; either version 2
00007  * of the License, or (at your option) any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  * GNU General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software Foundation,
00016  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 #ifndef __UTIL_CUDA_H__
00020 #define __UTIL_CUDA_H__
00021 
00022 #include <stdlib.h>
00023 #include "util_opengl.h"
00024 #include "util_string.h"
00025 
00026 CCL_NAMESPACE_BEGIN
00027 
00028 /* CUDA is linked in dynamically at runtime, so we can start the application
00029  * without requiring a CUDA installation. Code adapted from the example
00030  * matrixMulDynlinkJIT in the CUDA SDK. */
00031 
00032 bool cuLibraryInit(); /* NOTE(review): presumably performs the runtime dynamic linking described in the comment above and reports success as a bool -- defined outside this header, confirm */
00033 string cuCompilerPath(); /* returns a string; presumably the location of the CUDA compiler -- defined outside this header, confirm */
00034 
00035 CCL_NAMESPACE_END
00036 
00037 /* defines, structs, enums */
00038 
00039 #define CUDA_VERSION 3020 /* defined here rather than taken from a CUDA SDK header; presumably 3020 encodes API version 3.2, which the declarations below mirror -- confirm */
00040 
00041 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined(_WIN64) || defined(__LP64__) /* 64-bit build: device pointers are 64 bits wide; _WIN64 and __LP64__ also catch 64-bit targets that are not x86 */
00042 typedef unsigned long long CUdeviceptr;
00043 #else /* 32-bit build: device pointers are 32 bits wide */
00044 typedef unsigned int CUdeviceptr;
00045 #endif
00046 
00047 typedef int CUdevice; /* a device is identified by a plain int, unlike the pointer handles below */
00048 typedef struct CUctx_st *CUcontext; /* this and the following handles point to structs that are never defined in this header, so they are opaque to callers */
00049 typedef struct CUmod_st *CUmodule;
00050 typedef struct CUfunc_st *CUfunction;
00051 typedef struct CUarray_st *CUarray;
00052 typedef struct CUtexref_st *CUtexref;
00053 typedef struct CUsurfref_st *CUsurfref;
00054 typedef struct CUevent_st *CUevent;
00055 typedef struct CUstream_st *CUstream;
00056 typedef struct CUgraphicsResource_st *CUgraphicsResource;
00057 
00058 typedef struct CUuuid_st { /* 16-byte identifier; in this header it is only used as the table id argument of tcuGetExportTable */
00059     char bytes[16];
00060 } CUuuid;
00061 
00062 typedef enum CUctx_flags_enum { /* context flag bits; presumably passed as the unsigned int flags of tcuCtxCreate / tcuGLCtxCreate -- see the CUDA driver API docs */
00063     CU_CTX_SCHED_AUTO  = 0,
00064     CU_CTX_SCHED_SPIN  = 1,
00065     CU_CTX_SCHED_YIELD = 2,
00066     CU_CTX_SCHED_MASK  = 0x3, /* covers the bits used by the SCHED_* values above */
00067     CU_CTX_BLOCKING_SYNC = 4,
00068     CU_CTX_MAP_HOST = 8,
00069     CU_CTX_LMEM_RESIZE_TO_MAX = 16,
00070     CU_CTX_FLAGS_MASK  = 0x1f /* union of every bit defined in this enum (0x3 | 4 | 8 | 16) */
00071 } CUctx_flags;
00072 
00073 typedef enum CUevent_flags_enum { /* event flag values; presumably passed as the Flags argument of tcuEventCreate -- confirm */
00074     CU_EVENT_DEFAULT        = 0,
00075     CU_EVENT_BLOCKING_SYNC  = 1,
00076     CU_EVENT_DISABLE_TIMING = 2
00077 } CUevent_flags;
00078 
00079 typedef enum CUarray_format_enum { /* element format; used by the Format field of both array descriptors and by tcuTexRefSetFormat / tcuTexRefGetFormat */
00080     CU_AD_FORMAT_UNSIGNED_INT8  = 0x01,
00081     CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
00082     CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
00083     CU_AD_FORMAT_SIGNED_INT8    = 0x08, /* values are deliberately sparse: 0x01-0x03 unsigned, 0x08-0x0a signed, 0x10 half, 0x20 float */
00084     CU_AD_FORMAT_SIGNED_INT16   = 0x09,
00085     CU_AD_FORMAT_SIGNED_INT32   = 0x0a,
00086     CU_AD_FORMAT_HALF           = 0x10,
00087     CU_AD_FORMAT_FLOAT          = 0x20
00088 } CUarray_format;
00089 
00090 typedef enum CUaddress_mode_enum { /* texture addressing mode; argument type of tcuTexRefSetAddressMode and output type of tcuTexRefGetAddressMode */
00091     CU_TR_ADDRESS_MODE_WRAP   = 0,
00092     CU_TR_ADDRESS_MODE_CLAMP  = 1,
00093     CU_TR_ADDRESS_MODE_MIRROR = 2,
00094     CU_TR_ADDRESS_MODE_BORDER = 3
00095 } CUaddress_mode;
00096 
00097 typedef enum CUfilter_mode_enum { /* texture filter mode; argument type of tcuTexRefSetFilterMode and output type of tcuTexRefGetFilterMode */
00098     CU_TR_FILTER_MODE_POINT  = 0,
00099     CU_TR_FILTER_MODE_LINEAR = 1
00100 } CUfilter_mode;
00101 
00102 typedef enum CUdevice_attribute_enum { /* selects which attribute tcuDeviceGetAttribute writes to its int output; values are fixed explicitly */
00103     CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
00104     CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
00105     CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
00106     CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
00107     CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
00108     CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
00109     CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
00110     CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
00111     CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, /* second name for the same value 8 as the line above */
00112     CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
00113     CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
00114     CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
00115     CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
00116     CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, /* second name for the same value 12 as the line above */
00117     CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
00118     CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
00119     CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
00120     CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
00121     CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
00122     CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
00123     CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
00124     CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
00125     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
00126     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
00127     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
00128     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
00129     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
00130     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
00131     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
00132     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
00133     CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
00134     CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
00135     CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
00136     CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
00137     CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
00138     CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
00139     CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35
00140 } CUdevice_attribute;
00141 
00142 typedef struct CUdevprop_st { /* device properties record; passed by pointer to tcuDeviceGetProperties. All members are int. */
00143     int maxThreadsPerBlock;
00144     int maxThreadsDim[3];
00145     int maxGridSize[3];
00146     int sharedMemPerBlock;
00147     int totalConstantMemory;
00148     int SIMDWidth;
00149     int memPitch;
00150     int regsPerBlock;
00151     int clockRate;
00152     int textureAlign;
00153 } CUdevprop; /* NOTE(review): member order presumably has to match the driver's own layout -- do not reorder */
00154 
00155 typedef enum CUfunction_attribute_enum { /* selects which attribute tcuFuncGetAttribute writes to its int output */
00156     CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
00157     CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
00158     CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
00159     CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
00160     CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
00161     CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
00162     CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
00163     CU_FUNC_ATTRIBUTE_MAX /* no initializer: takes the next value, 7, i.e. the number of attributes listed above */
00164 } CUfunction_attribute;
00165 
00166 typedef enum CUfunc_cache_enum { /* cache preference; used by tcuCtxGetCacheConfig, tcuCtxSetCacheConfig and tcuFuncSetCacheConfig */
00167     CU_FUNC_CACHE_PREFER_NONE    = 0x00,
00168     CU_FUNC_CACHE_PREFER_SHARED  = 0x01,
00169     CU_FUNC_CACHE_PREFER_L1      = 0x02
00170 } CUfunc_cache;
00171 
00172 typedef enum CUmemorytype_enum { /* type of the srcMemoryType / dstMemoryType fields of CUDA_MEMCPY2D and CUDA_MEMCPY3D; numbering starts at 1, not 0 */
00173     CU_MEMORYTYPE_HOST   = 0x01,
00174     CU_MEMORYTYPE_DEVICE = 0x02,
00175     CU_MEMORYTYPE_ARRAY  = 0x03
00176 } CUmemorytype;
00177 
00178 typedef enum CUcomputemode_enum { /* not referenced by any signature in this header; presumably the value reported for CU_DEVICE_ATTRIBUTE_COMPUTE_MODE -- confirm */
00179     CU_COMPUTEMODE_DEFAULT    = 0,
00180     CU_COMPUTEMODE_EXCLUSIVE  = 1,
00181     CU_COMPUTEMODE_PROHIBITED = 2
00182 } CUcomputemode;
00183 
00184 typedef enum CUjit_option_enum /* element type of the options array taken by tcuModuleLoadDataEx */
00185 {
00186     CU_JIT_MAX_REGISTERS = 0, /* only the first value is explicit; the rest count up implicitly from 1 to 10, so order matters */
00187     CU_JIT_THREADS_PER_BLOCK,
00188     CU_JIT_WALL_TIME,
00189     CU_JIT_INFO_LOG_BUFFER,
00190     CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
00191     CU_JIT_ERROR_LOG_BUFFER,
00192     CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
00193     CU_JIT_OPTIMIZATION_LEVEL,
00194     CU_JIT_TARGET_FROM_CUCONTEXT,
00195     CU_JIT_TARGET,
00196     CU_JIT_FALLBACK_STRATEGY
00197 
00198 } CUjit_option;
00199 
00200 typedef enum CUjit_target_enum /* presumably the value paired with the CU_JIT_TARGET option -- confirm */
00201 {
00202     CU_TARGET_COMPUTE_10 = 0, /* later values are implicit (1 to 5), so order matters */
00203     CU_TARGET_COMPUTE_11,
00204     CU_TARGET_COMPUTE_12,
00205     CU_TARGET_COMPUTE_13,
00206     CU_TARGET_COMPUTE_20,
00207     CU_TARGET_COMPUTE_21
00208 } CUjit_target;
00209 
00210 typedef enum CUjit_fallback_enum /* presumably the value paired with the CU_JIT_FALLBACK_STRATEGY option -- confirm */
00211 {
00212     CU_PREFER_PTX = 0,
00213     CU_PREFER_BINARY /* implicit value 1 */
00214 
00215 } CUjit_fallback;
00216 
00217 typedef enum CUgraphicsRegisterFlags_enum { /* only a "none" value is defined; presumably for the Flags argument of the tcuGraphicsGLRegister* types -- confirm */
00218     CU_GRAPHICS_REGISTER_FLAGS_NONE  = 0x00
00219 } CUgraphicsRegisterFlags;
00220 
00221 typedef enum CUgraphicsMapResourceFlags_enum { /* presumably for the flags argument of tcuGraphicsResourceSetMapFlags -- confirm */
00222     CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE          = 0x00,
00223     CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY     = 0x01,
00224     CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02
00225 } CUgraphicsMapResourceFlags;
00226 
00227 typedef enum CUarray_cubemap_face_enum { /* six faces numbered 0-5 (+X, -X, +Y, -Y, +Z, -Z); not referenced by any signature in this header */
00228     CU_CUBEMAP_FACE_POSITIVE_X  = 0x00,
00229     CU_CUBEMAP_FACE_NEGATIVE_X  = 0x01,
00230     CU_CUBEMAP_FACE_POSITIVE_Y  = 0x02,
00231     CU_CUBEMAP_FACE_NEGATIVE_Y  = 0x03,
00232     CU_CUBEMAP_FACE_POSITIVE_Z  = 0x04,
00233     CU_CUBEMAP_FACE_NEGATIVE_Z  = 0x05
00234 } CUarray_cubemap_face;
00235 
00236 typedef enum CUlimit_enum { /* selects the limit read or written by tcuCtxGetLimit / tcuCtxSetLimit, whose value is a size_t */
00237     CU_LIMIT_STACK_SIZE        = 0x00,
00238     CU_LIMIT_PRINTF_FIFO_SIZE  = 0x01,
00239     CU_LIMIT_MALLOC_HEAP_SIZE  = 0x02
00240 } CUlimit;
00241 
00242 typedef enum cudaError_enum { /* status code returned by every tcu* function type in this header; note the enum tag is cudaError_enum while the typedef name is CUresult */
00243     CUDA_SUCCESS                              = 0, /* the only success value; every other enumerator is an error */
00244     CUDA_ERROR_INVALID_VALUE                  = 1,
00245     CUDA_ERROR_OUT_OF_MEMORY                  = 2,
00246     CUDA_ERROR_NOT_INITIALIZED                = 3,
00247     CUDA_ERROR_DEINITIALIZED                  = 4,
00248     CUDA_ERROR_NO_DEVICE                      = 100, /* values are sparse and grouped by hundreds from here on */
00249     CUDA_ERROR_INVALID_DEVICE                 = 101,
00250     CUDA_ERROR_INVALID_IMAGE                  = 200,
00251     CUDA_ERROR_INVALID_CONTEXT                = 201,
00252     CUDA_ERROR_CONTEXT_ALREADY_CURRENT        = 202,
00253     CUDA_ERROR_MAP_FAILED                     = 205, /* 203 and 204 are not defined in this list */
00254     CUDA_ERROR_UNMAP_FAILED                   = 206,
00255     CUDA_ERROR_ARRAY_IS_MAPPED                = 207,
00256     CUDA_ERROR_ALREADY_MAPPED                 = 208,
00257     CUDA_ERROR_NO_BINARY_FOR_GPU              = 209,
00258     CUDA_ERROR_ALREADY_ACQUIRED               = 210,
00259     CUDA_ERROR_NOT_MAPPED                     = 211,
00260     CUDA_ERROR_NOT_MAPPED_AS_ARRAY            = 212,
00261     CUDA_ERROR_NOT_MAPPED_AS_POINTER          = 213,
00262     CUDA_ERROR_ECC_UNCORRECTABLE              = 214,
00263     CUDA_ERROR_UNSUPPORTED_LIMIT              = 215,
00264     CUDA_ERROR_INVALID_SOURCE                 = 300,
00265     CUDA_ERROR_FILE_NOT_FOUND                 = 301,
00266     CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
00267     CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      = 303,
00268     CUDA_ERROR_OPERATING_SYSTEM               = 304,
00269     CUDA_ERROR_INVALID_HANDLE                 = 400,
00270     CUDA_ERROR_NOT_FOUND                      = 500,
00271     CUDA_ERROR_NOT_READY                      = 600,
00272     CUDA_ERROR_LAUNCH_FAILED                  = 700,
00273     CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701,
00274     CUDA_ERROR_LAUNCH_TIMEOUT                 = 702,
00275     CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703,
00276     CUDA_ERROR_UNKNOWN                        = 999
00277 } CUresult;
00278 
00279 #define CU_MEMHOSTALLOC_PORTABLE        0x01 /* the three values are distinct single bits, so they can be OR-ed; presumably for the Flags argument of tcuMemHostAlloc -- confirm */
00280 #define CU_MEMHOSTALLOC_DEVICEMAP       0x02
00281 #define CU_MEMHOSTALLOC_WRITECOMBINED   0x04
00282 
00283 typedef struct CUDA_MEMCPY2D_st { /* parameter block passed by const pointer to tcuMemcpy2D, tcuMemcpy2DUnaligned and tcuMemcpy2DAsync */
00284     size_t srcXInBytes; /* source side: X is named in bytes, Y is not */
00285     size_t srcY;
00286 
00287     CUmemorytype srcMemoryType; /* NOTE(review): presumably selects which of srcHost / srcDevice / srcArray is read -- see the CUDA driver API docs */
00288     const void *srcHost;
00289     CUdeviceptr srcDevice;
00290     CUarray srcArray;
00291     size_t srcPitch;
00292 
00293     size_t dstXInBytes; /* destination side mirrors the source fields, except dstHost is not const */
00294     size_t dstY;
00295 
00296     CUmemorytype dstMemoryType;
00297     void *dstHost;
00298     CUdeviceptr dstDevice;
00299     CUarray dstArray;
00300     size_t dstPitch;
00301 
00302     size_t WidthInBytes; /* extent of the copy: width named in bytes, height not */
00303     size_t Height;
00304 } CUDA_MEMCPY2D;
00305 
00306 typedef struct CUDA_MEMCPY3D_st { /* parameter block passed by const pointer to tcuMemcpy3D and tcuMemcpy3DAsync; adds Z, LOD, height and reserved fields to the 2D layout */
00307     size_t srcXInBytes;
00308     size_t srcY;
00309     size_t srcZ;
00310     size_t srcLOD;
00311     CUmemorytype srcMemoryType; /* NOTE(review): presumably selects which of srcHost / srcDevice / srcArray is read -- see the CUDA driver API docs */
00312     const void *srcHost;
00313     CUdeviceptr srcDevice;
00314     CUarray srcArray;
00315     void *reserved0; /* placeholder member; its required value is not visible in this header -- check the CUDA driver API docs */
00316     size_t srcPitch;
00317     size_t srcHeight;
00318 
00319     size_t dstXInBytes; /* destination side mirrors the source fields, except dstHost is not const */
00320     size_t dstY;
00321     size_t dstZ;
00322     size_t dstLOD;
00323     CUmemorytype dstMemoryType;
00324     void *dstHost;
00325     CUdeviceptr dstDevice;
00326     CUarray dstArray;
00327     void *reserved1; /* placeholder member, counterpart of reserved0 */
00328     size_t dstPitch;
00329     size_t dstHeight;
00330 
00331     size_t WidthInBytes; /* extent of the copy: width named in bytes, height and depth not */
00332     size_t Height;
00333     size_t Depth;
00334 } CUDA_MEMCPY3D;
00335 
00336 typedef struct CUDA_ARRAY_DESCRIPTOR_st /* 2D array description; used by tcuArrayCreate, tcuArrayGetDescriptor and tcuTexRefSetAddress2D */
00337 {
00338     size_t Width;
00339     size_t Height;
00340 
00341     CUarray_format Format;
00342     unsigned int NumChannels;
00343 } CUDA_ARRAY_DESCRIPTOR;
00344 
00345 typedef struct CUDA_ARRAY3D_DESCRIPTOR_st /* 3D array description; used by tcuArray3DCreate and tcuArray3DGetDescriptor */
00346 {
00347     size_t Width;
00348     size_t Height;
00349     size_t Depth;
00350 
00351     CUarray_format Format;
00352     unsigned int NumChannels;
00353     unsigned int Flags; /* presumably takes the CUDA_ARRAY3D_* defines that follow this struct -- confirm */
00354 } CUDA_ARRAY3D_DESCRIPTOR;
00355 
00356 #define CUDA_ARRAY3D_2DARRAY        0x01 /* CUDA_ARRAY3D_*: presumably bits for CUDA_ARRAY3D_DESCRIPTOR::Flags -- confirm */
00357 #define CUDA_ARRAY3D_SURFACE_LDST   0x02
00358 #define CU_TRSA_OVERRIDE_FORMAT 0x01 /* presumably for the Flags argument of tcuTexRefSetArray -- confirm */
00359 #define CU_TRSF_READ_AS_INTEGER         0x01 /* CU_TRSF_*: presumably bits for the Flags argument of tcuTexRefSetFlags -- confirm */
00360 #define CU_TRSF_NORMALIZED_COORDINATES  0x02
00361 #define CU_TRSF_SRGB  0x10
00362 #define CU_PARAM_TR_DEFAULT -1 /* expands to an unparenthesized negative literal; presumably the texunit argument of tcuParamSetTexRef -- confirm */
00363 
00364 #ifdef _WIN32 /* calling convention applied to every function type declared below */
00365 #define CUDAAPI __stdcall
00366 #else /* elsewhere CUDAAPI expands to nothing, i.e. the compiler's default calling convention */
00367 #define CUDAAPI
00368 #endif
00369 
00370 /* function types */
00371 
00372 typedef CUresult CUDAAPI tcuInit(unsigned int Flags); /* each tcuXxx is a function type, not a pointer type; the extern declarations later in this header declare pointers to them */
00373 typedef CUresult CUDAAPI tcuDriverGetVersion(int *driverVersion);
00374 typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal); /* device queries: results come back through the leading pointer argument(s), status through CUresult */
00375 typedef CUresult CUDAAPI tcuDeviceGetCount(int *count);
00376 typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev);
00377 typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
00378 typedef CUresult CUDAAPI tcuDeviceTotalMem(size_t *bytes, CUdevice dev);
00379 typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
00380 typedef CUresult CUDAAPI tcuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
00381 typedef CUresult CUDAAPI tcuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev); /* context management function types */
00382 typedef CUresult CUDAAPI tcuCtxDestroy(CUcontext ctx);
00383 typedef CUresult CUDAAPI tcuCtxAttach(CUcontext *pctx, unsigned int flags);
00384 typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx);
00385 typedef CUresult CUDAAPI tcuCtxPushCurrent(CUcontext ctx );
00386 typedef CUresult CUDAAPI tcuCtxPopCurrent(CUcontext *pctx);
00387 typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice *device);
00388 typedef CUresult CUDAAPI tcuCtxSynchronize(void);
00389 typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
00390 typedef CUresult CUDAAPI tcuCtxGetLimit(size_t *pvalue, CUlimit limit);
00391 typedef CUresult CUDAAPI tcuCtxGetCacheConfig(CUfunc_cache *pconfig); /* this type and the next two have no matching extern pointer later in this header */
00392 typedef CUresult CUDAAPI tcuCtxSetCacheConfig(CUfunc_cache config);
00393 typedef CUresult CUDAAPI tcuCtxGetApiVersion(CUcontext ctx, unsigned int *version);
00394 typedef CUresult CUDAAPI tcuModuleLoad(CUmodule *module, const char *fname); /* module loading: the new handle is written through the first argument */
00395 typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule *module, const void *image);
00396 typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues); /* options and optionValues are parallel arrays; presumably numOptions entries each -- confirm */
00397 typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule *module, const void *fatCubin);
00398 typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod);
00399 typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name); /* the GetXxx types look up an item in a loaded module by name */
00400 typedef CUresult CUDAAPI tcuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name);
00401 typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name);
00402 typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name);
00403 typedef CUresult CUDAAPI tcuMemGetInfo(size_t *free, size_t *total); /* memory allocation function types; all sizes are size_t */
00404 typedef CUresult CUDAAPI tcuMemAlloc(CUdeviceptr *dptr, size_t bytesize);
00405 typedef CUresult CUDAAPI tcuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
00406 typedef CUresult CUDAAPI tcuMemFree(CUdeviceptr dptr);
00407 typedef CUresult CUDAAPI tcuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, CUdeviceptr dptr);
00408 typedef CUresult CUDAAPI tcuMemAllocHost(void **pp, size_t bytesize); /* the host-side types take ordinary void pointers rather than CUdeviceptr */
00409 typedef CUresult CUDAAPI tcuMemFreeHost(void *p);
00410 typedef CUresult CUDAAPI tcuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags);
00411 typedef CUresult CUDAAPI tcuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags);
00412 typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned int *pFlags, void *p);
00413 typedef CUresult CUDAAPI tcuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount); /* copy types: per the parameter types, H = host pointer, D = CUdeviceptr, A = CUarray; destination comes first */
00414 typedef CUresult CUDAAPI tcuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
00415 typedef CUresult CUDAAPI tcuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
00416 typedef CUresult CUDAAPI tcuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
00417 typedef CUresult CUDAAPI tcuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
00418 typedef CUresult CUDAAPI tcuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount);
00419 typedef CUresult CUDAAPI tcuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
00420 typedef CUresult CUDAAPI tcuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
00421 typedef CUresult CUDAAPI tcuMemcpy2D(const CUDA_MEMCPY2D *pCopy); /* 2D/3D copies take all parameters through a descriptor struct */
00422 typedef CUresult CUDAAPI tcuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy);
00423 typedef CUresult CUDAAPI tcuMemcpy3D(const CUDA_MEMCPY3D *pCopy);
00424 typedef CUresult CUDAAPI tcuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream); /* Async variants repeat the signature above with a trailing CUstream */
00425 typedef CUresult CUDAAPI tcuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
00426 typedef CUresult CUDAAPI tcuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
00427 typedef CUresult CUDAAPI tcuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount, CUstream hStream);
00428 typedef CUresult CUDAAPI tcuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
00429 typedef CUresult CUDAAPI tcuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream);
00430 typedef CUresult CUDAAPI tcuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream);
00431 typedef CUresult CUDAAPI tcuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N); /* memset types: the 8/16/32 suffix matches the fill value type (unsigned char / short / int); presumably N counts elements, not bytes -- confirm */
00432 typedef CUresult CUDAAPI tcuMemsetD16(CUdeviceptr dstDevice, unsigned short us, size_t N);
00433 typedef CUresult CUDAAPI tcuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N);
00434 typedef CUresult CUDAAPI tcuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height); /* 2D variants add a pitch and a Width x Height extent */
00435 typedef CUresult CUDAAPI tcuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height);
00436 typedef CUresult CUDAAPI tcuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height);
00437 typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream); /* the six Async types have no matching extern pointer later in this header */
00438 typedef CUresult CUDAAPI tcuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream);
00439 typedef CUresult CUDAAPI tcuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
00440 typedef CUresult CUDAAPI tcuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream);
00441 typedef CUresult CUDAAPI tcuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream);
00442 typedef CUresult CUDAAPI tcuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream);
00443 typedef CUresult CUDAAPI tcuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray); /* array types: creation reads a const descriptor, GetDescriptor writes one */
00444 typedef CUresult CUDAAPI tcuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
00445 typedef CUresult CUDAAPI tcuArrayDestroy(CUarray hArray); /* there is no separate 3D destroy type in this header */
00446 typedef CUresult CUDAAPI tcuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
00447 typedef CUresult CUDAAPI tcuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
00448 typedef CUresult CUDAAPI tcuStreamCreate(CUstream *phStream, unsigned int Flags); /* stream function types */
00449 typedef CUresult CUDAAPI tcuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags); /* no matching extern pointer is declared later in this header */
00450 typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream);
00451 typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream);
00452 typedef CUresult CUDAAPI tcuStreamDestroy(CUstream hStream);
00453 typedef CUresult CUDAAPI tcuEventCreate(CUevent *phEvent, unsigned int Flags); /* event function types */
00454 typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream);
00455 typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent);
00456 typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent);
00457 typedef CUresult CUDAAPI tcuEventDestroy(CUevent hEvent);
00458 typedef CUresult CUDAAPI tcuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd); /* result is written as a float; the parameter name indicates milliseconds */
00459 typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z); /* per-function configuration types; every one takes the CUfunction first */
00460 typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
00461 typedef CUresult CUDAAPI tcuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunction hfunc);
00462 typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
00463 typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned int numbytes); /* parameter-setting types: offset is an int, sizes are unsigned int (not size_t) */
00464 typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned int value);
00465 typedef CUresult CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value);
00466 typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void *ptr, unsigned int numbytes);
00467 typedef CUresult CUDAAPI tcuLaunch(CUfunction f); /* launch types: the grid is given as width x height only */
00468 typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height);
00469 typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
00470 typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef); /* texunit presumably accepts CU_PARAM_TR_DEFAULT -- confirm */
00471 typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags); /* texture reference setters: most take the CUtexref first */
00472 typedef CUresult CUDAAPI tcuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes); /* exception: an output pointer (ByteOffset) comes before the CUtexref */
00473 typedef CUresult CUDAAPI tcuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch);
00474 typedef CUresult CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
00475 typedef CUresult CUDAAPI tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
00476 typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
00477 typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);
00478 typedef CUresult CUDAAPI tcuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef); /* getters: output pointer(s) first, then the CUtexref */
00479 typedef CUresult CUDAAPI tcuTexRefGetArray(CUarray *phArray, CUtexref hTexRef);
00480 typedef CUresult CUDAAPI tcuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, int dim);
00481 typedef CUresult CUDAAPI tcuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef);
00482 typedef CUresult CUDAAPI tcuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef);
00483 typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef);
00484 typedef CUresult CUDAAPI tcuTexRefCreate(CUtexref *pTexRef);
00485 typedef CUresult CUDAAPI tcuTexRefDestroy(CUtexref hTexRef);
00486 typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags); /* surface references only have array set/get types in this header */
00487 typedef CUresult CUDAAPI tcuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef);
00488 typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource); /* graphics interop function types operating on CUgraphicsResource handles */
00489 typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
00490 typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource);
00491 typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int flags);
00492 typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream); /* map/unmap take an array of resources; presumably count entries -- confirm */
00493 typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
00494 typedef CUresult CUDAAPI tcuGetExportTable(const void **ppExportTable, const CUuuid *pExportTableId);
00495 typedef CUresult CUDAAPI tcuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device ); /* same parameter list as tcuCtxCreate */
00496 typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags); /* GLuint / GLenum are not declared in this header; presumably they come from util_opengl.h -- confirm */
00497 typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource *pCudaResource, GLuint image, GLenum target, unsigned int Flags);
00498 typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
00499 
00500 /* function pointer declarations */
00501 
00502 extern tcuInit *cuInit; /* each cuXxx is a pointer to the matching tcuXxx function type; these are extern declarations only, the definitions are outside this header */
00503 extern tcuDriverGetVersion *cuDriverGetVersion;
00504 extern tcuDeviceGet *cuDeviceGet;
00505 extern tcuDeviceGetCount *cuDeviceGetCount;
00506 extern tcuDeviceGetName *cuDeviceGetName;
00507 extern tcuDeviceComputeCapability *cuDeviceComputeCapability;
00508 extern tcuDeviceTotalMem *cuDeviceTotalMem;
00509 extern tcuDeviceGetProperties *cuDeviceGetProperties;
00510 extern tcuDeviceGetAttribute *cuDeviceGetAttribute;
00511 extern tcuCtxCreate *cuCtxCreate; /* context pointers; no pointer is declared for tcuCtxGetCacheConfig, tcuCtxSetCacheConfig or tcuCtxGetApiVersion */
00512 extern tcuCtxDestroy *cuCtxDestroy;
00513 extern tcuCtxAttach *cuCtxAttach;
00514 extern tcuCtxDetach *cuCtxDetach;
00515 extern tcuCtxPushCurrent *cuCtxPushCurrent;
00516 extern tcuCtxPopCurrent *cuCtxPopCurrent;
00517 extern tcuCtxGetDevice *cuCtxGetDevice;
00518 extern tcuCtxSynchronize *cuCtxSynchronize; /* cuCtxSetLimit, cuCtxGetLimit and cuCtxSetCurrent are declared near the end of this list */
00519 extern tcuModuleLoad *cuModuleLoad; /* module pointers; one for every tcuModule* type declared in this header */
00520 extern tcuModuleLoadData *cuModuleLoadData;
00521 extern tcuModuleLoadDataEx *cuModuleLoadDataEx;
00522 extern tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
00523 extern tcuModuleUnload *cuModuleUnload;
00524 extern tcuModuleGetFunction *cuModuleGetFunction;
00525 extern tcuModuleGetGlobal *cuModuleGetGlobal;
00526 extern tcuModuleGetTexRef *cuModuleGetTexRef;
00527 extern tcuModuleGetSurfRef *cuModuleGetSurfRef;
00528 extern tcuMemGetInfo *cuMemGetInfo; /* memory allocation pointers */
00529 extern tcuMemAlloc *cuMemAlloc;
00530 extern tcuMemAllocPitch *cuMemAllocPitch;
00531 extern tcuMemFree *cuMemFree;
00532 extern tcuMemGetAddressRange *cuMemGetAddressRange;
00533 extern tcuMemAllocHost *cuMemAllocHost;
00534 extern tcuMemFreeHost *cuMemFreeHost;
00535 extern tcuMemHostAlloc *cuMemHostAlloc;
00536 extern tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer;
00537 extern tcuMemHostGetFlags *cuMemHostGetFlags;
00538 extern tcuMemcpyHtoD *cuMemcpyHtoD; /* synchronous copy pointers */
00539 extern tcuMemcpyDtoH *cuMemcpyDtoH;
00540 extern tcuMemcpyDtoD *cuMemcpyDtoD;
00541 extern tcuMemcpyDtoA *cuMemcpyDtoA;
00542 extern tcuMemcpyAtoD *cuMemcpyAtoD;
00543 extern tcuMemcpyHtoA *cuMemcpyHtoA;
00544 extern tcuMemcpyAtoH *cuMemcpyAtoH;
00545 extern tcuMemcpyAtoA *cuMemcpyAtoA;
00546 extern tcuMemcpy2D *cuMemcpy2D;
00547 extern tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned;
00548 extern tcuMemcpy3D *cuMemcpy3D;
00549 extern tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync; /* stream-taking (Async) copy pointers */
00550 extern tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync;
00551 extern tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync;
00552 extern tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync;
00553 extern tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync;
00554 extern tcuMemcpy2DAsync *cuMemcpy2DAsync;
00555 extern tcuMemcpy3DAsync *cuMemcpy3DAsync;
00556 extern tcuMemsetD8 *cuMemsetD8; /* only the synchronous memset pointers are declared; the tcuMemset*Async types have no pointer in this header */
00557 extern tcuMemsetD16 *cuMemsetD16;
00558 extern tcuMemsetD32 *cuMemsetD32;
00559 extern tcuMemsetD2D8 *cuMemsetD2D8;
00560 extern tcuMemsetD2D16 *cuMemsetD2D16;
00561 extern tcuMemsetD2D32 *cuMemsetD2D32;
00562 extern tcuFuncSetBlockShape *cuFuncSetBlockShape; /* per-function configuration pointers */
00563 extern tcuFuncSetSharedSize *cuFuncSetSharedSize;
00564 extern tcuFuncGetAttribute *cuFuncGetAttribute;
00565 extern tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
00566 extern tcuArrayCreate *cuArrayCreate; /* array pointers; one for every tcuArray* type declared in this header */
00567 extern tcuArrayGetDescriptor *cuArrayGetDescriptor;
00568 extern tcuArrayDestroy *cuArrayDestroy;
00569 extern tcuArray3DCreate *cuArray3DCreate;
00570 extern tcuArray3DGetDescriptor *cuArray3DGetDescriptor;
00571 extern tcuTexRefCreate *cuTexRefCreate; /* texture reference pointers; listed in a different order than the tcuTexRef* types but covering all of them */
00572 extern tcuTexRefDestroy *cuTexRefDestroy;
00573 extern tcuTexRefSetArray *cuTexRefSetArray;
00574 extern tcuTexRefSetAddress *cuTexRefSetAddress;
00575 extern tcuTexRefSetAddress2D *cuTexRefSetAddress2D;
00576 extern tcuTexRefSetFormat *cuTexRefSetFormat;
00577 extern tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
00578 extern tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
00579 extern tcuTexRefSetFlags *cuTexRefSetFlags;
00580 extern tcuTexRefGetAddress *cuTexRefGetAddress;
00581 extern tcuTexRefGetArray *cuTexRefGetArray;
00582 extern tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
00583 extern tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
00584 extern tcuTexRefGetFormat *cuTexRefGetFormat;
00585 extern tcuTexRefGetFlags *cuTexRefGetFlags;
00586 extern tcuSurfRefSetArray *cuSurfRefSetArray; /* surface reference pointers */
00587 extern tcuSurfRefGetArray *cuSurfRefGetArray;
00588 extern tcuParamSetSize *cuParamSetSize; /* kernel parameter pointers */
00589 extern tcuParamSeti *cuParamSeti;
00590 extern tcuParamSetf *cuParamSetf;
00591 extern tcuParamSetv *cuParamSetv;
00592 extern tcuParamSetTexRef *cuParamSetTexRef;
00593 extern tcuLaunch *cuLaunch; /* kernel launch pointers */
00594 extern tcuLaunchGrid *cuLaunchGrid;
00595 extern tcuLaunchGridAsync *cuLaunchGridAsync;
00596 extern tcuEventCreate *cuEventCreate; /* event pointers; one for every tcuEvent* type declared in this header */
00597 extern tcuEventRecord *cuEventRecord;
00598 extern tcuEventQuery *cuEventQuery;
00599 extern tcuEventSynchronize *cuEventSynchronize;
00600 extern tcuEventDestroy *cuEventDestroy;
00601 extern tcuEventElapsedTime *cuEventElapsedTime;
00602 extern tcuStreamCreate *cuStreamCreate; /* stream pointers; tcuStreamWaitEvent has no pointer in this header */
00603 extern tcuStreamQuery *cuStreamQuery;
00604 extern tcuStreamSynchronize *cuStreamSynchronize;
00605 extern tcuStreamDestroy *cuStreamDestroy;
00606 extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource; /* graphics interop pointers */
00607 extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
00608 extern tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer;
00609 extern tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
00610 extern tcuGraphicsMapResources *cuGraphicsMapResources;
00611 extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
00612 extern tcuGetExportTable *cuGetExportTable;
00613 extern tcuCtxSetLimit *cuCtxSetLimit; /* context pointers that are listed here rather than with the other cuCtx* pointers */
00614 extern tcuCtxGetLimit *cuCtxGetLimit;
00615 extern tcuGLCtxCreate *cuGLCtxCreate; /* OpenGL interop pointers */
00616 extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
00617 extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
00618 extern tcuCtxSetCurrent *cuCtxSetCurrent;
00619 
00620 #endif /* __UTIL_CUDA_H__ */
00621