masterxilo masterxilo - 1 year ago 140
C++ Question

Is there a preprocessor macro in CUDA that tells whether we are compiling device code?

I'd like to define a pointer type that is shared between device and host code and internally stores the device and host pointer to the shared memory. I want it to determine at compile time, which pointer to actually return:

#define F inline __host__ __device__

class SharedMemory;
/**
*
* Can only be set by allocating shared memory.
*/
template<typename T>
class SharedMemoryPtr {
public:
SharedMemoryPtr() : hptr(0), dptr(0) {}

//F ~ SharedMemoryPtr() {cudaFreeHost(hptr);} // Should be freed explicitly (?)

// TODO: Don't allow copying/overwriting (at least not without freeing memory...)

F T& operator() () {
#ifdef __CUDACC__
return *dptr;
#else
return *hptr;
#endif
};

F T* operator-> () {
#ifdef __CUDACC__
return dptr;
#else
return hptr;
#endif
};

F T& operator * () {
#ifdef __CUDACC__
return *dptr;
#else
return *hptr;
#endif
};

F T& operator[] (__int64 i) const {
#ifdef __CUDACC__
return *(dptr + i);
#else
return *(hptr + i);
#endif
};

friend SharedMemory;

// TODO: Make obsolete (possible?)

T * getHptr() {return hptr;}
T * getDptr() {return dptr;}

private:

T *hptr, *dptr;
};

class SharedMemory {
public:
template<typename T>
static SharedMemoryPtr<T> allocate(int count = 1) {
assert(count > 0);

SharedMemoryPtr<T> sptr;

cutilSafeCall(
cudaHostAlloc(&sptr.hptr, sizeof(T) * count, cudaHostAllocMapped));
assert(sptr.hptr);
cutilSafeCall(
cudaHostGetDevicePointer(&sptr.dptr, sptr.hptr, 0));
assert(sptr.dptr);

return sptr;
}
};


This works fine as long as I use these pointers in code that is either in a cpp file (where
__CUDACC__
is never defined) or a .h file (where
__CUDACC__
is only defined if the function is used by some function in a cu file). However in a
__host__
function in a .cu file, I get the
devptr
. Obviously, .cu files are processed exclusively by nvcc. Is there some other preprocessor macro that is defined ONLY for
__global__
and
__device__
functions, not just everything nvcc happens to process? Or do I need to separate my code?

Answer Source

__CUDA_ARCH__ is actual only for device code. You can specify device code behavior with it. This macro actually get compute capability of device code (like 200 for 2.0.)

Recommended from our users: Dynamic Network Monitoring from WhatsUp Gold from IPSwitch. Free Download