#ifndef NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED
#define NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED

#include "HostBuffer.h" // for BufferTraits

#include <cstdint> // for uint8_t/uint64_t
#include <cstdio>  // for fprintf
#include <cstdlib> // for exit
#include <cuda_runtime_api.h> // for cudaMallocHost/cudaMalloc/cudaMemcpyAsync/cudaFree

// Convenience macros that forward CUDA return codes and raw pointers to the assert helpers below
#define cudaCheck(ans) \
    { \
        gpuAssert((ans), __FILE__, __LINE__); \
    }

#define checkPtr(ptr, msg) \
    { \
        ptrAssert((ptr), (msg), __FILE__, __LINE__); \
    }
static inline bool gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
{
#if defined(DEBUG) || defined(_DEBUG)
    if (code != cudaSuccess) {
        fprintf(stderr, "CUDA Runtime Error: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort)
            exit(code);
        return false;
    }
#endif
    return true;
}
#if defined(DEBUG) || defined(_DEBUG)
static inline void ptrAssert(void* ptr, const char* msg, const char* file, int line, bool abort = true)
{
    if (ptr == nullptr) {
        fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line);
        if (abort)
            exit(1);
    }
}
#else
static inline void ptrAssert(void*, const char*, const char*, int, bool = true)
{
}
#endif
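// NOTE: gpuAssert and ptrAssert only perform their checks when DEBUG or _DEBUG is defined
// (e.g. when compiling with "nvcc -DDEBUG ..."); in release builds cudaCheck still calls
// gpuAssert but the error test is compiled out, and checkPtr calls the empty ptrAssert
// overload, so failed CUDA calls and NULL pointers go unreported.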
namespace nanovdb {

/// @brief Simple memory buffer using un-managed pinned host memory when compiled with NVCC.
///        Obviously this class makes explicit use of CUDA, so replace it with your own memory
///        allocator if you are not using CUDA.
class CudaDeviceBuffer
{
    uint64_t mSize; // total number of bytes for the NanoVDB grid
    uint8_t *mCpuData, *mGpuData; // raw pointers to the pinned host buffer and the device buffer

public:
    /// @brief Static factory method that returns an instance of this buffer
    static CudaDeviceBuffer create(uint64_t size, const CudaDeviceBuffer* context = nullptr);

    /// @brief Constructor
    CudaDeviceBuffer(uint64_t size = 0)
        : mSize(0)
        , mCpuData(nullptr)
        , mGpuData(nullptr)
    {
        this->init(size);
    }

    /// @brief Disallow copy-construction
    CudaDeviceBuffer(const CudaDeviceBuffer&) = delete;

    /// @brief Move copy-constructor
    CudaDeviceBuffer(CudaDeviceBuffer&& other) noexcept
        : mSize(other.mSize)
        , mCpuData(other.mCpuData)
        , mGpuData(other.mGpuData)
    {
        other.mSize = 0;
        other.mCpuData = nullptr;
        other.mGpuData = nullptr;
    }
    /// @brief Disallow copy assignment operation
    CudaDeviceBuffer& operator=(const CudaDeviceBuffer&) = delete;

    /// @brief Move copy assignment operation
    CudaDeviceBuffer& operator=(CudaDeviceBuffer&& other) noexcept
    {
        this->clear();
        mSize = other.mSize;
        mCpuData = other.mCpuData;
        mGpuData = other.mGpuData;
        other.mSize = 0;
        other.mCpuData = nullptr;
        other.mGpuData = nullptr;
        return *this;
    }

    /// @brief Destructor frees memory on both the host and device
    ~CudaDeviceBuffer() { this->clear(); }

    /// @brief Allocates the pinned host buffer (the device buffer is allocated lazily by deviceUpload)
    void init(uint64_t size);
    /// @brief Returns a raw pointer to the pinned host buffer (NULL if the buffer is uninitialized)
    uint8_t* data() const { return mCpuData; }

    /// @brief Returns a raw pointer to the device buffer (NULL until deviceUpload has been called)
    uint8_t* deviceData() const { return mGpuData; }
    /// @brief Copy grid from the CPU/host to the GPU/device. If @c sync is false the memory copy is asynchronous!
    /// @note  This will allocate memory on the GPU/device if it is not already allocated.
    void deviceUpload(void* stream = 0, bool sync = true) const;

    /// @brief Copy grid from the GPU/device to the CPU/host. If @c sync is false the memory copy is asynchronous!
    void deviceDownload(void* stream = 0, bool sync = true) const;
    /// @brief Returns the size in bytes of the raw memory buffer managed by this allocator
    uint64_t size() const { return mSize; }
    /// @brief Returns true if this allocator is empty, i.e. has no allocated memory
    bool empty() const { return mSize == 0; }

    /// @brief De-allocate all memory managed by this allocator and set all pointers to NULL
    void clear();

}; // CudaDeviceBuffer class
template<>
struct BufferTraits<CudaDeviceBuffer>
{
    static const bool hasDeviceDual = true;
};
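// The specialization above advertises that CudaDeviceBuffer keeps a dual host/device allocation
// (hasDeviceDual == true); generic NanoVDB code such as GridHandle can query this trait to decide
// whether device uploads/downloads are supported (the default trait is defined in HostBuffer.h).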
inline CudaDeviceBuffer CudaDeviceBuffer::create(uint64_t size, const CudaDeviceBuffer*)
{
    return CudaDeviceBuffer(size);
}

inline void CudaDeviceBuffer::init(uint64_t size)
{
    if (size == 0)
        return;
    mSize = size;
    cudaCheck(cudaMallocHost((void**)&mCpuData, size)); // un-managed pinned memory on the host
    checkPtr(mCpuData, "failed to allocate host data");
} // CudaDeviceBuffer::init
inline void CudaDeviceBuffer::deviceUpload(void* stream, bool sync) const
{
    checkPtr(mCpuData, "uninitialized cpu data");
    if (mGpuData == nullptr)
        cudaCheck(cudaMalloc((void**)&mGpuData, mSize)); // un-managed memory on the device
    checkPtr(mGpuData, "uninitialized gpu data");
    cudaCheck(cudaMemcpyAsync(mGpuData, mCpuData, mSize, cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream)));
    if (sync)
        cudaCheck(cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)));
} // CudaDeviceBuffer::deviceUpload
inline void CudaDeviceBuffer::deviceDownload(void* stream, bool sync) const
{
    checkPtr(mCpuData, "uninitialized cpu data");
    checkPtr(mGpuData, "uninitialized gpu data");
    cudaCheck(cudaMemcpyAsync(mCpuData, mGpuData, mSize, cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream)));
    if (sync)
        cudaCheck(cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)));
} // CudaDeviceBuffer::deviceDownload
inline void CudaDeviceBuffer::clear()
{
    if (mGpuData)
        cudaCheck(cudaFree(mGpuData));
    if (mCpuData)
        cudaCheck(cudaFreeHost(mCpuData));
    mCpuData = mGpuData = nullptr;
    mSize = 0;
} // CudaDeviceBuffer::clear
} // namespace nanovdb

#endif // end of NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED
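A minimal usage sketch (not part of the header itself): it assumes this file is available as "CudaDeviceBuffer.h" on the include path and that the translation unit is compiled with nvcc; the incrementBytes kernel and the 1 MiB size are made-up illustrations. The sketch allocates a pinned host buffer, uploads it asynchronously on a stream, runs a kernel over the device copy, and synchronously downloads the result.

#include "CudaDeviceBuffer.h" // path is an assumption

#include <cstdint>
#include <cstring>

// Hypothetical kernel that increments every byte of the device copy.
__global__ void incrementBytes(uint8_t* data, uint64_t size)
{
    const uint64_t i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < size)
        data[i] += 1;
}

int main()
{
    const uint64_t size = 1 << 20; // 1 MiB buffer (arbitrary)
    nanovdb::CudaDeviceBuffer buffer = nanovdb::CudaDeviceBuffer::create(size);

    std::memset(buffer.data(), 0, buffer.size()); // fill the pinned host buffer

    cudaStream_t stream;
    cudaStreamCreate(&stream);

    buffer.deviceUpload(stream, false); // async host -> device copy; also allocates device memory

    const int threads = 256;
    const int blocks  = int((size + threads - 1) / threads);
    incrementBytes<<<blocks, threads, 0, stream>>>(buffer.deviceData(), buffer.size());

    buffer.deviceDownload(stream, true); // device -> host copy, synchronized before we read

    const bool ok = buffer.data()[0] == 1; // every byte should now be 1
    cudaStreamDestroy(stream);
    return ok ? 0 : 1;
}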