Skip to content
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ set (INCLUDE_HEADERS
include/ActivationTable.h
include/Common.h
include/cuNSearch.h
Utils/cuda_helper.h
)

set (HEADER_FILES
Expand Down
86 changes: 86 additions & 0 deletions Utils/cuda_helper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#include "cuda_helper.h"
#include <cuda_runtime.h>

CUDAException::CUDAException(const char *_const_Message) : std::runtime_error(_const_Message)
{

}

CUDAMallocException::CUDAMallocException(const char *_const_Message) : std::runtime_error(_const_Message)
{

}

CUDAMemCopyException::CUDAMemCopyException(const char *_const_Message) : std::runtime_error(_const_Message)
{

}

void CudaHelper::DeviceSynchronize()
{
cudaError_t cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess)
{
auto temp = cudaGetErrorString(cudaStatus);
throw CUDAException(temp);
}
}

void CudaHelper::GetThreadBlocks(unsigned int numberOfElements, unsigned int alignment, /*out*/ unsigned int &numberOfThreadBlocks, /*out*/ unsigned int &numberOfThreads)
{
numberOfThreads = (numberOfElements / alignment) * alignment;
numberOfThreadBlocks = (numberOfElements / alignment);
if (numberOfElements % alignment != 0)
{
numberOfThreads += alignment;
numberOfThreadBlocks++;
}
}

void CudaHelper::MemcpyHostToDevice(void* host, void* device, size_t size)
{
cudaError_t cudaStatus = cudaMemcpy(device, host, size, cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess)
{
throw CUDAMemCopyException("cudaMemcpy() failed!");
}
}

void CudaHelper::MemcpyDeviceToHost(void* device, void* host, size_t size)
{
cudaError_t cudaStatus = cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess)
{
throw CUDAMemCopyException("cudaMemcpy() failed!");
}
}

void CudaHelper::CheckLastError()
{
cudaError_t cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess)
{
auto temp = cudaGetErrorString(cudaStatus);
throw CUDAException(temp);
}
}

void CudaHelper::CudaMalloc(void** src, size_t size)
{
cudaError_t cudaStatus = cudaMalloc(src, size);
if (cudaStatus != cudaSuccess)
{
printf("Error in CudaMalloc: %s : ", cudaGetErrorString(cudaStatus));
throw cudaErrorMemoryAllocation;
}
}

void CudaHelper::CudaFree(void* src)
{
cudaError_t cudaStatus = cudaFree(src);
if (cudaStatus != cudaSuccess)
{
printf("Error in CudaFree: %s : ", cudaGetErrorString(cudaStatus));
throw cudaErrorMemoryAllocation;
}
}
28 changes: 24 additions & 4 deletions Utils/cuda_helper.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "cuda_helper.h"
#include "cuda_helper.cuh"
#include <cuda_runtime.h>

CUDAException::CUDAException(const char *_const_Message) : std::runtime_error(_const_Message)
Expand Down Expand Up @@ -42,7 +42,7 @@ void CudaHelper::MemcpyHostToDevice(void* host, void* device, size_t size)
cudaError_t cudaStatus = cudaMemcpy(device, host, size, cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess)
{
throw CUDAMemCopyException("cudaMemcpy() failed!");
throw CUDAMemCopyException("cudaMemcpy() from host to device failed!");
}
}

Expand All @@ -51,7 +51,7 @@ void CudaHelper::MemcpyDeviceToHost(void* device, void* host, size_t size)
cudaError_t cudaStatus = cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess)
{
throw CUDAMemCopyException("cudaMemcpy() failed!");
throw CUDAMemCopyException("cudaMemcpy() from device to host failed!");
}
}

Expand All @@ -63,4 +63,24 @@ void CudaHelper::CheckLastError()
auto temp = cudaGetErrorString(cudaStatus);
throw CUDAException(temp);
}
}
}

void CudaHelper::CudaMalloc(void** src, size_t size)
{
cudaError_t cudaStatus = cudaMalloc(src, size);
if (cudaStatus != cudaSuccess)
{
printf("Error: %s : ", cudaGetErrorString(cudaStatus));
throw cudaErrorMemoryAllocation;
}
}

void CudaHelper::CudaFree(void* src)
{
cudaError_t cudaStatus = cudaFree(src);
if (cudaStatus != cudaSuccess)
{
printf("Error in CudaFree: %s : ", cudaGetErrorString(cudaStatus));
throw cudaErrorMemoryAllocation;
}
}
97 changes: 97 additions & 0 deletions Utils/cuda_helper.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#pragma once
#include <thrust/device_vector.h>

class CUDAException : public std::runtime_error
{
public:
CUDAException(const char *_const_Message);
};

class CUDAMallocException : public std::runtime_error
{
public:
CUDAMallocException(const char *_const_Message);
};

class CUDAMemCopyException : public std::runtime_error
{
public:
CUDAMemCopyException(const char *const_Message);
};

/*static*/ class CudaHelper
{
public:
/** Synchronizes the device work with the current thread and throws any errors as exception.
*/
static void DeviceSynchronize();


/** Throws the last error as exception.
*/
static void CheckLastError();

static void GetThreadBlocks(unsigned int numberOfElements, unsigned int alignment, /*out*/ unsigned int &numberOfThreadBlocks, /*out*/ unsigned int &numberOfThreads);


/** Gets a raw pointer from a thrust vector
*/
template<typename T>
static T* GetPointer(thrust::device_vector<T> &vector)
{
return thrust::raw_pointer_cast(&vector[0]);
}

/** Gets the size of the device_vector data in bytes.
*/
template<typename T>
static size_t GetSizeInBytes(const thrust::device_vector<T> &vector)
{
return sizeof(T) * vector.size();
}

/** Copies data from host to device.
*/
static void MemcpyHostToDevice(void* host, void* device, size_t size);

/** Copies data from host to device.
*/
template<typename T>
static void MemcpyHostToDevice(T* host, T* device, size_t elements)
{
MemcpyHostToDevice((void*)host, (void*)device, elements * sizeof(T));
}

/** Copies data from device to host.
*/
static void MemcpyDeviceToHost(void* device, void* host, size_t size);

/** Copies data from device to host.
*/
template<typename T>
static void MemcpyDeviceToHost(T* device, T* host, size_t elements)
{
MemcpyDeviceToHost((void*)device, (void*)host, elements * sizeof(T));
}

static void CudaMalloc(void** src, size_t size);

/** Reserve memory for data structures on device.
*/
template<typename T>
static void CudaMalloc(T** src, size_t elements)
{
CudaMalloc((void**)src, elements * sizeof(T));
}

static void CudaFree(void* src);

/** Reserve memory for data structures on device.
*/
template<typename T>
static void CudaFree(T* src)
{
CudaFree((void*)src);
}

};
20 changes: 20 additions & 0 deletions Utils/cuda_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,24 @@ class CUDAMemCopyException : public std::runtime_error
MemcpyDeviceToHost((void*)device, (void*)host, elements * sizeof(T));
}

static void CudaMalloc(void** src, size_t size);

/** Reserve memory for data structures on device.
*/
template<typename T>
static void CudaMalloc(T** src, size_t elements)
{
CudaMalloc((void**)src, elements * sizeof(T));
}

static void CudaFree(void* src);

/** Reserve memory for data structures on device.
*/
template<typename T>
static void CudaFree(T* src)
{
CudaFree((void*)src);
}

};
44 changes: 44 additions & 0 deletions include/PointSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <iostream>
#include <algorithm>
#include <memory>
#include "cuda_helper.h"

#include "Common.h"

Expand Down Expand Up @@ -35,13 +36,31 @@ class PointSet
uint *Offsets;
uint *Neighbors;

//#ifdef GPU_NEIGHBORHOOD_SEARCH
// Same data as in pinned memory. Used to avoid unnecessary data copies in the SPliSHSPlaSH-GPU implementation
uint *d_Counts;
uint *d_Offsets;
uint *d_Neighbors;
//#endif

NeighborSet()
{
NeighborCountAllocationSize = 0u;
ParticleCountAllocationSize = 0u;
Counts = nullptr;
Offsets = nullptr;
Neighbors = nullptr;

d_Counts = nullptr;
d_Offsets = nullptr;
d_Neighbors = nullptr;
}

~NeighborSet()
{
CudaHelper::CudaFree(d_Counts);
CudaHelper::CudaFree(d_Offsets);
CudaHelper::CudaFree(d_Neighbors);
}
};

Expand Down Expand Up @@ -80,6 +99,31 @@ class PointSet
return neighborSet.Neighbors[neighborSet.Offsets[i] + k];
}

inline uint n_neighborsets()
{
return neighbors.size();
}

inline uint* neighbor_indices(const uint i)
{
return neighbors[i].d_Neighbors;
}

inline uint* neighbor_counts(const uint i)
{
return neighbors[i].d_Counts;
}

inline uint* neighbor_offsets(const uint i)
{
return neighbors[i].d_Offsets;
}

PointSetImplementation *getPointSetImplementation()
{
return impl.get();
}

/**
* Fetches pointer to neighbors of point i in the given point set.
* @param point_set Point set index of other point set where neighbors have been searched.
Expand Down
17 changes: 16 additions & 1 deletion src/PointSetImplementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,21 @@ namespace cuNSearch

void copyToDevice();

thrust::device_vector<Real3> &getParticles()
{
return d_Particles;
}

int getThreadsPerBlock()
{
return ThreadsPerBlock;
}

uint getNumberOfBlocks()
{
return BlockStartsForParticles;
}

private:
friend NeighborhoodSearch;
friend cuNSearchDeviceData;
Expand Down Expand Up @@ -59,4 +74,4 @@ namespace cuNSearch

void prepareInternalDataStructures(GridInfo &gridInfo, size_t numberOfCells);
};
};
};
Loading