From 0a434dd44d942baf09055b337e2b20e2f76612a3 Mon Sep 17 00:00:00 2001 From: Robert Baumgartner Date: Thu, 5 Dec 2019 17:20:46 +0100 Subject: [PATCH 01/12] Added getters and additional CudaHelper-functions --- Utils/cuda_helper.cpp | 86 ++++++++++++++++++++++++++++++++++++++ Utils/cuda_helper.cu | 28 +++++++++++-- Utils/cuda_helper.cuh | 97 +++++++++++++++++++++++++++++++++++++++++++ Utils/cuda_helper.h | 20 +++++++++ include/PointSet.h | 35 ++++++++++++++++ 5 files changed, 262 insertions(+), 4 deletions(-) create mode 100644 Utils/cuda_helper.cpp create mode 100644 Utils/cuda_helper.cuh diff --git a/Utils/cuda_helper.cpp b/Utils/cuda_helper.cpp new file mode 100644 index 0000000..2484fad --- /dev/null +++ b/Utils/cuda_helper.cpp @@ -0,0 +1,86 @@ +#include "cuda_helper.h" +#include + +CUDAException::CUDAException(const char *_const_Message) : std::runtime_error(_const_Message) +{ + +} + +CUDAMallocException::CUDAMallocException(const char *_const_Message) : std::runtime_error(_const_Message) +{ + +} + +CUDAMemCopyException::CUDAMemCopyException(const char *_const_Message) : std::runtime_error(_const_Message) +{ + +} + +void CudaHelper::DeviceSynchronize() +{ + cudaError_t cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) + { + auto temp = cudaGetErrorString(cudaStatus); + throw CUDAException(temp); + } +} + +void CudaHelper::GetThreadBlocks(unsigned int numberOfElements, unsigned int alignment, /*out*/ unsigned int &numberOfThreadBlocks, /*out*/ unsigned int &numberOfThreads) +{ + numberOfThreads = (numberOfElements / alignment) * alignment; + numberOfThreadBlocks = (numberOfElements / alignment); + if (numberOfElements % alignment != 0) + { + numberOfThreads += alignment; + numberOfThreadBlocks++; + } +} + +void CudaHelper::MemcpyHostToDevice(void* host, void* device, size_t size) +{ + cudaError_t cudaStatus = cudaMemcpy(device, host, size, cudaMemcpyHostToDevice); + if (cudaStatus != cudaSuccess) + { + throw CUDAMemCopyException("cudaMemcpy() failed!"); + } +} + +void CudaHelper::MemcpyDeviceToHost(void* device, void* host, size_t size) +{ + cudaError_t cudaStatus = cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost); + if (cudaStatus != cudaSuccess) + { + throw CUDAMemCopyException("cudaMemcpy() failed!"); + } +} + +void CudaHelper::CheckLastError() +{ + cudaError_t cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) + { + auto temp = cudaGetErrorString(cudaStatus); + throw CUDAException(temp); + } +} + +void CudaHelper::CudaMalloc(void** src, size_t size) +{ + cudaError_t cudaStatus = cudaMalloc(src, size); + if (cudaStatus != cudaSuccess) + { + printf("Error in CudaMalloc: %s : ", cudaGetErrorString(cudaStatus)); + throw cudaErrorMemoryAllocation; + } +} + +void CudaHelper::CudaFree(void* src) +{ + cudaError_t cudaStatus = cudaFree(src); + if (cudaStatus != cudaSuccess) + { + printf("Error in CudaFree: %s : ", cudaGetErrorString(cudaStatus)); + throw cudaErrorMemoryAllocation; + } +} \ No newline at end of file diff --git a/Utils/cuda_helper.cu b/Utils/cuda_helper.cu index ec60f2d..bdb9955 100644 --- a/Utils/cuda_helper.cu +++ b/Utils/cuda_helper.cu @@ -1,4 +1,4 @@ -#include "cuda_helper.h" +#include "cuda_helper.cuh" #include CUDAException::CUDAException(const char *_const_Message) : std::runtime_error(_const_Message) @@ -42,7 +42,7 @@ void CudaHelper::MemcpyHostToDevice(void* host, void* device, size_t size) cudaError_t cudaStatus = cudaMemcpy(device, host, size, cudaMemcpyHostToDevice); if (cudaStatus != cudaSuccess) { - throw CUDAMemCopyException("cudaMemcpy() failed!"); + throw CUDAMemCopyException("cudaMemcpy() from host to device failed!"); } } @@ -51,7 +51,7 @@ void CudaHelper::MemcpyDeviceToHost(void* device, void* host, size_t size) cudaError_t cudaStatus = cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost); if (cudaStatus != cudaSuccess) { - throw CUDAMemCopyException("cudaMemcpy() failed!"); + throw CUDAMemCopyException("cudaMemcpy() from device to host failed!"); } } @@ -63,4 +63,24 @@ void CudaHelper::CheckLastError() auto temp = cudaGetErrorString(cudaStatus); throw CUDAException(temp); } -} \ No newline at end of file +} + +void CudaHelper::CudaMalloc(void** src, size_t size) +{ + cudaError_t cudaStatus = cudaMalloc(src, size); + if (cudaStatus != cudaSuccess) + { + printf("Error: %s : ", cudaGetErrorString(cudaStatus)); + throw cudaErrorMemoryAllocation; + } +} + +void CudaHelper::CudaFree(void* src) +{ + cudaError_t cudaStatus = cudaFree(src); + if (cudaStatus != cudaSuccess) + { + printf("Error in CudaFree: %s : ", cudaGetErrorString(cudaStatus)); + throw cudaErrorMemoryAllocation; + } +} diff --git a/Utils/cuda_helper.cuh b/Utils/cuda_helper.cuh new file mode 100644 index 0000000..531292d --- /dev/null +++ b/Utils/cuda_helper.cuh @@ -0,0 +1,97 @@ +#pragma once +#include + +class CUDAException : public std::runtime_error +{ +public: + CUDAException(const char *_const_Message); +}; + +class CUDAMallocException : public std::runtime_error +{ +public: + CUDAMallocException(const char *_const_Message); +}; + +class CUDAMemCopyException : public std::runtime_error +{ +public: + CUDAMemCopyException(const char *const_Message); +}; + +/*static*/ class CudaHelper +{ +public: + /** Synchronizes the device work with the current thread and throws any errors as exception. + */ + static void DeviceSynchronize(); + + + /** Throws the last error as exception. + */ + static void CheckLastError(); + + static void GetThreadBlocks(unsigned int numberOfElements, unsigned int alignment, /*out*/ unsigned int &numberOfThreadBlocks, /*out*/ unsigned int &numberOfThreads); + + + /** Gets a raw pointer from a thrust vector + */ + template + static T* GetPointer(thrust::device_vector &vector) + { + return thrust::raw_pointer_cast(&vector[0]); + } + + /** Gets the size of the device_vector data in bytes. + */ + template + static size_t GetSizeInBytes(const thrust::device_vector &vector) + { + return sizeof(T) * vector.size(); + } + + /** Copies data from host to device. + */ + static void MemcpyHostToDevice(void* host, void* device, size_t size); + + /** Copies data from host to device. + */ + template + static void MemcpyHostToDevice(T* host, T* device, size_t elements) + { + MemcpyHostToDevice((void*)host, (void*)device, elements * sizeof(T)); + } + + /** Copies data from device to host. + */ + static void MemcpyDeviceToHost(void* device, void* host, size_t size); + + /** Copies data from device to host. + */ + template + static void MemcpyDeviceToHost(T* device, T* host, size_t elements) + { + MemcpyDeviceToHost((void*)device, (void*)host, elements * sizeof(T)); + } + + static void CudaMalloc(void** src, size_t size); + + /** Reserve memory for data structures on device. + */ + template + static void CudaMalloc(T** src, size_t elements) + { + CudaMalloc((void**)src, elements * sizeof(T)); + } + + static void CudaFree(void* src); + + /** Reserve memory for data structures on device. + */ + template + static void CudaFree(T* src) + { + CudaFree((void*)src); + } + +}; \ No newline at end of file diff --git a/Utils/cuda_helper.h b/Utils/cuda_helper.h index 4a98572..531292d 100644 --- a/Utils/cuda_helper.h +++ b/Utils/cuda_helper.h @@ -74,4 +74,24 @@ class CUDAMemCopyException : public std::runtime_error MemcpyDeviceToHost((void*)device, (void*)host, elements * sizeof(T)); } + static void CudaMalloc(void** src, size_t size); + + /** Reserve memory for data structures on device. + */ + template + static void CudaMalloc(T** src, size_t elements) + { + CudaMalloc((void**)src, elements * sizeof(T)); + } + + static void CudaFree(void* src); + + /** Reserve memory for data structures on device. + */ + template + static void CudaFree(T* src) + { + CudaFree((void*)src); + } + }; \ No newline at end of file diff --git a/include/PointSet.h b/include/PointSet.h index 9d97a7f..f833420 100644 --- a/include/PointSet.h +++ b/include/PointSet.h @@ -80,6 +80,41 @@ class PointSet return neighborSet.Neighbors[neighborSet.Offsets[i] + k]; } + inline uint n_neighborsets() + { + return neighbors.size(); + } + + inline uint neighbor_count(const uint i) + { + return neighbors[i].NeighborCountAllocationSize; + } + + inline uint particle_count(const uint i) + { + return neighbors[i].ParticleCountAllocationSize; + } + + inline const uint* neighbor_indices(const uint i) + { + return neighbors[i].Neighbors; + } + + inline const uint* neighbor_counts(const uint i) + { + return neighbors[i].Counts; + } + + inline const uint* neighbor_offsets(const uint i) + { + return neighbors[i].Offsets; + } + + PointSetImplementation *getPointSetImplementation() + { + return impl.get(); + } + /** * Fetches pointer to neighbors of point i in the given point set. * @param point_set Point set index of other point set where neighbors have been searched. From 40aaa21761f12f704b4955d0899a11139a9f9918 Mon Sep 17 00:00:00 2001 From: Robert Baumgartner Date: Thu, 5 Dec 2019 18:14:02 +0100 Subject: [PATCH 02/12] Minor build changes --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index e0b6f93..2da79b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ set (INCLUDE_HEADERS include/ActivationTable.h include/Common.h include/cuNSearch.h + Utils/cuda_helper.h ) set (HEADER_FILES From 4d398a5dfddb5430044c01307d7da491bf499e7b Mon Sep 17 00:00:00 2001 From: Robert Baumgartner Date: Thu, 5 Dec 2019 18:17:58 +0100 Subject: [PATCH 03/12] Added two getters in PointSetImplementation --- src/PointSetImplementation.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/PointSetImplementation.h b/src/PointSetImplementation.h index c4d0792..c7351a7 100644 --- a/src/PointSetImplementation.h +++ b/src/PointSetImplementation.h @@ -2,7 +2,7 @@ #include "Types.h" #include "GridInfo.h" #include -#include "cuda_helper.h" +#include "../Utils/cuda_helper.cuh" namespace cuNSearch { @@ -31,6 +31,21 @@ namespace cuNSearch void copyToDevice(); + thrust::device_vector &getParticles() + { + return d_Particles; + } + + int getThreadsPerBlock() + { + return ThreadsPerBlock; + } + + uint getNumberOfBlocks() + { + return BlockStartsForParticles; + } + private: friend NeighborhoodSearch; friend cuNSearchDeviceData; @@ -59,4 +74,4 @@ namespace cuNSearch void prepareInternalDataStructures(GridInfo &gridInfo, size_t numberOfCells); }; -}; \ No newline at end of file +}; From 2bcadcaf0d7c6666aa0229365e25eb7e92dd2bd6 Mon Sep 17 00:00:00 2001 From: Robert Baumgartner Date: Thu, 5 Dec 2019 18:20:40 +0100 Subject: [PATCH 04/12] Minor change --- src/PointSetImplementation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PointSetImplementation.h b/src/PointSetImplementation.h index c7351a7..23076c0 100644 --- a/src/PointSetImplementation.h +++ b/src/PointSetImplementation.h @@ -2,7 +2,7 @@ #include "Types.h" #include "GridInfo.h" #include -#include "../Utils/cuda_helper.cuh" +#include "cuda_helper.cuh" namespace cuNSearch { From 75f0b21714707c0926b15a7d680c0af3639646e4 Mon Sep 17 00:00:00 2001 From: Robert Baumgartner Date: Thu, 5 Dec 2019 18:25:46 +0100 Subject: [PATCH 05/12] Setting a path --- src/PointSetImplementation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PointSetImplementation.h b/src/PointSetImplementation.h index 23076c0..c7351a7 100644 --- a/src/PointSetImplementation.h +++ b/src/PointSetImplementation.h @@ -2,7 +2,7 @@ #include "Types.h" #include "GridInfo.h" #include -#include "cuda_helper.cuh" +#include "../Utils/cuda_helper.cuh" namespace cuNSearch { From 63f52c1dfae68a812168f95d39a4d953c03d55d6 Mon Sep 17 00:00:00 2001 From: Robert Baumgartner Date: Mon, 9 Dec 2019 16:07:36 +0100 Subject: [PATCH 06/12] Modified the device data, so that the neighborhood-metadata will remain on the GPU --- include/PointSet.h | 19 ++++++++++++++++++ src/cuNSearchDeviceData.cu | 40 ++++++++++++++++++++++---------------- src/cuNSearchDeviceData.h | 8 ++++---- 3 files changed, 46 insertions(+), 21 deletions(-) diff --git a/include/PointSet.h b/include/PointSet.h index f833420..1159afe 100644 --- a/include/PointSet.h +++ b/include/PointSet.h @@ -5,6 +5,7 @@ #include #include #include +#include "../Utils/cuda_helper.cuh" #include "Common.h" @@ -35,6 +36,13 @@ class PointSet uint *Offsets; uint *Neighbors; + //#ifdef GPU_NEIGHBORHOOD_SEARCH + // Same data as in pinned memory. Used to avoid unnecessary data copies in the SPliSHSPlaSH-GPU implementation + uint *d_Counts; + uint *d_Offsets; + uint *d_Neighbors; + //#endif + NeighborSet() { NeighborCountAllocationSize = 0u; @@ -42,6 +50,17 @@ class PointSet Counts = nullptr; Offsets = nullptr; Neighbors = nullptr; + + d_Counts = nullptr; + d_Offsets = nullptr; + d_Neighbors = nullptr; + } + + ~NeighborSet() + { + CudaHelper::CudaFree(d_Counts); + CudaHelper::CudaFree(d_Offsets); + CudaHelper::CudaFree(d_Neighbors); } }; diff --git a/src/cuNSearchDeviceData.cu b/src/cuNSearchDeviceData.cu index 3e6bdc0..0087d9c 100644 --- a/src/cuNSearchDeviceData.cu +++ b/src/cuNSearchDeviceData.cu @@ -19,7 +19,7 @@ #include "PointSetImplementation.h" #include "GridInfo.h" -#include "cuda_helper.h" +#include "cuda_helper.cuh" #include "cuNSearchKernels.cuh" namespace cuNSearch @@ -178,13 +178,17 @@ namespace cuNSearch if (queryPointSet.n_points() == 0) return; + auto &neighborSet = queryPointSet.neighbors[neighborListEntry]; auto queryPointSetImpl = queryPointSet.impl.get(); auto pointSetImpl = pointSet.impl.get(); uint particleCount = static_cast(queryPointSet.n_points()); USE_TIMING(Timing::startTiming("Execute kNeighborCount")); - d_NeighborCounts.resize(particleCount); + + uint* &d_NeighborCounts = neighborSet.d_Counts; + CudaHelper::CudaFree(d_NeighborCounts); + CudaHelper::CudaMalloc(&d_NeighborCounts, particleCount); kComputeCounts << BlockStartsForParticles, queryPointSetImpl->ThreadsPerBlock >> > ( (Real3*)CudaHelper::GetPointer(queryPointSetImpl->d_Particles), @@ -195,7 +199,7 @@ namespace cuNSearch CudaHelper::GetPointer(pointSetImpl->d_CellOffsets), CudaHelper::GetPointer(pointSetImpl->d_CellParticleCounts), - CudaHelper::GetPointer(d_NeighborCounts), + d_NeighborCounts, CudaHelper::GetPointer(pointSetImpl->d_ReversedSortIndices) ); @@ -205,23 +209,28 @@ namespace cuNSearch USE_TIMING(Timing::stopTiming(PRINT_STATS)); USE_TIMING(Timing::startTiming("Execute exclusive_scan over counts")); - d_NeighborWriteOffsets.resize(particleCount); + uint* &d_NeighborWriteOffsets = neighborSet.d_Offsets; + CudaHelper::CudaFree(d_NeighborWriteOffsets); + CudaHelper::CudaMalloc(&d_NeighborWriteOffsets, particleCount); //Prefix sum over neighbor counts thrust::exclusive_scan( - d_NeighborCounts.begin(), - d_NeighborCounts.end(), - d_NeighborWriteOffsets.begin()); + d_NeighborCounts, + d_NeighborCounts + particleCount, + d_NeighborWriteOffsets); CudaHelper::DeviceSynchronize(); //Compute total amount of neighbors uint lastOffset = 0; - CudaHelper::MemcpyDeviceToHost(CudaHelper::GetPointer(d_NeighborWriteOffsets) + particleCount - 1, &lastOffset, 1); + CudaHelper::MemcpyDeviceToHost( d_NeighborWriteOffsets + particleCount - 1, &lastOffset, 1); uint lastParticleNeighborCount = 0; - CudaHelper::MemcpyDeviceToHost(CudaHelper::GetPointer(d_NeighborCounts) + particleCount - 1, &lastParticleNeighborCount, 1); + CudaHelper::MemcpyDeviceToHost( d_NeighborCounts + particleCount - 1, &lastParticleNeighborCount, 1); uint totalNeighborCount = lastOffset + lastParticleNeighborCount; - d_Neighbors.resize(totalNeighborCount); + + uint* &d_Neighbors = neighborSet.d_Neighbors; + CudaHelper::CudaFree(d_Neighbors); + CudaHelper::CudaMalloc(&d_Neighbors, totalNeighborCount); CudaHelper::DeviceSynchronize(); @@ -237,8 +246,7 @@ namespace cuNSearch CudaHelper::GetPointer(pointSetImpl->d_CellOffsets), CudaHelper::GetPointer(pointSetImpl->d_CellParticleCounts), - CudaHelper::GetPointer(d_NeighborWriteOffsets), - CudaHelper::GetPointer(d_Neighbors), + d_NeighborWriteOffsets, d_Neighbors, CudaHelper::GetPointer(pointSetImpl->d_ReversedSortIndices) ); @@ -249,8 +257,6 @@ namespace cuNSearch //Copy data to host USE_TIMING(Timing::startTiming("Neighbor copy from device to host - resize")); - auto &neighborSet = queryPointSet.neighbors[neighborListEntry]; - if (neighborSet.NeighborCountAllocationSize < totalNeighborCount) { if (neighborSet.NeighborCountAllocationSize != 0) @@ -285,9 +291,9 @@ namespace cuNSearch printf("Expected amount: %f MB \n", bytesToCopy / (1024.0f * 1024.0f)); } - CudaHelper::MemcpyDeviceToHost(CudaHelper::GetPointer(d_Neighbors), neighborSet.Neighbors, totalNeighborCount); - CudaHelper::MemcpyDeviceToHost(CudaHelper::GetPointer(d_NeighborCounts), neighborSet.Counts, particleCount); - CudaHelper::MemcpyDeviceToHost(CudaHelper::GetPointer(d_NeighborWriteOffsets), neighborSet.Offsets, particleCount); + CudaHelper::MemcpyDeviceToHost( d_Neighbors, neighborSet.Neighbors, totalNeighborCount); + CudaHelper::MemcpyDeviceToHost( d_NeighborCounts, neighborSet.Counts, particleCount); + CudaHelper::MemcpyDeviceToHost( d_NeighborWriteOffsets, neighborSet.Offsets, particleCount); USE_TIMING(Timing::stopTiming(PRINT_STATS)); } diff --git a/src/cuNSearchDeviceData.h b/src/cuNSearchDeviceData.h index e804971..868e58b 100644 --- a/src/cuNSearchDeviceData.h +++ b/src/cuNSearchDeviceData.h @@ -38,9 +38,9 @@ namespace cuNSearch thrust::device_vector d_MinMax; thrust::device_vector d_TempSortIndices; - //Device neighbor buffers (only temporary used: after the computation the data is copied to the host) - thrust::device_vector d_Neighbors; - thrust::device_vector d_NeighborCounts; - thrust::device_vector d_NeighborWriteOffsets; + // only temporary used. After neighborhood computation pointer is handed over to point set +/* uint* &d_Neighbors; + uint* &d_NeighborCounts; + uint* &d_NeighborWriteOffsets; */ }; }; \ No newline at end of file From 547d981aac951c69ad7703d58cf3378cf2208576 Mon Sep 17 00:00:00 2001 From: Robert Baumgartner Date: Mon, 9 Dec 2019 16:10:44 +0100 Subject: [PATCH 07/12] Reframed two paths --- include/PointSet.h | 2 +- src/PointSetImplementation.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/PointSet.h b/include/PointSet.h index 1159afe..f5d3542 100644 --- a/include/PointSet.h +++ b/include/PointSet.h @@ -5,7 +5,7 @@ #include #include #include -#include "../Utils/cuda_helper.cuh" +#include "cuda_helper.cuh" #include "Common.h" diff --git a/src/PointSetImplementation.h b/src/PointSetImplementation.h index c7351a7..23076c0 100644 --- a/src/PointSetImplementation.h +++ b/src/PointSetImplementation.h @@ -2,7 +2,7 @@ #include "Types.h" #include "GridInfo.h" #include -#include "../Utils/cuda_helper.cuh" +#include "cuda_helper.cuh" namespace cuNSearch { From 75f2c4e84e93131bc0035536a2dac81d4e9aaa66 Mon Sep 17 00:00:00 2001 From: Robert Baumgartner Date: Mon, 9 Dec 2019 16:26:03 +0100 Subject: [PATCH 08/12] Reframed headers, so SPliSHSPlaSH can work with them --- include/PointSet.h | 2 +- src/PointSetImplementation.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/PointSet.h b/include/PointSet.h index f5d3542..381fbf3 100644 --- a/include/PointSet.h +++ b/include/PointSet.h @@ -5,7 +5,7 @@ #include #include #include -#include "cuda_helper.cuh" +#include "../Utils/cuda_helper.h" #include "Common.h" diff --git a/src/PointSetImplementation.h b/src/PointSetImplementation.h index 23076c0..3797c65 100644 --- a/src/PointSetImplementation.h +++ b/src/PointSetImplementation.h @@ -2,7 +2,7 @@ #include "Types.h" #include "GridInfo.h" #include -#include "cuda_helper.cuh" +#include "../Utils/cuda_helper.h" namespace cuNSearch { From 218d3a6f2cd12e796228042bf63ae6bbde71473f Mon Sep 17 00:00:00 2001 From: Robert Baumgartner Date: Mon, 9 Dec 2019 16:35:16 +0100 Subject: [PATCH 09/12] paths.. --- include/PointSet.h | 2 +- src/PointSetImplementation.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/PointSet.h b/include/PointSet.h index 381fbf3..7b17c7e 100644 --- a/include/PointSet.h +++ b/include/PointSet.h @@ -5,7 +5,7 @@ #include #include #include -#include "../Utils/cuda_helper.h" +#include "cuda_helper.h" #include "Common.h" diff --git a/src/PointSetImplementation.h b/src/PointSetImplementation.h index 3797c65..db07778 100644 --- a/src/PointSetImplementation.h +++ b/src/PointSetImplementation.h @@ -2,7 +2,7 @@ #include "Types.h" #include "GridInfo.h" #include -#include "../Utils/cuda_helper.h" +#include "cuda_helper.h" namespace cuNSearch { From 679368d8b4d6ee8e9f74b8f4f886bcf3e09c9508 Mon Sep 17 00:00:00 2001 From: Robert Baumgartner Date: Tue, 10 Dec 2019 12:03:04 +0100 Subject: [PATCH 10/12] Fixed a bug that arises in thrust from static dispatching, see introduction to thrust for details --- src/cuNSearchDeviceData.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cuNSearchDeviceData.cu b/src/cuNSearchDeviceData.cu index 0087d9c..70e4ecb 100644 --- a/src/cuNSearchDeviceData.cu +++ b/src/cuNSearchDeviceData.cu @@ -215,9 +215,9 @@ namespace cuNSearch //Prefix sum over neighbor counts thrust::exclusive_scan( - d_NeighborCounts, - d_NeighborCounts + particleCount, - d_NeighborWriteOffsets); + thrust::device_ptr(d_NeighborCounts), + thrust::device_ptr(d_NeighborCounts) + particleCount, + thrust::device_ptr(d_NeighborWriteOffsets)); CudaHelper::DeviceSynchronize(); From fa290910b6f6185b62bfe2ebd935c61f80a14f67 Mon Sep 17 00:00:00 2001 From: Robert Baumgartner Date: Tue, 10 Dec 2019 15:37:48 +0100 Subject: [PATCH 11/12] Updated the getter-functions for the device-data --- include/PointSet.h | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/include/PointSet.h b/include/PointSet.h index 7b17c7e..924f2c7 100644 --- a/include/PointSet.h +++ b/include/PointSet.h @@ -104,29 +104,19 @@ class PointSet return neighbors.size(); } - inline uint neighbor_count(const uint i) - { - return neighbors[i].NeighborCountAllocationSize; - } - - inline uint particle_count(const uint i) - { - return neighbors[i].ParticleCountAllocationSize; - } - inline const uint* neighbor_indices(const uint i) { - return neighbors[i].Neighbors; + return neighbors[i].d_Neighbors; } inline const uint* neighbor_counts(const uint i) { - return neighbors[i].Counts; + return neighbors[i].d_Counts; } inline const uint* neighbor_offsets(const uint i) { - return neighbors[i].Offsets; + return neighbors[i].d_Offsets; } PointSetImplementation *getPointSetImplementation() From 9cd6d64c03a1b60d6eb99dcaf4fe647f90fe7020 Mon Sep 17 00:00:00 2001 From: Robert Baumgartner Date: Tue, 10 Dec 2019 16:58:25 +0100 Subject: [PATCH 12/12] Minor change to the getter functions --- include/PointSet.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/PointSet.h b/include/PointSet.h index 924f2c7..11a438e 100644 --- a/include/PointSet.h +++ b/include/PointSet.h @@ -104,17 +104,17 @@ class PointSet return neighbors.size(); } - inline const uint* neighbor_indices(const uint i) + inline uint* neighbor_indices(const uint i) { return neighbors[i].d_Neighbors; } - inline const uint* neighbor_counts(const uint i) + inline uint* neighbor_counts(const uint i) { return neighbors[i].d_Counts; } - inline const uint* neighbor_offsets(const uint i) + inline uint* neighbor_offsets(const uint i) { return neighbors[i].d_Offsets; }