Skip to content

Commit 0dd02e8

Browse files
author
kevyuu
committed
Refactor Radix Sorter and use histogram as skip list
1 parent f4b8ac6 commit 0dd02e8

File tree

4 files changed

+123
-115
lines changed

4 files changed

+123
-115
lines changed

examples_tests

include/nbl/core/algorithm/radix_sort.h

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,23 +38,26 @@ constexpr int8_t find_msb(const T& a_variable)
3838
{
3939
static_assert(std::is_unsigned<T>::value, "Variable must be unsigned");
4040

41-
constexpr uint8_t number_of_bits = std::numeric_limits<T>::digits;
41+
constexpr int8_t number_of_bits = std::numeric_limits<T>::digits;
4242
const std::bitset<number_of_bits> variable_bitset{a_variable};
4343

44-
for (uint8_t msb = number_of_bits - 1; msb >= 0; msb--)
44+
for (int8_t msb = number_of_bits - 1; msb >= 0; msb--)
4545
{
4646
if (variable_bitset[msb] == 1)
4747
return msb;
4848
}
4949
return -1;
5050
}
5151

52+
53+
}
54+
5255
template<size_t key_bit_count, typename histogram_t>
53-
struct RadixSorter
56+
struct LSBSorter
5457
{
5558
_NBL_STATIC_INLINE_CONSTEXPR uint16_t histogram_bytesize = 8192u;
5659
_NBL_STATIC_INLINE_CONSTEXPR size_t histogram_size = size_t(histogram_bytesize)/sizeof(histogram_t);
57-
_NBL_STATIC_INLINE_CONSTEXPR uint8_t radix_bits = find_msb(histogram_size);
60+
_NBL_STATIC_INLINE_CONSTEXPR uint8_t radix_bits = impl::find_msb(histogram_size);
5861
_NBL_STATIC_INLINE_CONSTEXPR size_t last_pass = (key_bit_count-1ull)/size_t(radix_bits);
5962
_NBL_STATIC_INLINE_CONSTEXPR uint16_t radix_mask = (1u<<radix_bits)-1u;
6063

@@ -63,6 +66,14 @@ struct RadixSorter
6366
{
6467
return pass<RandomIt,KeyAccessor,0ull>(input,output,rangeSize,comp);
6568
}
69+
70+
std::pair<histogram_t, histogram_t> getHashBound(size_t key) const
71+
{
72+
constexpr histogram_t shift = static_cast<histogram_t>(radix_bits * last_pass);
73+
const auto histogramIx = (key >> shift) & radix_mask;
74+
return { histogram[histogramIx], histogram[histogramIx + 1] };
75+
}
76+
6677
private:
6778
template<class RandomIt, class KeyAccessor, size_t pass_ix>
6879
inline RandomIt pass(RandomIt input, RandomIt output, const histogram_t rangeSize, const KeyAccessor& comp)
@@ -91,19 +102,17 @@ struct RadixSorter
91102
alignas(sizeof(histogram_t)) histogram_t histogram[histogram_size];
92103
};
93104

94-
}
95-
96105
template<class RandomIt, class KeyAccessor>
97106
inline RandomIt radix_sort(RandomIt input, RandomIt scratch, const size_t rangeSize, const KeyAccessor& comp)
98107
{
99108
assert(std::abs(std::distance(input,scratch))>=rangeSize);
100109

101110
if (rangeSize<static_cast<decltype(rangeSize)>(0x1ull<<16ull))
102-
return impl::RadixSorter<KeyAccessor::key_bit_count,uint16_t>()(input,scratch,static_cast<uint16_t>(rangeSize),comp);
111+
return LSBSorter<KeyAccessor::key_bit_count,uint16_t>()(input,scratch,static_cast<uint16_t>(rangeSize),comp);
103112
if (rangeSize<static_cast<decltype(rangeSize)>(0x1ull<<32ull))
104-
return impl::RadixSorter<KeyAccessor::key_bit_count,uint32_t>()(input,scratch,static_cast<uint32_t>(rangeSize),comp);
113+
return LSBSorter<KeyAccessor::key_bit_count,uint32_t>()(input,scratch,static_cast<uint32_t>(rangeSize),comp);
105114
else
106-
return impl::RadixSorter<KeyAccessor::key_bit_count,size_t>()(input,scratch,rangeSize,comp);
115+
return LSBSorter<KeyAccessor::key_bit_count,size_t>()(input,scratch,rangeSize,comp);
107116
}
108117

109118
//! Because Radix Sort needs O(2n) space and a number of passes dependent on the key length, the final sorted range can be either in `input` or `scratch`

src/nbl/asset/utils/CSmoothNormalGenerator.cpp

Lines changed: 74 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,14 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CSmoothNormalGenerator::calculateNor
8484
return smoothPolygon;
8585
}
8686

87-
CSmoothNormalGenerator::VertexHashMap::VertexHashMap(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize)
88-
:m_hashTableMaxSize(_hashTableMaxSize),
89-
m_cellSize(_cellSize)
87+
CSmoothNormalGenerator::VertexHashMap::VertexHashMap(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize) :
88+
m_sorter(createSorter(_vertexCount)),
89+
m_hashTableMaxSize(_hashTableMaxSize),
90+
m_cellSize(_cellSize)
9091
{
91-
assert((core::isPoT(m_hashTableMaxSize)));
92+
assert((core::isPoT(m_hashTableMaxSize)));
9293

93-
m_vertices.reserve(_vertexCount);
94-
m_buckets.reserve(_hashTableMaxSize + 1);
94+
m_vertices.reserve(_vertexCount);
9595
}
9696

9797
uint32_t CSmoothNormalGenerator::VertexHashMap::hash(const CPolygonGeometryManipulator::SSNGVertexData & vertex) const
@@ -121,8 +121,14 @@ CSmoothNormalGenerator::VertexHashMap::BucketBounds CSmoothNormalGenerator::Vert
121121
if (hash == invalidHash)
122122
return { m_vertices.end(), m_vertices.end() };
123123

124-
core::vector<CPolygonGeometryManipulator::SSNGVertexData>::iterator begin = std::lower_bound(m_vertices.begin(), m_vertices.end(), hash);
125-
core::vector<CPolygonGeometryManipulator::SSNGVertexData>::iterator end = std::upper_bound(m_vertices.begin(), m_vertices.end(), hash);
124+
const auto skipListBound = std::visit([&](auto& sorter)
125+
{
126+
auto hashBound = sorter.getHashBound(hash);
127+
return std::pair<collection_t::iterator, collection_t::iterator>(m_vertices.begin() + hashBound.first, m_vertices.begin() + hashBound.second);
128+
}, m_sorter);
129+
130+
auto begin = std::lower_bound(skipListBound.first, skipListBound.second, hash);
131+
auto end = std::upper_bound(skipListBound.first, skipListBound.second, hash);
126132

127133
//bucket missing
128134
if (begin == m_vertices.end())
@@ -135,22 +141,12 @@ CSmoothNormalGenerator::VertexHashMap::BucketBounds CSmoothNormalGenerator::Vert
135141
return { begin, end };
136142
}
137143

138-
struct KeyAccessor
139-
{
140-
_NBL_STATIC_INLINE_CONSTEXPR size_t key_bit_count = 32ull;
141-
142-
template<auto bit_offset, auto radix_mask>
143-
inline decltype(radix_mask) operator()(const CPolygonGeometryManipulator::SSNGVertexData& item) const
144-
{
145-
return static_cast<decltype(radix_mask)>(item.hash>>static_cast<uint32_t>(bit_offset))&radix_mask;
146-
}
147-
};
148144
void CSmoothNormalGenerator::VertexHashMap::validate()
149145
{
150146
const auto oldSize = m_vertices.size();
151147
m_vertices.resize(oldSize*2u);
152148
// TODO: maybe use counting sort (or big radix) and use the histogram directly for the m_buckets
153-
auto finalSortedOutput = core::radix_sort(m_vertices.data(),m_vertices.data()+oldSize,oldSize,KeyAccessor());
149+
auto finalSortedOutput = std::visit( [&](auto& sorter) { return sorter(m_vertices.data(), m_vertices.data() + oldSize, oldSize, KeyAccessor()); },m_sorter );
154150
// TODO: optimize out the erase
155151
if (finalSortedOutput != m_vertices.data())
156152
m_vertices.erase(m_vertices.begin(), m_vertices.begin() + oldSize);
@@ -160,19 +156,6 @@ void CSmoothNormalGenerator::VertexHashMap::validate()
160156
// TODO: are `m_buckets` even being USED!?
161157
uint16_t prevHash = m_vertices[0].hash;
162158
core::vector<CPolygonGeometryManipulator::SSNGVertexData>::iterator prevBegin = m_vertices.begin();
163-
m_buckets.push_back(prevBegin);
164-
165-
while (true)
166-
{
167-
core::vector<CPolygonGeometryManipulator::SSNGVertexData>::iterator next = std::upper_bound(prevBegin, m_vertices.end(), prevHash);
168-
m_buckets.push_back(next);
169-
170-
if (next == m_vertices.end())
171-
break;
172-
173-
prevBegin = next;
174-
prevHash = next->hash;
175-
}
176159
}
177160

178161
CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const asset::ICPUPolygonGeometry* polygon, float epsilon)
@@ -229,35 +212,31 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CSmoothNormalGenerator::processConne
229212
auto* normalPtr = reinterpret_cast<std::byte*>(outPolygon->getNormalPtr());
230213
auto normalStride = outPolygon->getNormalView().composed.stride;
231214

232-
for (uint32_t cell = 0; cell < vertexHashMap.getBucketCount() - 1; cell++)
233-
{
234-
VertexHashMap::BucketBounds processedBucket = vertexHashMap.getBucketBoundsById(cell);
235215

236-
for (core::vector<CPolygonGeometryManipulator::SSNGVertexData>::iterator processedVertex = processedBucket.begin; processedVertex != processedBucket.end; processedVertex++)
237-
{
238-
std::array<uint32_t, 8> neighboringCells;
239-
const auto cellCount = vertexHashMap.getNeighboringCellHashes(neighboringCells.data(), *processedVertex);
240-
hlsl::float32_t3 normal = processedVertex->weightedNormal;
216+
for (auto processedVertex = vertexHashMap.vertices().begin(); processedVertex != vertexHashMap.vertices().end(); processedVertex++)
217+
{
218+
std::array<uint32_t, 8> neighboringCells;
219+
const auto cellCount = vertexHashMap.getNeighboringCellHashes(neighboringCells.data(), *processedVertex);
220+
hlsl::float32_t3 normal = processedVertex->weightedNormal;
241221

242-
//iterate among all neighboring cells
243-
for (int i = 0; i < cellCount; i++)
244-
{
245-
VertexHashMap::BucketBounds bounds = vertexHashMap.getBucketBoundsByHash(neighboringCells[i]);
246-
for (; bounds.begin != bounds.end; bounds.begin++)
247-
{
248-
if (processedVertex != bounds.begin)
249-
if (compareVertexPosition(processedVertex->position, bounds.begin->position, epsilon) &&
250-
vxcmp(*processedVertex, *bounds.begin, polygon))
251-
{
252-
//TODO: better mean calculation algorithm
253-
normal += bounds.begin->weightedNormal;
254-
}
255-
}
256-
}
257-
normal = normalize(normal);
258-
memcpy(normalPtr + (normalStride * processedVertex->index), &normal, sizeof(normal));
259-
}
260-
}
222+
//iterate among all neighboring cells
223+
for (uint8_t i = 0; i < cellCount; i++)
224+
{
225+
VertexHashMap::BucketBounds bounds = vertexHashMap.getBucketBoundsByHash(neighboringCells[i]);
226+
for (; bounds.begin != bounds.end; bounds.begin++)
227+
{
228+
if (processedVertex != bounds.begin)
229+
if (compareVertexPosition(processedVertex->position, bounds.begin->position, epsilon) &&
230+
vxcmp(*processedVertex, *bounds.begin, polygon))
231+
{
232+
//TODO: better mean calculation algorithm
233+
normal += bounds.begin->weightedNormal;
234+
}
235+
}
236+
}
237+
normal = normalize(normal);
238+
memcpy(normalPtr + (normalStride * processedVertex->index), &normal, sizeof(normal));
239+
}
261240

262241
CPolygonGeometryManipulator::recomputeContentHashes(outPolygon.get());
263242

@@ -343,49 +322,44 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CSmoothNormalGenerator::weldVertices
343322
return true;
344323
};
345324

346-
for (uint32_t cell = 0; cell < vertices.getBucketCount() - 1; cell++)
347-
{
348-
VertexHashMap::BucketBounds processedBucket = vertices.getBucketBoundsById(cell);
349-
350-
for (core::vector<CPolygonGeometryManipulator::SSNGVertexData>::iterator processedVertex = processedBucket.begin; processedVertex != processedBucket.end; processedVertex++)
351-
{
352-
std::array<uint32_t, 8> neighboringCells;
353-
const auto cellCount = vertices.getNeighboringCellHashes(neighboringCells.data(), *processedVertex);
325+
for (auto processedVertex = vertices.vertices().begin(); processedVertex != vertices.vertices().end(); processedVertex++)
326+
{
327+
std::array<uint32_t, 8> neighboringCells;
328+
const auto cellCount = vertices.getNeighboringCellHashes(neighboringCells.data(), *processedVertex);
354329

355-
auto& groupIndex = groupIndexes[processedVertex->index];
330+
auto& groupIndex = groupIndexes[processedVertex->index];
356331

357-
//iterate among all neighboring cells
358-
for (int i = 0; i < cellCount; i++)
359-
{
360-
VertexHashMap::BucketBounds bounds = vertices.getBucketBoundsByHash(neighboringCells[i]);
361-
for (auto neighbourVertex_it = bounds.begin; neighbourVertex_it != bounds.end; neighbourVertex_it++)
362-
{
363-
const auto neighbourGroupIndex = groupIndexes[neighbourVertex_it->index];
364-
365-
hlsl::float32_t3 normal1, normal2;
366-
polygon->getNormalView().decodeElement(processedVertex->index, normal1);
367-
polygon->getNormalView().decodeElement(neighbourVertex_it->index, normal2);
368-
369-
hlsl::float32_t3 position1, position2;
370-
polygon->getPositionView().decodeElement(processedVertex->index, position1);
371-
polygon->getPositionView().decodeElement(neighbourVertex_it->index, position2);
372-
373-
// find the first group that this vertex can join
374-
if (processedVertex != neighbourVertex_it && neighbourGroupIndex && canJoinVertices(processedVertex->index, neighbourVertex_it->index))
375-
{
376-
groupIndex = neighbourGroupIndex;
377-
break;
378-
}
379-
}
380-
}
381-
if (!groupIndex)
382-
{
383-
// create new group if no group nearby that is compatible with this vertex
384-
groupIndex = groups.size();
385-
groups.push_back({ processedVertex->index});
386-
}
387-
}
388-
}
332+
//iterate among all neighboring cells
333+
for (int i = 0; i < cellCount; i++)
334+
{
335+
VertexHashMap::BucketBounds bounds = vertices.getBucketBoundsByHash(neighboringCells[i]);
336+
for (auto neighbourVertex_it = bounds.begin; neighbourVertex_it != bounds.end; neighbourVertex_it++)
337+
{
338+
const auto neighbourGroupIndex = groupIndexes[neighbourVertex_it->index];
339+
340+
hlsl::float32_t3 normal1, normal2;
341+
polygon->getNormalView().decodeElement(processedVertex->index, normal1);
342+
polygon->getNormalView().decodeElement(neighbourVertex_it->index, normal2);
343+
344+
hlsl::float32_t3 position1, position2;
345+
polygon->getPositionView().decodeElement(processedVertex->index, position1);
346+
polygon->getPositionView().decodeElement(neighbourVertex_it->index, position2);
347+
348+
// find the first group that this vertex can join
349+
if (processedVertex != neighbourVertex_it && neighbourGroupIndex && canJoinVertices(processedVertex->index, neighbourVertex_it->index))
350+
{
351+
groupIndex = neighbourGroupIndex;
352+
break;
353+
}
354+
}
355+
}
356+
if (!groupIndex)
357+
{
358+
// create new group if no group nearby that is compatible with this vertex
359+
groupIndex = groups.size();
360+
groups.push_back({ processedVertex->index});
361+
}
362+
}
389363

390364
auto outPolygon = core::move_and_static_cast<ICPUPolygonGeometry>(polygon->clone(0u));
391365
outPolygon->setIndexing(IPolygonGeometryBase::TriangleList());

src/nbl/asset/utils/CSmoothNormalGenerator.h

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class CSmoothNormalGenerator
2929
collection_t::iterator end;
3030
};
3131

32+
3233
public:
3334
VertexHashMap(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize);
3435

@@ -40,20 +41,44 @@ class CSmoothNormalGenerator
4041

4142
inline uint32_t getVertexCount() const { return m_vertices.size(); }
4243

43-
uint8_t getNeighboringCellHashes(uint32_t* outNeighbours, const CPolygonGeometryManipulator::SSNGVertexData& vertex);
44+
uint8_t getNeighboringCellHashes(uint32_t* outNeighbours, const CPolygonGeometryManipulator::SSNGVertexData& vertex);
4445

45-
inline uint32_t getBucketCount() { return m_buckets.size(); }
46-
inline BucketBounds getBucketBoundsById(uint32_t index) const { return { m_buckets[index], m_buckets[index + 1] }; }
4746
BucketBounds getBucketBoundsByHash(uint32_t hash);
4847

48+
const collection_t& vertices() const { return m_vertices; }
49+
4950
private:
51+
struct KeyAccessor
52+
{
53+
_NBL_STATIC_INLINE_CONSTEXPR size_t key_bit_count = 32ull;
54+
55+
template<auto bit_offset, auto radix_mask>
56+
inline decltype(radix_mask) operator()(const CPolygonGeometryManipulator::SSNGVertexData& item) const
57+
{
58+
return static_cast<decltype(radix_mask)>(item.hash>>static_cast<uint32_t>(bit_offset))&radix_mask;
59+
}
60+
};
61+
5062
static constexpr uint32_t invalidHash = 0xFFFFFFFF;
5163
static constexpr uint32_t primeNumber1 = 73856093;
5264
static constexpr uint32_t primeNumber2 = 19349663;
5365
static constexpr uint32_t primeNumber3 = 83492791;
5466

55-
//holds iterators pointing to beginning of each bucket, last iterator points to m_vertices.end()
56-
core::vector<collection_t::iterator> m_buckets;
67+
using sorter_t = std::variant<
68+
core::LSBSorter<KeyAccessor::key_bit_count, uint16_t>,
69+
core::LSBSorter<KeyAccessor::key_bit_count, uint32_t>,
70+
core::LSBSorter<KeyAccessor::key_bit_count, size_t>>;
71+
sorter_t m_sorter;
72+
73+
static sorter_t createSorter(size_t vertexCount)
74+
{
75+
if (vertexCount < (0x1ull << 16ull))
76+
return core::LSBSorter<KeyAccessor::key_bit_count,uint16_t>();
77+
if (vertexCount< (0x1ull << 32ull))
78+
return core::LSBSorter<KeyAccessor::key_bit_count,uint32_t>();
79+
return core::LSBSorter<KeyAccessor::key_bit_count,size_t>();
80+
}
81+
5782
collection_t m_vertices;
5883
const uint32_t m_hashTableMaxSize;
5984
const float m_cellSize;

0 commit comments

Comments
 (0)