diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index b9ee660309..cc41bba7e9 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -9,6 +9,7 @@ #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/utils/CGeometryManipulator.h" +#include "nbl/asset/utils/CSmoothNormalGenerator.h" namespace nbl::asset { @@ -17,17 +18,6 @@ namespace nbl::asset class NBL_API2 CPolygonGeometryManipulator { public: - //vertex data needed for CSmoothNormalGenerator - struct SSNGVertexData - { - uint32_t index; //offset of the vertex into index buffer - uint32_t hash; // - float wage; //angle wage of the vertex - hlsl::float32_t3 position; //position of the vertex in 3D space - hlsl::float32_t3 parentTriangleFaceNormal; // - }; - - using VxCmpFunction = std::function; static inline void recomputeContentHashes(ICPUPolygonGeometry* geo) { @@ -243,11 +233,14 @@ class NBL_API2 CPolygonGeometryManipulator static core::smart_refctd_ptr createUnweldedList(const ICPUPolygonGeometry* inGeo); + using SSNGVertexData = CSmoothNormalGenerator::VertexData; + using SSNGVxCmpFunction = CSmoothNormalGenerator::VxCmpFunction; + static core::smart_refctd_ptr createSmoothVertexNormal(const ICPUPolygonGeometry* inbuffer, bool enableWelding = false, float epsilon = 1.525e-5f, - VxCmpFunction vxcmp = [](const CPolygonGeometryManipulator::SSNGVertexData& v0, const CPolygonGeometryManipulator::SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) + SSNGVxCmpFunction vxcmp = [](const SSNGVertexData& v0, const SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) { - static constexpr float cosOf45Deg = 0.70710678118f; - return dot(v0.parentTriangleFaceNormal,v1.parentTriangleFaceNormal) > cosOf45Deg; + constexpr float cosOf45Deg = 0.70710678118f; + return dot(normalize(v0.weightedNormal),normalize(v1.weightedNormal)) > cosOf45Deg; }); #if 0 // TODO: REDO @@ -277,14 +270,14 @@ class NBL_API2 CPolygonGeometryManipulator }; typedef std::function VxCmpFunction; - //! Compares two attributes of floating point types in accordance with passed error metric. - /** - @param _a First attribute. - @param _b Second attribute. - @param _cpa Component count. - @param _errMetric Error metric info. - */ - static inline bool compareFloatingPointAttribute(const core::vectorSIMDf& _a, const core::vectorSIMDf& _b, size_t _cpa, const SErrorMetric& _errMetric) + //! Compares two attributes of floating point types in accordance with passed error metric. + /** + @param _a First attribute. + @param _b Second attribute. + @param _cpa Component count. + @param _errMetric Error metric info. + */ + static inline bool compareFloatingPointAttribute(const core::vectorSIMDf& _a, const core::vectorSIMDf& _b, size_t _cpa, const SErrorMetric& _errMetric) { using ErrorF_t = core::vectorSIMDf(*)(core::vectorSIMDf, core::vectorSIMDf); @@ -365,41 +358,41 @@ class NBL_API2 CPolygonGeometryManipulator } - //! Swaps the index buffer for a new index buffer with invalid triangles removed. - /** - Invalid triangle is such consisting of two or more same indices. - @param _input Input index buffer. - @param _idxType Type of indices in the index buffer. - @returns New index buffer or nullptr if input indices were of unknown type or _input was nullptr. - */ - static void filterInvalidTriangles(ICPUMeshBuffer* _input); - - //! Creates index buffer from input converting it to indices for line list primitives. Input is assumed to be indices for line strip. - /** - @param _input Input index buffer's data. - @param _idxCount Index count. - @param _inIndexType Type of input index buffer data (32bit or 16bit). - @param _outIndexType Type of output index buffer data (32bit or 16bit). - */ - static core::smart_refctd_ptr idxBufferFromLineStripsToLines(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); - - //! Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle strip. - /** - @param _input Input index buffer's data. - @param _idxCount Index count. - @param _inIndexType Type of input index buffer data (32bit or 16bit). - @param _outIndexType Type of output index buffer data (32bit or 16bit). - */ - static core::smart_refctd_ptr idxBufferFromTriangleStripsToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); - - //! Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle fan. - /** - @param _input Input index buffer's data. - @param _idxCount Index count. - @param _inIndexType Type of input index buffer data (32bit or 16bit). - @param _outIndexType Type of output index buffer data (32bit or 16bit). - */ - static core::smart_refctd_ptr idxBufferFromTrianglesFanToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); + //! Swaps the index buffer for a new index buffer with invalid triangles removed. + /** + Invalid triangle is such consisting of two or more same indices. + @param _input Input index buffer. + @param _idxType Type of indices in the index buffer. + @returns New index buffer or nullptr if input indices were of unknown type or _input was nullptr. + */ + static void filterInvalidTriangles(ICPUMeshBuffer* _input); + + //! Creates index buffer from input converting it to indices for line list primitives. Input is assumed to be indices for line strip. + /** + @param _input Input index buffer's data. + @param _idxCount Index count. + @param _inIndexType Type of input index buffer data (32bit or 16bit). + @param _outIndexType Type of output index buffer data (32bit or 16bit). + */ + static core::smart_refctd_ptr idxBufferFromLineStripsToLines(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); + + //! Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle strip. + /** + @param _input Input index buffer's data. + @param _idxCount Index count. + @param _inIndexType Type of input index buffer data (32bit or 16bit). + @param _outIndexType Type of output index buffer data (32bit or 16bit). + */ + static core::smart_refctd_ptr idxBufferFromTriangleStripsToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); + + //! Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle fan. + /** + @param _input Input index buffer's data. + @param _idxCount Index count. + @param _inIndexType Type of input index buffer data (32bit or 16bit). + @param _outIndexType Type of output index buffer data (32bit or 16bit). + */ + static core::smart_refctd_ptr idxBufferFromTrianglesFanToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); //! static inline std::array getTriangleIndices(const ICPUMeshBuffer* mb, uint32_t triangleIx) @@ -635,7 +628,7 @@ class NBL_API2 CPolygonGeometryManipulator //! Creates a copy of a mesh with vertices welded /** \param mesh Input mesh - \param errMetrics Array of size EVAI_COUNT. Describes error metric for each vertex attribute (used if attribute is of floating point or normalized type). + \param errMetrics Array of size EVAI_COUNT. Describes error metric for each vertex attribute (used if attribute is of floating point or normalized type). \param tolerance The threshold for vertex comparisons. \return Mesh without redundant vertices. */ static core::smart_refctd_ptr createMeshBufferWelded(ICPUMeshBuffer *inbuffer, const SErrorMetric* errMetrics, const bool& optimIndexType = true, const bool& makeNewMesh = false); @@ -653,12 +646,12 @@ class NBL_API2 CPolygonGeometryManipulator */ static void requantizeMeshBuffer(ICPUMeshBuffer* _meshbuffer, const SErrorMetric* _errMetric); - //! Creates a 32bit index buffer for a mesh with primitive types changed to list types - /**# + //! Creates a 32bit index buffer for a mesh with primitive types changed to list types + /**# @param _newPrimitiveType - @param _begin non-const iterator to beginning of meshbuffer range - @param _end non-const iterator to ending of meshbuffer range - */ + @param _begin non-const iterator to beginning of meshbuffer range + @param _end non-const iterator to ending of meshbuffer range + */ template static inline void homogenizePrimitiveTypeAndIndices(Iterator _begin, Iterator _end, const E_PRIMITIVE_TOPOLOGY _newPrimitiveType, const E_INDEX_TYPE outIndexType = EIT_32BIT) { diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h new file mode 100644 index 0000000000..ce094fabe4 --- /dev/null +++ b/include/nbl/asset/utils/CVertexHashGrid.h @@ -0,0 +1,221 @@ +#ifndef _NBL_ASSET_C_VERTEX_HASH_MAP_H_INCLUDED_ +#define _NBL_ASSET_C_VERTEX_HASH_MAP_H_INCLUDED_ + +#include "nbl/core/declarations.h" + +namespace nbl::asset +{ + +template +concept HashGridVertexData = requires(T obj, T const cobj, uint32_t hash) { + { cobj.getHash() } -> std::same_as; + { obj.setHash(hash) } -> std::same_as; + { cobj.getPosition() } -> std::same_as; +}; + +template +concept HashGridIteratorFn = HashGridVertexData && requires(Fn && fn, T const cobj) +{ + // return whether hash grid should continue the iteration + { std::invoke(std::forward(fn), cobj) } -> std::same_as; +}; + +// TODO: implement a class template that take position type(either float32_t3 or float64_t3 as template argument +template +class CVertexHashGrid +{ +public: + + using vertex_data_t = VertexData; + using collection_t = core::vector; + struct BucketBounds + { + collection_t::const_iterator begin; + collection_t::const_iterator end; + }; + + inline CVertexHashGrid(float cellSize, uint32_t hashTableMaxSizeLog2, size_t vertexCountReserve = 8192) : + m_cellSize(cellSize), + m_hashTableMaxSize(1llu << hashTableMaxSizeLog2), + m_sorter(createSorter(vertexCountReserve)) + { + m_vertices.reserve(vertexCountReserve); + } + + //inserts vertex into hash table + inline void add(VertexData&& vertex) + { + vertex.setHash(hash(vertex)); + m_vertices.push_back(std::move(vertex)); + } + + inline void bake() + { + auto scratchBuffer = collection_t(m_vertices.size()); + + auto finalSortedOutput = std::visit( [&](auto& sorter) + { + return sorter(m_vertices.data(), scratchBuffer.data(), m_vertices.size(), KeyAccessor()); + }, m_sorter ); + + if (finalSortedOutput != m_vertices.data()) + m_vertices = std::move(scratchBuffer); + } + + inline const collection_t& vertices() const { return m_vertices; } + + inline uint32_t getVertexCount() const { return m_vertices.size(); } + + template Fn> + inline void forEachBroadphaseNeighborCandidates(const hlsl::float32_t3& position, Fn&& fn) const + { + std::array neighboringCells; + const auto cellCount = getNeighboringCellHashes(neighboringCells.data(), position); + + //iterate among all neighboring cells + for (uint8_t i = 0; i < cellCount; i++) + { + const auto& neighborCell = neighboringCells[i]; + BucketBounds bounds = getBucketBoundsByHash(neighborCell); + for (; bounds.begin != bounds.end; bounds.begin++) + { + const vertex_data_t& neighborVertex = *bounds.begin; + if (!std::invoke(std::forward(fn), neighborVertex)) break; + } + } + } + +private: + struct KeyAccessor + { + constexpr static inline size_t key_bit_count = 32ull; + + template + inline decltype(radix_mask) operator()(const VertexData& item) const + { + return static_cast(item.getHash() >> static_cast(bit_offset)) & radix_mask; + } + }; + + static constexpr inline uint32_t primeNumber1 = 73856093; + static constexpr inline uint32_t primeNumber2 = 19349663; + static constexpr inline uint32_t primeNumber3 = 83492791; + + using sorter_t = std::variant< + core::RadixLsbSorter, + core::RadixLsbSorter, + core::RadixLsbSorter>; + sorter_t m_sorter; + + inline static sorter_t createSorter(size_t vertexCount) + { + if (vertexCount < (0x1ull << 16ull)) + return core::RadixLsbSorter(); + if (vertexCount < (0x1ull << 32ull)) + return core::RadixLsbSorter(); + return core::RadixLsbSorter(); + } + + collection_t m_vertices; + const uint32_t m_hashTableMaxSize; + const float m_cellSize; + + inline uint32_t hash(const VertexData& vertex) const + { + const hlsl::float32_t3 position = floor(vertex.getPosition() / m_cellSize); + const auto position_uint32 = hlsl::uint32_t3(position.x, position.y, position.z); + return hash(position_uint32); + } + + inline uint32_t hash(const hlsl::uint32_t3& position) const + { + return ((position.x * primeNumber1) ^ + (position.y * primeNumber2) ^ + (position.z * primeNumber3))& (m_hashTableMaxSize - 1); + } + + inline uint8_t getNeighboringCellHashes(uint32_t* outNeighbors, hlsl::float32_t3 position) const + { + // We substract the coordinate by 0.5 since the cellSize is expected to be twice the epsilon. This is to snap the vertex into the cell that contain the most bottom left cell that could collide with of our vertex. + // ------- ------- + // | | y| | | | + // | |x | | |y | + // ------- -> ------- + // | | | | x| | + // | | | | | | + // ------- ------- + // |2e|e| + // In the example,x is snapped into a different cell which is the most bottom left cell that could collide with x. Since we have move it into its bottom left candidate, there is no need to check to the bottom and to the left of the snapped coordinate. We only need to check the upper and to the right of the snapped cell, which include the original cell. Note that we do not need to check the upper and to the right of the original cell. The cell size is 2 * epsilon and x is located on the lower and lefter side of the cell. + // Contrary to x, y is still snapped into its original cell. It means the most bottom left cell that collide with y is its own cell. + // The above scheme is to reduce the number of cell candidates that we need to check for collision, from 9 cell to 4 cell in 2d, or from 27 cells to 8 cells in 3d. + // both 0.x and -0.x would be converted to 0 if we directly casting the position to unsigned integer. Causing the 0 to be crowded then the rest of the cells. So we use floor here to spread the vertex more uniformly. + hlsl::float32_t3 cellfloatcoord = floor(position / m_cellSize - hlsl::float32_t3(0.5)); + hlsl::uint32_t3 baseCoord = hlsl::uint32_t3(static_cast(cellfloatcoord.x), static_cast(cellfloatcoord.y), static_cast(cellfloatcoord.z)); + + uint8_t neighborCount = 0; + + outNeighbors[neighborCount] = hash(baseCoord); + neighborCount++; + + auto addUniqueNeighbor = [&neighborCount, outNeighbors](uint32_t hashval) + { + if (std::find(outNeighbors, outNeighbors + neighborCount, hashval) == outNeighbors + neighborCount) + { + outNeighbors[neighborCount] = hashval; + neighborCount++; + } + }; + + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 0, 1))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 1, 0))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 0, 0))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 1, 0))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 0, 1))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(0, 1, 1))); + addUniqueNeighbor(hash(baseCoord + hlsl::uint32_t3(1, 1, 1))); + + return neighborCount; + } + + inline BucketBounds getBucketBoundsByHash(uint32_t hash) const + { + const auto skipListBound = std::visit([&](auto& sorter) + { + auto hashBound = sorter.getMostSignificantRadixBound(hash); + return std::pair(m_vertices.begin() + hashBound.first, m_vertices.begin() + hashBound.second); + }, m_sorter); + + auto begin = std::lower_bound( + skipListBound.first, + skipListBound.second, + hash, + [](const VertexData& vertex, uint32_t hash) + { + return vertex.getHash() < hash; + }); + + auto end = std::upper_bound( + skipListBound.first, + skipListBound.second, + hash, + [](uint32_t hash, const VertexData& vertex) + { + return hash < vertex.getHash(); + }); + + const auto beginIx = begin - m_vertices.begin(); + const auto endIx = end - m_vertices.begin(); + //bucket missing + if (begin == end) + return { m_vertices.end(), m_vertices.end() }; + + //bucket missing + if (begin->hash != hash) + return { m_vertices.end(), m_vertices.end() }; + + return { begin, end }; + } +}; + +} +#endif \ No newline at end of file diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h new file mode 100644 index 0000000000..9faf1b33a8 --- /dev/null +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -0,0 +1,353 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_C_POLYGON_VERTEX_WELDER_H_INCLUDED_ +#define _NBL_ASSET_C_POLYGON_VERTEX_WELDER_H_INCLUDED_ + +#include "nbl/asset/utils/CPolygonGeometryManipulator.h" + +namespace nbl::asset { + +template +concept VertexWelderAccelerationStructure = requires(T const cobj, hlsl::float32_t3 position, std::function fn) +{ + typename T::vertex_data_t; + { std::same_as }; + { cobj.forEachBroadphaseNeighborCandidates(position, fn) } -> std::same_as; +}; + +class CVertexWelder { + + public: + + class WeldPredicate + { + public: + virtual bool init(const ICPUPolygonGeometry* geom) = 0; + virtual bool operator()(const ICPUPolygonGeometry* geom, uint32_t idx1, uint32_t idx2) const = 0; + virtual ~WeldPredicate() = default; + }; + + class DefaultWeldPredicate : public WeldPredicate + { + private: + + struct SDataViewContext + { + uint32_t channelCount : 3; + uint32_t byteSize: 29; + }; + + static inline bool isIntegralElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, uint32_t byteSize) + { + const auto* basePtr = reinterpret_cast(view.getPointer()); + const auto stride = view.composed.stride; + return (memcmp(basePtr + (index1 * stride), basePtr + (index2 * stride), byteSize) == 0); + } + + static inline bool isRealElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, uint32_t channelCount, float epsilon) + { + hlsl::float64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + for (auto channel_i = 0u; channel_i < channelCount; channel_i++) + { + const auto diff = abs(val1[channel_i] - val2[channel_i]); + if (diff > epsilon) return false; + } + return true; + } + + static inline bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, const SDataViewContext& context, uint32_t index1, uint32_t index2, float epsilon) + { + if (context.byteSize == 0) return true; + + assert(view); + assert(view.composed.isFormatted()); + assert(IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat); + switch (view.composed.rangeFormat) + { + case IGeometryBase::EAABBFormat::U64: + case IGeometryBase::EAABBFormat::U32: + case IGeometryBase::EAABBFormat::S64: + case IGeometryBase::EAABBFormat::S32: + { + return isIntegralElementEqual(view, index1, index2, context.byteSize); + } + default: + { + return isRealElementEqual(view, index1, index2, context.channelCount, epsilon); + } + } + return true; + } + + static inline bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, const SDataViewContext& context, uint32_t index1, uint32_t index2, float epsilon) + { + if (context.byteSize == 0) return true; + + assert(view); + assert(view.composed.isFormatted()); + assert(IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat); + switch (view.composed.rangeFormat) + { + case IGeometryBase::EAABBFormat::U64: + case IGeometryBase::EAABBFormat::U32: + case IGeometryBase::EAABBFormat::S64: + case IGeometryBase::EAABBFormat::S32: + { + return isIntegralElementEqual(view, index1, index2, context.byteSize); + } + default: + { + if (context.channelCount != 3) + return isRealElementEqual(view, index1, index2, context.channelCount, epsilon); + + hlsl::float64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + return (1.0 - hlsl::dot(val1, val2)) < epsilon; + } + } + } + + float m_epsilon; + + SDataViewContext m_positionViewContext; + SDataViewContext m_normalViewContext; + + struct SJointViewContext + { + SDataViewContext indices; + SDataViewContext weights; + }; + core::vector m_jointViewContexts; + + core::vector m_auxAttributeViewContexts; + + public: + + inline DefaultWeldPredicate(float epsilon) : m_epsilon(epsilon) {} + + inline bool init(const ICPUPolygonGeometry* polygon) override + { + auto isViewFormatValid = [](const ICPUPolygonGeometry::SDataView& view) + { + return view.composed.isFormatted() && IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat; + }; + auto getViewContext = [](const ICPUPolygonGeometry::SDataView& view) -> SDataViewContext + { + if (!view) + { + return { + .channelCount = 0, + .byteSize = 0 + }; + } + return { + .channelCount = getFormatChannelCount(view.composed.format), + .byteSize = getTexelOrBlockBytesize(view.composed.format) + }; + }; + + if (!polygon->valid()) return false; + + const auto& positionView = polygon->getPositionView(); + if (IGeometryBase::getMatchingAABBFormat(positionView.composed.format) == positionView.composed.rangeFormat) return false; + m_positionViewContext = { + .channelCount = getFormatChannelCount(positionView.composed.format), + .byteSize = getTexelOrBlockBytesize(positionView.composed.format), + }; + + const auto& normalView = polygon->getNormalView(); + if (normalView && !isViewFormatValid(normalView)) return false; + m_normalViewContext = getViewContext(normalView); + + m_jointViewContexts.reserve(polygon->getJointWeightViews().size()); + for (const auto& jointWeightView : polygon->getJointWeightViews()) + { + if (jointWeightView.indices && !isViewFormatValid(jointWeightView.indices)) return false; + if (jointWeightView.weights && !isViewFormatValid(jointWeightView.weights)) return false; + m_jointViewContexts.push_back({ + .indices = getViewContext(jointWeightView.indices), + .weights = getViewContext(jointWeightView.weights), + }); + } + + m_auxAttributeViewContexts.reserve(polygon->getAuxAttributeViews().size()); + for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) + { + if (auxAttributeView && !isViewFormatValid(auxAttributeView)) return false; + m_auxAttributeViewContexts.push_back(getViewContext(auxAttributeView)); + } + + } + + inline bool operator()(const ICPUPolygonGeometry* polygon, uint32_t index1, uint32_t index2) const override + { + if (!isAttributeValEqual(polygon->getPositionView(), m_positionViewContext, index1, index2, m_epsilon)) + return false; + + const auto& normalView = polygon->getNormalView(); + if (!isAttributeDirEqual(normalView, m_normalViewContext, index1, index2, m_epsilon)) + return false; + + for (uint64_t joint_i = 0; joint_i < polygon->getJointWeightViews().size(); joint_i++) + { + const auto& jointWeightView = polygon->getJointWeightViews()[joint_i]; + if (!isAttributeValEqual(jointWeightView.indices, m_jointViewContexts[joint_i].indices, index1, index2, m_epsilon)) return false; + if (!isAttributeValEqual(jointWeightView.weights, m_jointViewContexts[joint_i].weights, index1, index2, m_epsilon)) return false; + } + + const auto& auxAttrViews = polygon->getAuxAttributeViews(); + for (uint64_t aux_i = 0; aux_i < auxAttrViews.size(); aux_i++) + { + if (!isAttributeValEqual(auxAttrViews[aux_i], m_auxAttributeViewContexts[aux_i], index1, index2, m_epsilon)) return false; + } + + return true; + } + + inline ~DefaultWeldPredicate() override = default; + + }; + + template + static inline core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, const AccelStructureT& as, const WeldPredicate& shouldWeldFn) { + auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); + + const auto& positionView = polygon->getPositionView(); + const auto vertexCount = positionView.getElementCount(); + + constexpr auto INVALID_INDEX = std::numeric_limits::max(); + core::vector remappedVertexIndexes(vertexCount); + + uint32_t maxRemappedIndex = 0; + // iterate by index, so that we always use the smallest index when multiple vertexes can be welded together + for (uint32_t index = 0; index < vertexCount; index++) + { + hlsl::float32_t3 position; + positionView.decodeElement(index, position); + auto remappedVertexIndex = INVALID_INDEX; + as.forEachBroadphaseNeighborCandidates(position, [&](const typename AccelStructureT::vertex_data_t& candidate) { + const auto neighborRemappedIndex = remappedVertexIndexes[candidate.index]; + // make sure we can only map higher indices to lower indices to disallow loops + if (candidate.indexgetIndexView(); + const auto remappedRangeFormat = (maxRemappedIndex - 1) < static_cast(std::numeric_limits::max()) ? IGeometryBase::EAABBFormat::U16 : IGeometryBase::EAABBFormat::U32; + + auto createRemappedIndexView = [&](size_t indexCount) { + const uint32_t indexSize = remappedRangeFormat == IGeometryBase::EAABBFormat::U16 ? sizeof(uint16_t) : sizeof(uint32_t); + auto remappedIndexBuffer = ICPUBuffer::create({indexSize * indexCount, IBuffer::EUF_INDEX_BUFFER_BIT}); + auto remappedIndexView = ICPUPolygonGeometry::SDataView{ + .composed = { + .stride = indexSize, + .rangeFormat = remappedRangeFormat + }, + .src = { + .offset = 0, + .size = remappedIndexBuffer->getSize(), + .buffer = std::move(remappedIndexBuffer) + } + }; + + if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) + { + hlsl::shapes::AABB<4, uint16_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = maxRemappedIndex; + remappedIndexView.composed.encodedDataRange.u16 = aabb; + remappedIndexView.composed.format = EF_R16_UINT; + } + else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { + hlsl::shapes::AABB<4, uint32_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = maxRemappedIndex; + remappedIndexView.composed.encodedDataRange.u32 = aabb; + remappedIndexView.composed.format = EF_R32_UINT; + } + + return remappedIndexView; + }; + + + if (indexView) + { + auto remappedIndexView = createRemappedIndexView(polygon->getIndexCount()); + auto remappedIndexes = [&]() -> bool { + auto* remappedIndexPtr = reinterpret_cast(remappedIndexView.getPointer()); + for (uint32_t index_i = 0; index_i < polygon->getIndexCount(); index_i++) + { + hlsl::vector index; + indexView.decodeElement>(index_i, index); + const auto remappedIndex = remappedVertexIndexes[index.x]; + remappedIndexPtr[index_i] = static_cast(remappedIndex); + if (remappedIndex == INVALID_INDEX) return false; + } + return true; + }; + + if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) { + if (!remappedIndexes.template operator()()) return nullptr; + } + else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { + if (!remappedIndexes.template operator()()) return nullptr; + } + + outPolygon->setIndexView(std::move(remappedIndexView)); + + } else + { + auto remappedIndexView = createRemappedIndexView(remappedVertexIndexes.size()); + + auto fillRemappedIndex = [&](){ + auto remappedIndexBufferPtr = reinterpret_cast(remappedIndexView.getPointer()); + for (uint32_t index_i = 0; index_i < remappedVertexIndexes.size(); index_i++) + { + if (remappedVertexIndexes[index_i] == INVALID_INDEX) return false; + remappedIndexBufferPtr[index_i] = remappedVertexIndexes[index_i]; + } + return true; + }; + if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) { + if (!fillRemappedIndex.template operator()()) return nullptr; + } + else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { + if (!fillRemappedIndex.template operator()()) return nullptr; + } + + outPolygon->setIndexView(std::move(remappedIndexView)); + } + + CGeometryManipulator::recomputeContentHash(outPolygon->getIndexView()); + return outPolygon; + } +}; + +} + +#endif diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl new file mode 100644 index 0000000000..4677b0e155 --- /dev/null +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace shapes +{ + +namespace util +{ + // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. + template + vector compInternalAngle(NBL_CONST_REF_ARG(vector) e0, NBL_CONST_REF_ARG(vector) e1, NBL_CONST_REF_ARG(vector) e2) + { + // Calculate this triangle's weight for each of its three m_vertices + // start by calculating the lengths of its sides + const float_t a = hlsl::dot(e0, e0); + const float_t asqrt = hlsl::sqrt(a); + const float_t b = hlsl::dot(e1, e1); + const float_t bsqrt = hlsl::sqrt(b); + const float_t c = hlsl::dot(e2, e2); + const float_t csqrt = hlsl::sqrt(c); + + const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); + const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); + const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); + // use them to find the angle at each vertex + return vector(angle0, angle1, angle2); + } +} + +} +} +} + +#endif diff --git a/include/nbl/core/algorithm/radix_sort.h b/include/nbl/core/algorithm/radix_sort.h index 749a8b8309..cd1a0aaa6a 100644 --- a/include/nbl/core/algorithm/radix_sort.h +++ b/include/nbl/core/algorithm/radix_sort.h @@ -38,10 +38,10 @@ constexpr int8_t find_msb(const T& a_variable) { static_assert(std::is_unsigned::value, "Variable must be unsigned"); - constexpr uint8_t number_of_bits = std::numeric_limits::digits; + constexpr int8_t number_of_bits = std::numeric_limits::digits; const std::bitset variable_bitset{a_variable}; - for (uint8_t msb = number_of_bits - 1; msb >= 0; msb--) + for (int8_t msb = number_of_bits - 1; msb >= 0; msb--) { if (variable_bitset[msb] == 1) return msb; @@ -49,61 +49,77 @@ constexpr int8_t find_msb(const T& a_variable) return -1; } + +} + template -struct RadixSorter +struct RadixLsbSorter { - _NBL_STATIC_INLINE_CONSTEXPR uint16_t histogram_bytesize = 8192u; - _NBL_STATIC_INLINE_CONSTEXPR size_t histogram_size = size_t(histogram_bytesize)/sizeof(histogram_t); - _NBL_STATIC_INLINE_CONSTEXPR uint8_t radix_bits = find_msb(histogram_size); - _NBL_STATIC_INLINE_CONSTEXPR size_t last_pass = (key_bit_count-1ull)/size_t(radix_bits); - _NBL_STATIC_INLINE_CONSTEXPR uint16_t radix_mask = (1u< inline RandomIt operator()(RandomIt input, RandomIt output, const histogram_t rangeSize, const KeyAccessor& comp) { return pass(input,output,rangeSize,comp); } + + inline std::pair getMostSignificantRadixBound(size_t key) const + { + constexpr histogram_t shift = static_cast(radix_bits * last_pass); + const auto histogramIx = (key >> shift) & radix_mask; + const auto boundBegin = histogramIx == 0 ? 0 : m_histogram[histogramIx - 1]; + return { boundBegin, m_histogram[histogramIx] }; + } + private: template inline RandomIt pass(RandomIt input, RandomIt output, const histogram_t rangeSize, const KeyAccessor& comp) { // clear - std::fill_n(histogram,histogram_size,static_cast(0u)); + std::fill_n(m_histogram,histogram_size,static_cast(0u)); + // count constexpr histogram_t shift = static_cast(radix_bits*pass_ix); - for (histogram_t i=0u; i(input[i])]; - // prefix sum - std::inclusive_scan(histogram,histogram+histogram_size,histogram); - // scatter - for (histogram_t i=rangeSize; i!=0u;) + for (histogram_t i = rangeSize; i != 0;) { i--; - output[--histogram[comp.template operator()(input[i])]] = input[i]; + ++m_histogram[comp.template operator()(input[i])]; + } + + // prefix sum + std::exclusive_scan(m_histogram, m_histogram + histogram_size, m_histogram, 0); + + // scatter. After scatter m_histogram now become a skiplist + for (histogram_t i = 0; i < rangeSize; i++) + { + const auto& val = input[i]; + const auto& histogramIx = comp.template operator()(val); + output[m_histogram[histogramIx]++] = val; } if constexpr (pass_ix != last_pass) - return pass(output,input,rangeSize,comp); - else - return output; + return pass(output,input,rangeSize,comp); + return output; } - alignas(sizeof(histogram_t)) histogram_t histogram[histogram_size]; + alignas(sizeof(histogram_t)) histogram_t m_histogram[histogram_size]; }; -} - template inline RandomIt radix_sort(RandomIt input, RandomIt scratch, const size_t rangeSize, const KeyAccessor& comp) { assert(std::abs(std::distance(input,scratch))>=rangeSize); if (rangeSize(0x1ull<<16ull)) - return impl::RadixSorter()(input,scratch,static_cast(rangeSize),comp); + return RadixLsbSorter()(input,scratch,static_cast(rangeSize),comp); if (rangeSize(0x1ull<<32ull)) - return impl::RadixSorter()(input,scratch,static_cast(rangeSize),comp); + return RadixLsbSorter()(input,scratch,static_cast(rangeSize),comp); else - return impl::RadixSorter()(input,scratch,rangeSize,comp); + return RadixLsbSorter()(input,scratch,rangeSize,comp); } //! Because Radix Sort needs O(2n) space and a number of passes dependant on the key length, the final sorted range can be either in `input` or `scratch` diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 9dc8cdd42a..2aa2e08fe5 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -83,7 +83,6 @@ template requires(std::is_same_v || std::is_same_v) static ICPUPolygonGeometry::SDataView createIndexView(size_t indexCount, size_t maxIndex) { - const auto bytesize = sizeof(IndexT) * indexCount; auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index c2b360c7af..b355f7fb43 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -9,6 +9,7 @@ #include #include "nbl/asset/utils/CPolygonGeometryManipulator.h" +#include "nbl/asset/utils/CVertexWelder.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" @@ -17,140 +18,145 @@ namespace nbl::asset core::smart_refctd_ptr CPolygonGeometryManipulator::createUnweldedList(const ICPUPolygonGeometry* inGeo) { - const auto* indexing = inGeo->getIndexingCallback(); - if (!indexing) - return nullptr; - - const auto indexView = inGeo->getIndexView(); - const auto primCount = inGeo->getPrimitiveCount(); - const uint8_t degree = indexing->degree(); - const auto outIndexCount = primCount*degree; - if (outIndexCount(inGeo->clone(0u)); - - auto* outGeo = outGeometry.get(); - outGeo->setIndexing(IPolygonGeometryBase::NGonList(degree)); - - auto createOutView = [&](const ICPUPolygonGeometry::SDataView& inView) -> ICPUPolygonGeometry::SDataView - { - if (!inView) - return {}; - auto buffer = ICPUBuffer::create({ outIndexCount*inView.composed.stride , inView.src.buffer->getUsageFlags() }); - return { - .composed = inView.composed, - .src = {.offset = 0, .size = buffer->getSize(), .buffer = std::move(buffer)} - }; - }; - - const auto inIndexView = inGeo->getIndexView(); - auto outIndexView = createOutView(inIndexView); - auto indexBuffer = outIndexView.src.buffer; - const auto indexSize = inIndexView.composed.stride; - std::byte* outIndices = reinterpret_cast(outIndexView.getPointer()); - outGeo->setIndexView({}); - - const auto inVertexView = inGeo->getPositionView(); - auto outVertexView = createOutView(inVertexView); - auto vertexBuffer = outVertexView.src.buffer; - const auto vertexSize = inVertexView.composed.stride; - const std::byte* inVertices = reinterpret_cast(inVertexView.getPointer()); - std::byte* const outVertices = reinterpret_cast(vertexBuffer->getPointer()); - outGeo->setPositionView(std::move(outVertexView)); - - const auto inNormalView = inGeo->getNormalView(); - const std::byte* const inNormals = reinterpret_cast(inNormalView.getPointer()); - auto outNormalView = createOutView(inNormalView); - auto outNormalBuffer = outNormalView.src.buffer; - outGeo->setNormalView(std::move(outNormalView)); - - outGeometry->getJointWeightViews()->resize(inGeo->getJointWeightViews().size()); - for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) - { - auto& inJointWeightView = inGeo->getJointWeightViews()[jointView_i]; - auto& outJointWeightView = outGeometry->getJointWeightViews()->operator[](jointView_i); - outJointWeightView.indices = createOutView(inJointWeightView.indices); - outJointWeightView.weights = createOutView(inJointWeightView.weights); - } - - outGeometry->getAuxAttributeViews()->resize(inGeo->getAuxAttributeViews().size()); - for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) - outGeo->getAuxAttributeViews()->operator[](auxView_i) = createOutView(inGeo->getAuxAttributeViews()[auxView_i]); - - std::array indices; - for (uint64_t prim_i = 0u; prim_i < primCount; prim_i++) - { - IPolygonGeometryBase::IIndexingCallback::SContext context{ - .indexBuffer = indexView.getPointer(), - .indexSize = indexView.composed.stride, - .beginPrimitive = prim_i, - .endPrimitive = prim_i + 1, - .out = indices.data() - }; - indexing->operator()(context); - for (uint8_t primIndex_i=0; primIndex_i(outNormalBuffer->getPointer()); - const auto normalSize = inNormalView.composed.stride; - memcpy(outNormals + outIndex * normalSize, inNormals + inIndex * normalSize, normalSize); - } - - for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) - { - auto& inView = inGeo->getJointWeightViews()[jointView_i]; - auto& outView = outGeometry->getJointWeightViews()->operator[](jointView_i); - - const std::byte* const inJointIndices = reinterpret_cast(inView.indices.getPointer()); - const auto jointIndexSize = inView.indices.composed.stride; - std::byte* const outJointIndices = reinterpret_cast(outView.indices.getPointer()); - memcpy(outJointIndices + outIndex * jointIndexSize, inJointIndices + inIndex * jointIndexSize, jointIndexSize); - - const std::byte* const inWeights = reinterpret_cast(inView.weights.getPointer()); - const auto jointWeightSize = inView.weights.composed.stride; - std::byte* const outWeights = reinterpret_cast(outView.weights.getPointer()); - memcpy(outWeights + outIndex * jointWeightSize, outWeights + inIndex * jointWeightSize, jointWeightSize); - } - - for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) - { - auto& inView = inGeo->getAuxAttributeViews()[auxView_i]; - auto& outView = outGeometry->getAuxAttributeViews()->operator[](auxView_i); - const auto attrSize = inView.composed.stride; - const std::byte* const inAuxs = reinterpret_cast(inView.getPointer()); - std::byte* const outAuxs = reinterpret_cast(outView.getPointer()); - memcpy(outAuxs + outIndex * attrSize, inAuxs + inIndex * attrSize, attrSize); - } - } - } - - recomputeContentHashes(outGeo); - return outGeometry; + const auto* indexing = inGeo->getIndexingCallback(); + if (!indexing) + return nullptr; + + const auto indexView = inGeo->getIndexView(); + const auto primCount = inGeo->getPrimitiveCount(); + const uint8_t degree = indexing->degree(); + const auto outIndexCount = primCount*degree; + if (outIndexCount(inGeo->clone(0u)); + + auto* outGeo = outGeometry.get(); + outGeo->setIndexing(IPolygonGeometryBase::NGonList(degree)); + + auto createOutView = [&](const ICPUPolygonGeometry::SDataView& inView) -> ICPUPolygonGeometry::SDataView + { + if (!inView) + return {}; + auto buffer = ICPUBuffer::create({ outIndexCount*inView.composed.stride , inView.src.buffer->getUsageFlags() }); + return { + .composed = inView.composed, + .src = {.offset = 0, .size = buffer->getSize(), .buffer = std::move(buffer)} + }; + }; + + const auto inIndexView = inGeo->getIndexView(); + auto outIndexView = createOutView(inIndexView); + auto indexBuffer = outIndexView.src.buffer; + const auto indexSize = inIndexView.composed.stride; + std::byte* outIndices = reinterpret_cast(outIndexView.getPointer()); + outGeo->setIndexView({}); + + const auto inVertexView = inGeo->getPositionView(); + auto outVertexView = createOutView(inVertexView); + auto vertexBuffer = outVertexView.src.buffer; + const auto vertexSize = inVertexView.composed.stride; + const std::byte* inVertices = reinterpret_cast(inVertexView.getPointer()); + std::byte* const outVertices = reinterpret_cast(vertexBuffer->getPointer()); + outGeo->setPositionView(std::move(outVertexView)); + + const auto inNormalView = inGeo->getNormalView(); + const std::byte* const inNormals = reinterpret_cast(inNormalView.getPointer()); + auto outNormalView = createOutView(inNormalView); + auto outNormalBuffer = outNormalView.src.buffer; + outGeo->setNormalView(std::move(outNormalView)); + + outGeometry->getJointWeightViews()->resize(inGeo->getJointWeightViews().size()); + for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) + { + auto& inJointWeightView = inGeo->getJointWeightViews()[jointView_i]; + auto& outJointWeightView = outGeometry->getJointWeightViews()->operator[](jointView_i); + outJointWeightView.indices = createOutView(inJointWeightView.indices); + outJointWeightView.weights = createOutView(inJointWeightView.weights); + } + + outGeometry->getAuxAttributeViews()->resize(inGeo->getAuxAttributeViews().size()); + for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) + outGeo->getAuxAttributeViews()->operator[](auxView_i) = createOutView(inGeo->getAuxAttributeViews()[auxView_i]); + + std::array indices; + for (uint64_t prim_i = 0u; prim_i < primCount; prim_i++) + { + IPolygonGeometryBase::IIndexingCallback::SContext context{ + .indexBuffer = indexView.getPointer(), + .indexSize = indexView.composed.stride, + .beginPrimitive = prim_i, + .endPrimitive = prim_i + 1, + .out = indices.data() + }; + indexing->operator()(context); + for (uint8_t primIndex_i=0; primIndex_i(outNormalBuffer->getPointer()); + const auto normalSize = inNormalView.composed.stride; + memcpy(outNormals + outIndex * normalSize, inNormals + inIndex * normalSize, normalSize); + } + + for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) + { + auto& inView = inGeo->getJointWeightViews()[jointView_i]; + auto& outView = outGeometry->getJointWeightViews()->operator[](jointView_i); + + const std::byte* const inJointIndices = reinterpret_cast(inView.indices.getPointer()); + const auto jointIndexSize = inView.indices.composed.stride; + std::byte* const outJointIndices = reinterpret_cast(outView.indices.getPointer()); + memcpy(outJointIndices + outIndex * jointIndexSize, inJointIndices + inIndex * jointIndexSize, jointIndexSize); + + const std::byte* const inWeights = reinterpret_cast(inView.weights.getPointer()); + const auto jointWeightSize = inView.weights.composed.stride; + std::byte* const outWeights = reinterpret_cast(outView.weights.getPointer()); + memcpy(outWeights + outIndex * jointWeightSize, outWeights + inIndex * jointWeightSize, jointWeightSize); + } + + for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) + { + auto& inView = inGeo->getAuxAttributeViews()[auxView_i]; + auto& outView = outGeometry->getAuxAttributeViews()->operator[](auxView_i); + const auto attrSize = inView.composed.stride; + const std::byte* const inAuxs = reinterpret_cast(inView.getPointer()); + std::byte* const outAuxs = reinterpret_cast(outView.getPointer()); + memcpy(outAuxs + outIndex * attrSize, inAuxs + inIndex * attrSize, attrSize); + } + } + } + + recomputeContentHashes(outGeo); + return outGeometry; } -core::smart_refctd_ptr CPolygonGeometryManipulator::createSmoothVertexNormal(const ICPUPolygonGeometry* inPolygon, bool enableWelding, float epsilon, VxCmpFunction vxcmp) +core::smart_refctd_ptr CPolygonGeometryManipulator::createSmoothVertexNormal(const ICPUPolygonGeometry* inPolygon, bool enableWelding, float epsilon, SSNGVxCmpFunction vxcmp) { - if (!inPolygon) - { - _NBL_DEBUG_BREAK_IF(true); - return nullptr; - } - - // Mesh need to be unwelded (TODO: why? the output only need to be unwelded, really should be checking `inPolygon->getIndexingCallback()->count()!=3`) - if (inPolygon->getIndexView() && inPolygon->getIndexingCallback()!=IPolygonGeometryBase::TriangleList()) - { - _NBL_DEBUG_BREAK_IF(true); - return nullptr; - } - - return CSmoothNormalGenerator::calculateNormals(inPolygon, enableWelding, epsilon, vxcmp); + if (!inPolygon) + { + _NBL_DEBUG_BREAK_IF(true); + return nullptr; + } + + // Mesh need to be unwelded (TODO: why? the output only need to be unwelded, really should be checking `inPolygon->getIndexingCallback()->count()!=3`) + if (inPolygon->getIndexView() && inPolygon->getIndexingCallback()!=IPolygonGeometryBase::TriangleList()) + { + _NBL_DEBUG_BREAK_IF(true); + return nullptr; + } + + auto result = CSmoothNormalGenerator::calculateNormals(inPolygon, epsilon, vxcmp); + if (enableWelding) + { + return CVertexWelder::weldVertices(result.geom.get(), result.vertexHashGrid, CVertexWelder::DefaultWeldPredicate(epsilon)); + } + return result.geom; } #if 0 @@ -159,28 +165,28 @@ core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOp if (!_inbuffer) return nullptr; - const auto* pipeline = _inbuffer->getPipeline(); - const void* ind = _inbuffer->getIndices(); + const auto* pipeline = _inbuffer->getPipeline(); + const void* ind = _inbuffer->getIndices(); if (!pipeline || !ind) return nullptr; auto outbuffer = core::move_and_static_cast(_inbuffer->clone(1u)); - outbuffer->setAttachedDescriptorSet(core::smart_refctd_ptr(const_cast(_inbuffer->getAttachedDescriptorSet()))); - outbuffer->setSkin( - SBufferBinding(reinterpret_cast&>(_inbuffer->getInverseBindPoseBufferBinding())), - SBufferBinding(reinterpret_cast&>(_inbuffer->getJointAABBBufferBinding())), - _inbuffer->getJointCount(),_inbuffer->getMaxJointsPerVertex() - ); + outbuffer->setAttachedDescriptorSet(core::smart_refctd_ptr(const_cast(_inbuffer->getAttachedDescriptorSet()))); + outbuffer->setSkin( + SBufferBinding(reinterpret_cast&>(_inbuffer->getInverseBindPoseBufferBinding())), + SBufferBinding(reinterpret_cast&>(_inbuffer->getJointAABBBufferBinding())), + _inbuffer->getJointCount(),_inbuffer->getMaxJointsPerVertex() + ); - constexpr uint32_t MAX_ATTRIBS = asset::ICPUMeshBuffer::MAX_VERTEX_ATTRIB_COUNT; + constexpr uint32_t MAX_ATTRIBS = asset::ICPUMeshBuffer::MAX_VERTEX_ATTRIB_COUNT; // Find vertex count size_t vertexCount = IMeshManipulator::upperBoundVertexID(_inbuffer); core::unordered_set buffers; for (size_t i = 0; i < MAX_ATTRIBS; ++i) - if (auto* buf = _inbuffer->getAttribBoundBuffer(i).buffer.get()) - buffers.insert(buf); + if (auto* buf = _inbuffer->getAttribBoundBuffer(i).buffer.get()) + buffers.insert(buf); size_t offsets[MAX_ATTRIBS]; memset(offsets, -1, sizeof(offsets)); @@ -195,36 +201,36 @@ core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOp { types[i] = _inbuffer->getAttribFormat(i); - const uint32_t typeSz = getTexelOrBlockBytesize(types[i]); - const size_t alignment = (typeSz/getFormatChannelCount(types[i]) == 8u) ? 8ull : 4ull; // if format 64bit per channel, then align to 8 + const uint32_t typeSz = getTexelOrBlockBytesize(types[i]); + const size_t alignment = (typeSz/getFormatChannelCount(types[i]) == 8u) ? 8ull : 4ull; // if format 64bit per channel, then align to 8 offsets[i] = lastOffset + lastSize; const size_t mod = offsets[i] % alignment; offsets[i] += mod; lastOffset = offsets[i]; - lastSize = typeSz; + lastSize = typeSz; } } const size_t vertexSize = lastOffset + lastSize; - constexpr uint32_t NEW_VTX_BUF_BINDING = 0u; - auto& vtxParams = outbuffer->getPipeline()->getCachedCreationParams().vertexInput; - vtxParams = SVertexInputParams(); - vtxParams.enabledAttribFlags = _inbuffer->getPipeline()->getCachedCreationParams().vertexInput.enabledAttribFlags; - vtxParams.enabledBindingFlags = 1u << NEW_VTX_BUF_BINDING; - vtxParams.bindings[NEW_VTX_BUF_BINDING].stride = vertexSize; - vtxParams.bindings[NEW_VTX_BUF_BINDING].inputRate = SVertexInputBindingParams::EVIR_PER_VERTEX; + constexpr uint32_t NEW_VTX_BUF_BINDING = 0u; + auto& vtxParams = outbuffer->getPipeline()->getCachedCreationParams().vertexInput; + vtxParams = SVertexInputParams(); + vtxParams.enabledAttribFlags = _inbuffer->getPipeline()->getCachedCreationParams().vertexInput.enabledAttribFlags; + vtxParams.enabledBindingFlags = 1u << NEW_VTX_BUF_BINDING; + vtxParams.bindings[NEW_VTX_BUF_BINDING].stride = vertexSize; + vtxParams.bindings[NEW_VTX_BUF_BINDING].inputRate = SVertexInputBindingParams::EVIR_PER_VERTEX; auto newVertBuffer = ICPUBuffer::create({ vertexCount*vertexSize }); - outbuffer->setVertexBufferBinding({ 0u, core::smart_refctd_ptr(newVertBuffer) }, NEW_VTX_BUF_BINDING); + outbuffer->setVertexBufferBinding({ 0u, core::smart_refctd_ptr(newVertBuffer) }, NEW_VTX_BUF_BINDING); for (size_t i = 0; i < MAX_ATTRIBS; ++i) { if (offsets[i] < 0xffffffff) { - vtxParams.attributes[i].binding = NEW_VTX_BUF_BINDING; - vtxParams.attributes[i].format = types[i]; - vtxParams.attributes[i].relativeOffset = offsets[i]; + vtxParams.attributes[i].binding = NEW_VTX_BUF_BINDING; + vtxParams.attributes[i].format = types[i]; + vtxParams.attributes[i].relativeOffset = offsets[i]; } } } @@ -254,7 +260,7 @@ core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOp { E_FORMAT type = types[activeAttribs[j]]; - if (!isNormalizedFormat(type) && (isIntegerFormat(type) || isScaledFormat(type))) + if (!isNormalizedFormat(type) && (isIntegerFormat(type) || isScaledFormat(type))) { uint32_t dst[4]; _inbuffer->getAttribute(dst, activeAttribs[j], index); @@ -277,7 +283,7 @@ core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOp ((uint16_t*)indices)[i] = remap; } - _NBL_DELETE_ARRAY(remapBuffer,vertexCount); + _NBL_DELETE_ARRAY(remapBuffer,vertexCount); _NBL_DEBUG_BREAK_IF(nextVert > vertexCount) diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index b87fc805b0..8c03ad99b9 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -5,236 +5,78 @@ #include "CSmoothNormalGenerator.h" #include "nbl/core/declarations.h" +#include "nbl/builtin/hlsl/shapes/triangle.hlsl" #include -#include namespace nbl { namespace asset { -static bool operator<(uint32_t lhs, const CPolygonGeometryManipulator::SSNGVertexData& rhs) +static bool operator<(uint32_t lhs, const CSmoothNormalGenerator::VertexData& rhs) { return lhs < rhs.hash; } -static bool operator<(const CPolygonGeometryManipulator::SSNGVertexData& lhs, uint32_t rhs) +static bool operator<(const CSmoothNormalGenerator::VertexData& lhs, uint32_t rhs) { return lhs.hash < rhs; } -static bool isAttributeEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, float epsilon) -{ - if (!view) return true; - const auto channelCount = getFormatChannelCount(view.composed.format); - switch (view.composed.rangeFormat) - { - case IGeometryBase::EAABBFormat::U64: - case IGeometryBase::EAABBFormat::U32: - { - hlsl::uint64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - for (auto channel_i = 0u; channel_i < channelCount; channel_i++) - if (val1[channel_i] != val2[channel_i]) return false; - break; - } - case IGeometryBase::EAABBFormat::S64: - case IGeometryBase::EAABBFormat::S32: - { - hlsl::int64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - for (auto channel_i = 0u; channel_i < channelCount; channel_i++) - if (val1[channel_i] != val2[channel_i]) return false; - break; - } - default: - { - hlsl::float64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - for (auto channel_i = 0u; channel_i < channelCount; channel_i++) - { - const auto diff = abs(val1[channel_i] - val2[channel_i]); - if (diff > epsilon) return false; - } - break; - } - } - return true; -} - static bool compareVertexPosition(const hlsl::float32_t3& a, const hlsl::float32_t3& b, float epsilon) { const hlsl::float32_t3 difference = abs(b - a); return (difference.x <= epsilon && difference.y <= epsilon && difference.z <= epsilon); } -static hlsl::float32_t3 getAngleWeight( - const hlsl::float32_t3& v1, - const hlsl::float32_t3& v2, - const hlsl::float32_t3& v3) -{ - auto distancesquared = [](const hlsl::float32_t3& v1, const hlsl::float32_t3& v2) - { - const auto diff = v1 - v2; - return hlsl::dot(diff, diff); - }; - // Calculate this triangle's weight for each of its three m_vertices - // start by calculating the lengths of its sides - const float a = distancesquared(v2, v3); - const float asqrt = sqrt(a); - const float b = distancesquared(v1,v3); - const float bsqrt = sqrt(b); - const float c = distancesquared(v1,v2); - const float csqrt = sqrt(c); - - // use them to find the angle at each vertex - return hlsl::float32_t3( - acosf((b + c - a) / (2.f * bsqrt * csqrt)), - acosf((-b + c + a) / (2.f * asqrt * csqrt)), - acosf((b - c + a) / (2.f * bsqrt * asqrt))); -} - -core::smart_refctd_ptr CSmoothNormalGenerator::calculateNormals(const asset::ICPUPolygonGeometry* polygon, bool enableWelding, float epsilon, CPolygonGeometryManipulator::VxCmpFunction vxcmp) -{ - VertexHashMap vertexArray = setupData(polygon, epsilon); - const auto smoothPolygon = processConnectedVertices(polygon, vertexArray, epsilon,vxcmp); - - if (enableWelding) - { - return weldVertices(smoothPolygon.get(), vertexArray, epsilon); - } - return smoothPolygon; -} - -CSmoothNormalGenerator::VertexHashMap::VertexHashMap(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize) - :m_hashTableMaxSize(_hashTableMaxSize), - m_cellSize(_cellSize) -{ - assert((core::isPoT(m_hashTableMaxSize))); - - m_vertices.reserve(_vertexCount); - m_buckets.reserve(_hashTableMaxSize + 1); -} - -uint32_t CSmoothNormalGenerator::VertexHashMap::hash(const CPolygonGeometryManipulator::SSNGVertexData & vertex) const -{ - const hlsl::float32_t3 position = vertex.position / m_cellSize; - - return ((static_cast(position.x) * primeNumber1) ^ - (static_cast(position.y) * primeNumber2) ^ - (static_cast(position.z) * primeNumber3))& (m_hashTableMaxSize - 1); -} - -uint32_t CSmoothNormalGenerator::VertexHashMap::hash(const hlsl::uint32_t3& position) const +CSmoothNormalGenerator::Result CSmoothNormalGenerator::calculateNormals(const asset::ICPUPolygonGeometry* polygon, float epsilon, VxCmpFunction vxcmp) { - return ((position.x * primeNumber1) ^ - (position.y * primeNumber2) ^ - (position.z * primeNumber3))& (m_hashTableMaxSize - 1); + assert(polygon->getIndexingCallback()->degree() == 3); + static constexpr auto MinEpsilon = 0.00001f; + const auto patchedEpsilon = epsilon < MinEpsilon ? MinEpsilon : epsilon; + VertexHashMap vertexHashMap = setupData(polygon, patchedEpsilon); + const auto smoothPolygon = processConnectedVertices(polygon, vertexHashMap, patchedEpsilon,vxcmp); + return { vertexHashMap, smoothPolygon }; } -void CSmoothNormalGenerator::VertexHashMap::add(CPolygonGeometryManipulator::SSNGVertexData && vertex) -{ - vertex.hash = hash(vertex); - m_vertices.push_back(vertex); -} - -CSmoothNormalGenerator::VertexHashMap::BucketBounds CSmoothNormalGenerator::VertexHashMap::getBucketBoundsByHash(uint32_t hash) -{ - if (hash == invalidHash) - return { m_vertices.end(), m_vertices.end() }; - - core::vector::iterator begin = std::lower_bound(m_vertices.begin(), m_vertices.end(), hash); - core::vector::iterator end = std::upper_bound(m_vertices.begin(), m_vertices.end(), hash); - - //bucket missing - if (begin == m_vertices.end()) - return { m_vertices.end(), m_vertices.end() }; - - //bucket missing - if (begin->hash != hash) - return { m_vertices.end(), m_vertices.end() }; - - return { begin, end }; -} - -struct KeyAccessor -{ - _NBL_STATIC_INLINE_CONSTEXPR size_t key_bit_count = 32ull; - - template - inline decltype(radix_mask) operator()(const CPolygonGeometryManipulator::SSNGVertexData& item) const - { - return static_cast(item.hash>>static_cast(bit_offset))&radix_mask; - } -}; -void CSmoothNormalGenerator::VertexHashMap::validate() -{ - const auto oldSize = m_vertices.size(); - m_vertices.resize(oldSize*2u); - // TODO: maybe use counting sort (or big radix) and use the histogram directly for the m_buckets - auto finalSortedOutput = core::radix_sort(m_vertices.data(),m_vertices.data()+oldSize,oldSize,KeyAccessor()); - // TODO: optimize out the erase - if (finalSortedOutput!=m_vertices.data()) - m_vertices.erase(m_vertices.begin(),m_vertices.begin()+oldSize); - else - m_vertices.erase(m_vertices.begin()+oldSize,m_vertices.end()); - - // TODO: are `m_buckets` even begin USED!? - uint16_t prevHash = m_vertices[0].hash; - core::vector::iterator prevBegin = m_vertices.begin(); - m_buckets.push_back(prevBegin); - - while (true) - { - core::vector::iterator next = std::upper_bound(prevBegin, m_vertices.end(), prevHash); - m_buckets.push_back(next); - - if (next == m_vertices.end()) - break; - - prevBegin = next; - prevHash = next->hash; - } -} CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const asset::ICPUPolygonGeometry* polygon, float epsilon) { const size_t idxCount = polygon->getPrimitiveCount() * 3; - VertexHashMap vertices(idxCount, std::min(16u * 1024u, core::roundUpToPoT(idxCount * 1.0f / 32.0f)), epsilon == 0.0f ? 0.00001f : epsilon * 2.f); + const auto cellCount = std::max(core::roundUpToPoT((idxCount + 31) >> 5), 4); + VertexHashMap vertices(idxCount, std::min(16u * 1024u, cellCount), epsilon * 2.f); for (uint32_t i = 0; i < idxCount; i += 3) { //calculate face normal of parent triangle - hlsl::float32_t3 v1, v2, v3; - polygon->getPositionView().decodeElement(i, v1); - polygon->getPositionView().decodeElement(i + 1, v2); - polygon->getPositionView().decodeElement(i + 2, v3); + hlsl::float32_t3 v0, v1, v2; + polygon->getPositionView().decodeElement(i, v0); + polygon->getPositionView().decodeElement(i + 1, v1); + polygon->getPositionView().decodeElement(i + 2, v2); - const auto faceNormal = normalize(cross(v2 - v1, v3 - v1)); + const auto faceNormal = normalize(cross(v1 - v0, v2 - v0)); //set data for m_vertices - const auto angleWages = getAngleWeight(v1, v2, v3); + const auto angleWages = hlsl::shapes::util::compInternalAngle(v2 - v1, v0 - v2, v1 - v2); - vertices.add({ i, 0, angleWages.x, v1, faceNormal}); - vertices.add({ i + 1, 0, angleWages.y, v2, faceNormal}); - vertices.add({ i + 2, 0, angleWages.z, v3, faceNormal}); + vertices.add({ i, 0, faceNormal * angleWages.x, v0}); + vertices.add({ i + 1, 0, faceNormal * angleWages.y,v1}); + vertices.add({ i + 2, 0, faceNormal * angleWages.z, v2}); } - vertices.validate(); + vertices.bake(); return vertices; } -core::smart_refctd_ptr CSmoothNormalGenerator::processConnectedVertices(const asset::ICPUPolygonGeometry* polygon, VertexHashMap& vertexHashMap, float epsilon, CPolygonGeometryManipulator::VxCmpFunction vxcmp) +core::smart_refctd_ptr CSmoothNormalGenerator::processConnectedVertices(const asset::ICPUPolygonGeometry* polygon, VertexHashMap& vertexHashMap, float epsilon, VxCmpFunction vxcmp) { auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); static constexpr auto NormalFormat = EF_R32G32B32_SFLOAT; const auto normalFormatBytesize = asset::getTexelOrBlockBytesize(NormalFormat); auto normalBuf = ICPUBuffer::create({ normalFormatBytesize * outPolygon->getPositionView().getElementCount()}); + auto normalView = polygon->getNormalView(); hlsl::shapes::AABB<4,hlsl::float32_t> aabb; aabb.maxVx = hlsl::float32_t4(1, 1, 1, 0.f); @@ -253,33 +95,27 @@ core::smart_refctd_ptr CSmoothNormalGenerator::processConne constexpr auto normalStride = sizeof(hlsl::float32_t3); assert(outPolygon->getNormalView().composed.stride==normalStride); - for (uint32_t cell = 0; cell < vertexHashMap.getBucketCount() - 1; cell++) + for (auto& processedVertex : vertexHashMap.vertices()) { - VertexHashMap::BucketBounds processedBucket = vertexHashMap.getBucketBoundsById(cell); + auto normal = processedVertex.weightedNormal; - for (core::vector::iterator processedVertex = processedBucket.begin; processedVertex != processedBucket.end; processedVertex++) - { - std::array neighboringCells = vertexHashMap.getNeighboringCellHashes(*processedVertex); - hlsl::float32_t3 normal = processedVertex->parentTriangleFaceNormal * processedVertex->wage; - - //iterate among all neighboring cells - for (int i = 0; i < 8; i++) + // We perform double the work (since `vxcmp` must be commutative but not required to be associative) intentionally, + // because without guaranteed associativity we cannot partition the vertices into disjoint sets (we're not reconstructing OBJ-like + // smooth groups with this), so we can't have all vertices in a set just copy their normal from a "master vertex". + // For an example of why that is good, think of a cone or cylinder and why its good to have non-associative smoothing predicate. + vertexHashMap.forEachBroadphaseNeighborCandidates(processedVertex.getPosition(), [&](const VertexHashMap::vertex_data_t& candidate) { - VertexHashMap::BucketBounds bounds = vertexHashMap.getBucketBoundsByHash(neighboringCells[i]); - for (; bounds.begin != bounds.end; bounds.begin++) + if (processedVertex.index != candidate.index && compareVertexPosition(processedVertex.position, candidate.position, epsilon) && + vxcmp(processedVertex, candidate, polygon)) { - if (processedVertex != bounds.begin) - if (compareVertexPosition(processedVertex->position, bounds.begin->position, epsilon) && - vxcmp(*processedVertex, *bounds.begin, polygon)) - { - //TODO: better mean calculation algorithm - normal += bounds.begin->parentTriangleFaceNormal * bounds.begin->wage; - } + //TODO: better mean calculation algorithm + normal += candidate.weightedNormal; } - } - normal = normalize(normal); - memcpy(normalPtr + (normalStride * processedVertex->index), &normal, sizeof(normal)); - } + return true; + }); + + normal = normalize(normal); + memcpy(normalPtr + (normalStride * processedVertex.index), &normal, sizeof(normal)); } CPolygonGeometryManipulator::recomputeContentHashes(outPolygon.get()); @@ -287,251 +123,5 @@ core::smart_refctd_ptr CSmoothNormalGenerator::processConne return outPolygon; } -std::array CSmoothNormalGenerator::VertexHashMap::getNeighboringCellHashes(const CPolygonGeometryManipulator::SSNGVertexData & vertex) -{ - std::array neighbourhood; - - hlsl::float32_t3 cellFloatCoord = vertex.position / m_cellSize - hlsl::float32_t3(0.5f); - hlsl::uint32_t3 neighbor = hlsl::uint32_t3(static_cast(cellFloatCoord.x), static_cast(cellFloatCoord.y), static_cast(cellFloatCoord.z)); - - //left bottom near - neighbourhood[0] = hash(neighbor); - - //right bottom near - neighbor = neighbor + hlsl::uint32_t3(1, 0, 0); - neighbourhood[1] = hash(neighbor); - - //right bottom far - neighbor = neighbor + hlsl::uint32_t3(0, 0, 1); - neighbourhood[2] = hash(neighbor); - - //left bottom far - neighbor = neighbor - hlsl::uint32_t3(1, 0, 0); - neighbourhood[3] = hash(neighbor); - - //left top far - neighbor = neighbor + hlsl::uint32_t3(0, 1, 0); - neighbourhood[4] = hash(neighbor); - - //right top far - neighbor = neighbor + hlsl::uint32_t3(1, 0, 0); - neighbourhood[5] = hash(neighbor); - - //righ top near - neighbor = neighbor - hlsl::uint32_t3(0, 0, 1); - neighbourhood[6] = hash(neighbor); - - //left top near - neighbor = neighbor - hlsl::uint32_t3(1, 0, 0); - neighbourhood[7] = hash(neighbor); - - //erase duplicated hashes - for (int i = 0; i < 8; i++) - { - uint32_t currHash = neighbourhood[i]; - for (int j = i + 1; j < 8; j++) - { - if (neighbourhood[j] == currHash) - neighbourhood[j] = invalidHash; - } - } - return neighbourhood; -} - -core::smart_refctd_ptr CSmoothNormalGenerator::weldVertices(const ICPUPolygonGeometry* polygon, VertexHashMap& vertices, float epsilon) -{ - struct Group - { - uint32_t vertex_reference_index; // index to referenced vertex in the original polygon - }; - core::vector groups; - groups.reserve(vertices.getVertexCount()); - - core::vector> groupIndexes(vertices.getVertexCount()); - - auto canJoinVertices = [&](uint32_t index1, uint32_t index2)-> bool - { - if (!isAttributeEqual(polygon->getPositionView(), index1, index2, epsilon)) - return false; - if (!isAttributeEqual(polygon->getNormalView(), index1, index2, epsilon)) - return false; - for (const auto& jointWeightView : polygon->getJointWeightViews()) - { - if (!isAttributeEqual(jointWeightView.indices, index1, index2, epsilon)) return false; - if (!isAttributeEqual(jointWeightView.weights, index1, index2, epsilon)) return false; - } - for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) - if (!isAttributeEqual(auxAttributeView, index1, index2, epsilon)) return false; - - return true; - }; - - for (uint32_t cell = 0; cell < vertices.getBucketCount() - 1; cell++) - { - VertexHashMap::BucketBounds processedBucket = vertices.getBucketBoundsById(cell); - - for (core::vector::iterator processedVertex = processedBucket.begin; processedVertex != processedBucket.end; processedVertex++) - { - std::array neighboringCells = vertices.getNeighboringCellHashes(*processedVertex); - - auto& groupIndex = groupIndexes[processedVertex->index]; - - //iterate among all neighboring cells - for (int i = 0; i < 8; i++) - { - VertexHashMap::BucketBounds bounds = vertices.getBucketBoundsByHash(neighboringCells[i]); - for (auto neighbourVertex_it = bounds.begin; neighbourVertex_it != bounds.end; neighbourVertex_it++) - { - const auto neighbourGroupIndex = groupIndexes[neighbourVertex_it->index]; - - // find the first group that this vertex can join - if (processedVertex != neighbourVertex_it && neighbourGroupIndex && canJoinVertices(processedVertex->index, neighbourVertex_it->index)) - { - groupIndex = neighbourGroupIndex; - break; - } - } - } - if (!groupIndex) - { - // create new group if no group nearby that is compatible with this vertex - groupIndex = groups.size(); - groups.push_back({ processedVertex->index}); - } - } - } - - auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); - outPolygon->setIndexing(IPolygonGeometryBase::TriangleList()); - - const uint32_t indexSize = (groups.size() < std::numeric_limits::max()) ? sizeof(uint16_t) : sizeof(uint32_t); - auto indexBuffer = ICPUBuffer::create({ indexSize * groupIndexes.size(), IBuffer::EUF_INDEX_BUFFER_BIT }); - auto indexBufferPtr = reinterpret_cast(indexBuffer->getPointer()); - auto indexView = ICPUPolygonGeometry::SDataView{ - .composed = { - .stride = indexSize, - }, - .src = { - .offset = 0, - .size = indexBuffer->getSize(), - .buffer = std::move(indexBuffer) - } - }; - if (indexSize == 2) - { - indexView.composed.encodedDataRange.u16.minVx[0] = 0; - indexView.composed.encodedDataRange.u16.maxVx[0] = groups.size() - 1; - indexView.composed.format = EF_R16_UINT; - indexView.composed.rangeFormat = IGeometryBase::EAABBFormat::U16; - } - else if (indexSize == 4) - { - indexView.composed.encodedDataRange.u32.minVx[0] = 0; - indexView.composed.encodedDataRange.u32.maxVx[0] = groups.size() - 1; - indexView.composed.format = EF_R32_UINT; - indexView.composed.rangeFormat = IGeometryBase::EAABBFormat::U32; - } - - for (auto index_i = 0u; index_i < groupIndexes.size(); index_i++) - { - if (indexSize == 2) - { - uint16_t index = *groupIndexes[index_i]; - memcpy(indexBufferPtr + indexSize * index_i, &index, sizeof(index)); - } - else if (indexSize == 4) - { - uint32_t index = *groupIndexes[index_i]; - memcpy(indexBufferPtr + indexSize * index_i, &index, sizeof(index)); - } - } - outPolygon->setIndexView(std::move(indexView)); - - - using position_t = hlsl::float32_t3; - constexpr auto PositionAttrSize = sizeof(position_t); - auto positionBuffer = ICPUBuffer::create({ PositionAttrSize * groups.size(), IBuffer::EUF_NONE }); - auto outPositions = reinterpret_cast(positionBuffer->getPointer()); - const auto inPositions = reinterpret_cast(polygon->getPositionView().getPointer()); - outPolygon->setPositionView({ - .composed = polygon->getPositionView().composed, - .src = {.offset = 0, .size = positionBuffer->getSize(), .buffer = std::move(positionBuffer)} - }); - - using normal_t = hlsl::float32_t3; - constexpr auto NormalAttrSize = sizeof(normal_t); - auto normalBuffer = ICPUBuffer::create({ NormalAttrSize * groups.size(), IBuffer::EUF_NONE }); - auto outNormals = reinterpret_cast(normalBuffer->getPointer()); - const auto inNormals = reinterpret_cast(polygon->getNormalView().getPointer()); - outPolygon->setNormalView({ - .composed = polygon->getNormalView().composed, - .src = {.offset = 0, .size = normalBuffer->getSize(), .buffer = std::move(normalBuffer)} - }); - - auto createOutView = [&](const ICPUPolygonGeometry::SDataView& view) - { - auto buffer = ICPUBuffer::create({ view.composed.stride * groups.size(), view.src.buffer->getUsageFlags() }); - return ICPUPolygonGeometry::SDataView{ - .composed = view.composed, - .src = {.offset = 0, .size = buffer->getSize(), .buffer = std::move(buffer)} - }; - }; - - const auto& inJointWeightViews = polygon->getJointWeightViews(); - auto* outJointWeightViews = outPolygon->getJointWeightViews(); - outJointWeightViews->resize(inJointWeightViews.size()); - for (auto jointWeightView_i = 0u; jointWeightView_i < inJointWeightViews.size(); jointWeightView_i++) - { - const auto& inJointWeightView = inJointWeightViews[jointWeightView_i]; - outJointWeightViews->operator[](jointWeightView_i).indices = createOutView(inJointWeightView.indices); - outJointWeightViews->operator[](jointWeightView_i).weights = createOutView(inJointWeightView.weights); - } - - const auto& inAuxAttributeViews = polygon->getAuxAttributeViews(); - auto* outAuxAttributeViews = outPolygon->getAuxAttributeViews(); - outAuxAttributeViews->resize(inAuxAttributeViews.size()); - for (auto auxAttributeView_i = 0u; auxAttributeView_i < inAuxAttributeViews.size(); auxAttributeView_i++) - { - const auto& inAuxAttributeView = inAuxAttributeViews[auxAttributeView_i]; - outAuxAttributeViews->operator[](auxAttributeView_i) = createOutView(inAuxAttributeView); - } - - for (auto group_i = 0u; group_i < groups.size(); group_i++) - { - const auto srcIndex = groups[group_i].vertex_reference_index; - outPositions[group_i] = inPositions[srcIndex]; - outNormals[group_i] = inPositions[srcIndex]; - - for (uint64_t jointView_i = 0u; jointView_i < polygon->getJointWeightViews().size(); jointView_i++) - { - auto& inView = polygon->getJointWeightViews()[jointView_i]; - auto& outView = outPolygon->getJointWeightViews()->operator[](jointView_i); - - const std::byte* const inJointIndices = reinterpret_cast(inView.indices.getPointer()); - const auto jointIndexSize = inView.indices.composed.stride; - std::byte* const outJointIndices = reinterpret_cast(outView.indices.getPointer()); - memcpy(outJointIndices + group_i * jointIndexSize, inJointIndices + srcIndex * jointIndexSize, jointIndexSize); - - const std::byte* const inWeights = reinterpret_cast(inView.weights.getPointer()); - const auto jointWeightSize = inView.weights.composed.stride; - std::byte* const outWeights = reinterpret_cast(outView.weights.getPointer()); - memcpy(outWeights + group_i * jointWeightSize, inWeights + srcIndex * jointWeightSize, jointWeightSize); - } - - for (auto auxView_i = 0u; auxView_i < polygon->getAuxAttributeViews().size(); auxView_i++) - { - auto& inView = polygon->getAuxAttributeViews()[auxView_i]; - auto& outView = outPolygon->getAuxAttributeViews()->operator[](auxView_i); - const auto attrSize = inView.composed.stride; - const std::byte* const inAuxs = reinterpret_cast(inView.getPointer()); - std::byte* const outAuxs = reinterpret_cast(outView.getPointer()); - memcpy(outAuxs + group_i * attrSize, inAuxs + srcIndex * attrSize, attrSize); - } - } - - CPolygonGeometryManipulator::recomputeContentHashes(outPolygon.get()); - return outPolygon; - } } -} \ No newline at end of file diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.h b/src/nbl/asset/utils/CSmoothNormalGenerator.h index d200bfe7b3..6ac4daf6c4 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.h +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.h @@ -4,69 +4,58 @@ #ifndef _NBL_ASSET_C_SMOOTH_NORMAL_GENERATOR_H_INCLUDED_ #define _NBL_ASSET_C_SMOOTH_NORMAL_GENERATOR_H_INCLUDED_ +#include "nbl/asset/utils/CVertexHashGrid.h" -#include "nbl/asset/utils/CPolygonGeometryManipulator.h" namespace nbl::asset { +// TODO: implement a class template that take position type(either float32_t3 or float64_t3 as template argument class CSmoothNormalGenerator { public: CSmoothNormalGenerator() = delete; ~CSmoothNormalGenerator() = delete; - static core::smart_refctd_ptr calculateNormals(const ICPUPolygonGeometry* polygon, bool enableWelding, float epsilon, CPolygonGeometryManipulator::VxCmpFunction function); + struct VertexData + { + uint32_t index; //offset of the vertex into index buffer + uint32_t hash; + hlsl::float32_t3 weightedNormal; + hlsl::float32_t3 position; //position of the vertex in 3D space - private: - class VertexHashMap - { - public: - struct BucketBounds - { - core::vector::iterator begin; - core::vector::iterator end; - }; - - public: - VertexHashMap(size_t _vertexCount, uint32_t _hashTableMaxSize, float _cellSize); - - //inserts vertex into hash table - void add(CPolygonGeometryManipulator::SSNGVertexData&& vertex); + hlsl::float32_t3 getPosition() const + { + return position; + } - //sorts hashtable and sets iterators at beginnings of bucktes - void validate(); + void setHash(uint32_t hash) + { + this->hash = hash; + } - inline uint32_t getVertexCount() const { return m_vertices.size(); } + uint32_t getHash() const + { + return hash; + }; - // - std::array getNeighboringCellHashes(const CPolygonGeometryManipulator::SSNGVertexData& vertex); + }; - inline uint32_t getBucketCount() { return m_buckets.size(); } - inline BucketBounds getBucketBoundsById(uint32_t index) const { return { m_buckets[index], m_buckets[index + 1] }; } - BucketBounds getBucketBoundsByHash(uint32_t hash); + using VxCmpFunction = std::function; - private: - static inline constexpr uint32_t invalidHash = 0xFFFFFFFF; - static inline constexpr uint32_t primeNumber1 = 73856093; - static inline constexpr uint32_t primeNumber2 = 19349663; - static inline constexpr uint32_t primeNumber3 = 83492791; + using VertexHashMap = CVertexHashGrid; - //holds iterators pointing to beginning of each bucket, last iterator points to m_vertices.end() - core::vector::iterator> m_buckets; - core::vector m_vertices; - const uint32_t m_hashTableMaxSize; - const float m_cellSize; - - uint32_t hash(const CPolygonGeometryManipulator::SSNGVertexData& vertex) const; - uint32_t hash(const hlsl::uint32_t3& position) const; - - }; + struct Result + { + VertexHashMap vertexHashGrid; + core::smart_refctd_ptr geom; + }; + static Result calculateNormals(const ICPUPolygonGeometry* polygon, float epsilon, VxCmpFunction function); private: + static VertexHashMap setupData(const ICPUPolygonGeometry* polygon, float epsilon); - static core::smart_refctd_ptr processConnectedVertices(const ICPUPolygonGeometry* polygon, VertexHashMap& vertices, float epsilon, CPolygonGeometryManipulator::VxCmpFunction vxcmp); - static core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, VertexHashMap& vertices, float epsilon); + static core::smart_refctd_ptr processConnectedVertices(const ICPUPolygonGeometry* polygon, VertexHashMap& vertices, float epsilon, VxCmpFunction vxcmp); }; } diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index cc81b093a2..3736a8e321 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -246,6 +246,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/circle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/ellipse.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/line.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/beziers.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/triangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/aabb.hlsl") #sampling LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/concentric_mapping.hlsl")