diff --git a/examples_tests b/examples_tests
index 3400a2a498..bbc8ab80fe 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 3400a2a498b6b3738d63aff66dd0363a4a9f8c67
+Subproject commit bbc8ab80fecf44abb9b03f4fa147918fee7c310f
diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl
new file mode 100644
index 0000000000..6678a66942
--- /dev/null
+++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl
@@ -0,0 +1,114 @@
+#ifndef _NBL_HLSL_RWMC_CASCADE_ACCUMULATOR_INCLUDED_
+#define _NBL_HLSL_RWMC_CASCADE_ACCUMULATOR_INCLUDED_
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+// NOTE(review): the two #include directives below carry no header name in this patch - restore them before building
+#include
+#include
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace rwmc
+{
+
+// Runtime cascade layout read by CascadeAccumulator::addSample().
+struct CascadeSettings
+{
+    uint32_t size;  // number of cascades addSample() may select from
+    uint32_t start; // luma scale of cascade 0
+    uint32_t base;  // factor between the luma scales of consecutive cascades
+};
+
+// Storage for one value per cascade.
+// NOTE(review): template parameter lists in this file were rebuilt from how the names are used - confirm the type of CascadeSize.
+template<typename CascadeLayerType, uint32_t CascadeSize>
+struct CascadeEntry
+{
+    CascadeLayerType data[CascadeSize];
+};
+
+// Splits each incoming sample between two neighbouring cascades by luma and keeps a
+// running average per cascade. addSample() always writes cascades `k` and `k + 1`,
+// so CascadeSize is assumed to be at least 2 - TODO confirm at the instantiation sites.
+template<typename CascadeLayerType, uint32_t CascadeSize>
+struct CascadeAccumulator
+{
+    using output_storage_type = CascadeEntry<CascadeLayerType, CascadeSize>;
+    using initialization_data = CascadeSettings;
+    output_storage_type accumulation;
+    // `sampleIndex + 1` of the last addSample() call that wrote the given cascade, 0 if never written
+    uint32_t cascadeSampleCounter[CascadeSize];
+    CascadeSettings cascadeSettings;
+
+    // Clears all cascades and counters and stores `settings` for subsequent addSample() calls.
+    void initialize(in CascadeSettings settings)
+    {
+        for (uint32_t i = 0u; i < CascadeSize; ++i)
+        {
+            accumulation.data[i] = (CascadeLayerType)0.0f;
+            cascadeSampleCounter[i] = 0u;
+        }
+
+        cascadeSettings = settings;
+    }
+
+    // Weighted sum of the channels of `col` with row 1 of transpose(scRGBtoXYZ).
+    // NOTE(review): in HLSL that row is column 1 of scRGBtoXYZ - confirm it holds the Y (luminance) weights.
+    typename vector_traits<CascadeLayerType>::scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col)
+    {
+        return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col);
+    }
+
+    // Folds sample number `sampleIndex` (0-based) into the two cascades bracketing its luma.
+    // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp
+    void addSample(uint32_t sampleIndex, float32_t3 sample)
+    {
+        float lowerScale = cascadeSettings.start;
+        float upperScale = lowerScale * cascadeSettings.base;
+
+        const float luma = getLuma(sample);
+
+        // walk up until `luma < upperScale` or the last cascade pair is reached (a NaN luma ends up in the last pair).
+        // written as `index + 2 < size` because `size - 2` wraps around for size < 2 and the walk would run out of bounds
+        uint32_t lowerCascadeIndex = 0u;
+        while (!(luma < upperScale) && lowerCascadeIndex + 2u < cascadeSettings.size)
+        {
+            lowerScale = upperScale;
+            upperScale *= cascadeSettings.base;
+            ++lowerCascadeIndex;
+        }
+
+        float lowerCascadeLevelWeight;
+        float higherCascadeLevelWeight;
+
+        if (luma <= lowerScale)
+            lowerCascadeLevelWeight = 1.0f;
+        else if (luma < upperScale)
+            lowerCascadeLevelWeight = max(0.0f, (lowerScale / luma - lowerScale / upperScale) / (1.0f - lowerScale / upperScale));
+        else // Inf, NaN ...
+            lowerCascadeLevelWeight = 0.0f;
+
+        if (luma < upperScale)
+            higherCascadeLevelWeight = max(0.0f, 1.0f - lowerCascadeLevelWeight);
+        else
+            higherCascadeLevelWeight = upperScale / luma;
+
+        uint32_t higherCascadeIndex = lowerCascadeIndex + 1u;
+
+        // running average over `sampleCount` samples: with c = cascadeSampleCounter[k], n = sampleCount,
+        // old + (s*w - (n - c)*old)/n == (c*old + s*w)/n, which also accounts for samples that skipped cascade k
+        const uint32_t sampleCount = sampleIndex + 1u;
+        const float reciprocalSampleCount = 1.0f / float(sampleCount);
+        accumulation.data[lowerCascadeIndex] += (sample * lowerCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[lowerCascadeIndex])) * accumulation.data[lowerCascadeIndex]) * reciprocalSampleCount;
+        accumulation.data[higherCascadeIndex] += (sample * higherCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[higherCascadeIndex])) * accumulation.data[higherCascadeIndex]) * reciprocalSampleCount;
+        cascadeSampleCounter[lowerCascadeIndex] = sampleCount;
+        cascadeSampleCounter[higherCascadeIndex] = sampleCount;
+    }
+};
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl b/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl
new file mode 100644
index 0000000000..d7b151af86
--- /dev/null
+++ b/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl
@@ -0,0 +1,160 @@
+#ifndef _NBL_BUILTIN_HLSL_RWMC_RWMC_HLSL_INCLUDED_
+#define _NBL_BUILTIN_HLSL_RWMC_RWMC_HLSL_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+#include
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace rwmc
+{
+namespace impl
+{
+
+struct CascadeSample
+{
+    float32_t3 centerValue; // texel at the queried coordinate
+    float normalizedCenterLuma; // luma of centerValue, scaled by the caller-supplied reciprocalBaseI
+    float normalizedNeighbourhoodAverageLuma; // same scaling, averaged over the 3x3 neighbourhood
+};
+
+// 
TODO: figure out what values should pixels outside have, 0.0f is incorrect
+// Loads cascade layer `cascadeIndex` at `currentCoord + offset`; coordinates outside the image yield 0.
+float32_t3 sampleCascadeTexel(int32_t2 currentCoord, int32_t2 offset, in RWTexture2DArray<float32_t4> cascade, uint32_t cascadeIndex)
+{
+    const int32_t2 texelCoord = currentCoord + offset;
+
+    // reject both ends of the range: the upper edge gets the same 0 as the lower edge
+    // instead of depending on what an out-of-bounds Load returns
+    uint32_t width, height, layerCount;
+    cascade.GetDimensions(width, height, layerCount);
+    if (any(texelCoord < int32_t2(0, 0)) || any(texelCoord >= int32_t2(int32_t(width), int32_t(height))))
+        return float32_t3(0.0f, 0.0f, 0.0f);
+
+    float32_t4 output = cascade.Load(int32_t3(texelCoord, int32_t(cascadeIndex)));
+    return float32_t3(output.r, output.g, output.b);
+}
+
+// Weighted sum of the channels of `col` with row 1 of transpose(scRGBtoXYZ).
+// NOTE(review): in HLSL that row is column 1 of scRGBtoXYZ - confirm it holds the Y (luminance) weights.
+float32_t calcLuma(in float32_t3 col)
+{
+    return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col);
+}
+
+// Reads the 3x3 neighbourhood of `coord` in cascade `cascadeIndex` and returns the centre texel
+// together with its luma and the neighbourhood's mean luma, both multiplied by `reciprocalBaseI`.
+// Texels outside the image enter the mean as 0 and the sum is still divided by 9.
+CascadeSample SampleCascade(in int32_t2 coord, in RWTexture2DArray<float32_t4> cascade, in uint cascadeIndex, in float reciprocalBaseI)
+{
+    float32_t3 neighbourhood[9];
+    neighbourhood[0] = sampleCascadeTexel(coord, int32_t2(-1, -1), cascade, cascadeIndex);
+    neighbourhood[1] = sampleCascadeTexel(coord, int32_t2(0, -1), cascade, cascadeIndex);
+    neighbourhood[2] = sampleCascadeTexel(coord, int32_t2(1, -1), cascade, cascadeIndex);
+    neighbourhood[3] = sampleCascadeTexel(coord, int32_t2(-1, 0), cascade, cascadeIndex);
+    neighbourhood[4] = sampleCascadeTexel(coord, int32_t2(0, 0), cascade, cascadeIndex);
+    neighbourhood[5] = sampleCascadeTexel(coord, int32_t2(1, 0), cascade, cascadeIndex);
+    neighbourhood[6] = sampleCascadeTexel(coord, int32_t2(-1, 1), cascade, cascadeIndex);
+    neighbourhood[7] = sampleCascadeTexel(coord, int32_t2(0, 1), cascade, cascadeIndex);
+    neighbourhood[8] = sampleCascadeTexel(coord, int32_t2(1, 1), cascade, cascadeIndex);
+
+    // numerical robustness
+    float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) +
+        ((neighbourhood[5] + neighbourhood[6]) + (neighbourhood[7] + neighbourhood[8]));
+
+    CascadeSample retval;
+    retval.centerValue = neighbourhood[4];
+    retval.normalizedNeighbourhoodAverageLuma = retval.normalizedCenterLuma = calcLuma(neighbourhood[4]) * reciprocalBaseI;
+    // the centre luma stored on the previous line is added to the luma of the 8 surrounding texels before averaging
+    retval.normalizedNeighbourhoodAverageLuma = (calcLuma(excl_hood_sum) * reciprocalBaseI + retval.normalizedNeighbourhoodAverageLuma) / 9.f;
+    return retval;
+}
+
+} // namespace impl
+
+// Constants consumed by reweight(); filled in by computeReweightingParameters().
+struct ReweightingParameters
+{
+    uint32_t lastCascadeIndex; // cascadeSize - 1
+    float initialEmin; // a minimum image brightness that we always consider reliable
+    float reciprocalBase; // 1 / base
+    float reciprocalN; // 1 / sampleCount
+    float reciprocalKappa; // 1 / kappa
+    float colorReliabilityFactor; // base + (1 - base) / kappa
+    float NOverKappa; // sampleCount / kappa
+};
+
+// Derives the reweight() constants from the user-facing settings.
+// `base`, `sampleCount` and `kappa` are used as divisors and `cascadeSize` is decremented without any check,
+// so all four are expected to be non-zero.
+ReweightingParameters computeReweightingParameters(float base, uint32_t sampleCount, float minReliableLuma, float kappa, uint32_t cascadeSize)
+{
+    ReweightingParameters retval;
+    retval.lastCascadeIndex = cascadeSize - 1u;
+    retval.initialEmin = minReliableLuma;
+    retval.reciprocalBase = 1.f / base;
+    const float N = float(sampleCount);
+    retval.reciprocalN = 1.f / N;
+    retval.reciprocalKappa = 1.f / kappa;
+    // if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have
+    // allow up to ~`base` times more energy in one sample to lessen bias in some cases (the factor is 1 for kappa==1 and tends to `base` as kappa grows)
+    retval.colorReliabilityFactor = base + (1.f - base) * retval.reciprocalKappa;
+    retval.NOverKappa = N * retval.reciprocalKappa;
+
+    return retval;
+}
+
+// Combines all cascades at `coord` into a single colour: every cascade's centre texel is scaled by a
+// reliability clamped to [0, 1] (fixed at 1 when params.reciprocalKappa > 1) and the results are summed.
+float32_t3 reweight(in ReweightingParameters params, in RWTexture2DArray<float32_t4> cascade, in int32_t2 coord)
+{
+    float reciprocalBaseI = 1.f;
+    impl::CascadeSample curr = impl::SampleCascade(coord, cascade, 0u, reciprocalBaseI);
+
+    float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f);
+    float Emin = params.initialEmin;
+
+    // only read behind `notFirstCascade`; zero-initialised so no path can observe an indeterminate value
+    float prevNormalizedCenterLuma = 0.f;
+    float prevNormalizedNeighbourhoodAverageLuma = 0.f;
+    for (uint i = 0u; i <= params.lastCascadeIndex; i++)
+    {
+        const bool notFirstCascade = i != 0u;
+        const bool notLastCascade = i != params.lastCascadeIndex;
+
+        // zero-initialised because it is copied into `curr` at the end of every iteration,
+        // including the last one where no further cascade is sampled
+        impl::CascadeSample next = (impl::CascadeSample)0;
+        if (notLastCascade)
+        {
+            reciprocalBaseI *= params.reciprocalBase;
+            next = impl::SampleCascade(coord, cascade, i + 1u, reciprocalBaseI);
+        }
+
+        float reliability = 1.f;
+        // sample counting-based reliability estimation
+        if (params.reciprocalKappa <= 1.f)
+        {
+            float localReliability = curr.normalizedCenterLuma;
+            // reliability in 3x3 pixel block (see robustness)
+            float globalReliability = curr.normalizedNeighbourhoodAverageLuma;
+            if (notFirstCascade)
+            {
+                localReliability += prevNormalizedCenterLuma;
+                globalReliability += prevNormalizedNeighbourhoodAverageLuma;
+            }
+            if (notLastCascade)
+            {
+                localReliability += next.normalizedCenterLuma;
+                globalReliability += next.normalizedNeighbourhoodAverageLuma;
+            }
+            // check if above minimum sampling threshold (avg 9 sample occurrences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: ternary op is in reverse)
+            reliability = globalReliability < params.reciprocalN ? globalReliability : localReliability;
+            {
+                const float accumLuma = impl::calcLuma(accumulation);
+                if (accumLuma > Emin)
+                    Emin = accumLuma;
+
+                // NOTE(review): except on the last iteration reciprocalBaseI has already been advanced to cascade i + 1 here - confirm this is intended
+                const float colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor;
+
+                reliability += colorReliability;
+                reliability *= params.NOverKappa;
+                reliability -= params.reciprocalKappa;
+                reliability = clamp(reliability * 0.5f, 0.f, 1.f);
+            }
+        }
+        accumulation += curr.centerValue * reliability;
+
+        prevNormalizedCenterLuma = curr.normalizedCenterLuma;
+        prevNormalizedNeighbourhoodAverageLuma = curr.normalizedNeighbourhoodAverageLuma;
+        curr = next;
+    }
+
+    return accumulation;
+}
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index 2f4d11baf3..e0bd6921cc 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -389,5 +389,8 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath/output_structs.hlsl")
 #blur
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl")
+#rwmc
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/rwmc.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/CascadeAccumulator.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") diff --git a/src/nbl/video/CSurfaceVulkan.cpp b/src/nbl/video/CSurfaceVulkan.cpp index 7d135b04cc..7feda9fcd3 100644 --- a/src/nbl/video/CSurfaceVulkan.cpp +++ b/src/nbl/video/CSurfaceVulkan.cpp @@ -60,7 +60,7 @@ bool ISurfaceVulkan::isSupportedForPhysicalDevice(const IPhysicalDevice* physica core::bitflag ISurfaceVulkan::getAvailablePresentModesForPhysicalDevice(const IPhysicalDevice* physicalDevice) const { - constexpr uint32_t MAX_PRESENT_MODE_COUNT = 4u; + constexpr uint32_t MAX_PRESENT_MODE_COUNT = 5u; core::bitflag result = ISurface::EPM_UNKNOWN;