@@ -43,6 +43,7 @@ struct ImageToDenoise
4343 uint32_t width = 0u , height = 0u ;
4444 uint32_t colorTexelSize = 0u ;
4545 E_IMAGE_INPUT denoiserType = EII_COUNT;
46+ VkExtent3D scaledKernelExtent;
4647 float bloomScale;
4748};
4849struct DenoiserToUse
@@ -220,13 +221,15 @@ nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters()
220221}
221222#define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_
222223
224+ #include "nbl/builtin/glsl/ext/FFT/parameters.glsl"
225+
223226#include <nbl/builtin/glsl/math/complex.glsl>
224227nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in ivec3 coordinate, in uint channel)
225228{
226- const ivec2 inputImageSize = textureSize(inputImage,0 );
227- const ivec2 halfInputImageSize = inputImageSize>>1 ;
228- const vec2 relativeCoords = vec2(coordinate.xy-halfInputImageSize)/pc.kernelScale ;
229- const vec4 texelValue = textureLod(inputImage,(relativeCoords+vec2(0.5))/vec2(inputImageSize) +vec2(0.5),-log2(pc.kernelScale) );
229+ const vec2 inputSize = vec2(nbl_glsl_ext_FFT_Parameters_t_getDimensions().xy );
230+ const vec2 halfInputSize = inputSize*0.5 ;
231+ const vec2 relativeCoords = vec2(coordinate.xy)-halfInputSize ;
232+ const vec4 texelValue = textureLod(inputImage,(relativeCoords+vec2(0.5))/inputSize +vec2(0.5),0.0 );
230233 return nbl_glsl_complex(texelValue[channel], 0.0f);
231234}
232235#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
@@ -1032,25 +1035,38 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
10321035 assert (region.bufferRowLength );
10331036 outParam.colorTexelSize = asset::getTexelOrBlockBytesize (colorCreationParams.format );
10341037 }
1035-
1036- const auto & kerDim = outParam.kernel ->getCreationParameters ().extent ;
1037- const float bloomScale = core::min (float (extent.width ) / float (kerDim.width ), float (extent.height ) / float (kerDim.height ))* bloomScaleBundle[i].value ();
1038- if (bloomScale>1 .f )
1039- os::Printer::log (imageIDString + " Bloom Kernel loose sharpness, increase resolution of bloom kernel!" , ELL_WARNING);
1040- const auto marginSrcDim = [extent,kerDim,bloomScale]() -> auto
1038+
1039+ const float bloomRelativeScale = bloomScaleBundle[i].value ();
1040+ const auto kernelScale = [&outParam,&extent,bloomRelativeScale]() -> auto
1041+ {
1042+ auto kerDim = outParam.kernel ->getCreationParameters ().extent ;
1043+ float kernelScale;
1044+ if (extent.width <extent.height )
1045+ kernelScale = float (extent.width )*bloomRelativeScale/float (kerDim.width );
1046+ else
1047+ kernelScale = float (extent.height )*bloomRelativeScale/float (kerDim.height );
1048+ outParam.scaledKernelExtent .width = core::ceil (float (kerDim.width )*kernelScale);
1049+ outParam.scaledKernelExtent .height = core::ceil (float (kerDim.height )*kernelScale);
1050+ outParam.scaledKernelExtent .depth = 1u ;
1051+ return kernelScale;
1052+ }();
1053+ if (kernelScale>1 .f )
1054+ os::Printer::log (imageIDString + " Bloom Kernel loose sharpness, increase resolution of bloom kernel or reduce its relative scale!" , ELL_WARNING);
1055+ const auto marginSrcDim = [extent,outParam]() -> auto
10411056 {
10421057 auto tmp = extent;
10431058 for (auto i=0u ; i<3u ; i++)
10441059 {
1045- const auto coord = (&kerDim .width )[i];
1060+ const auto coord = (&outParam. scaledKernelExtent .width )[i];
10461061 if (coord>1u )
1047- (&tmp.width )[i] += core::max ( coord*bloomScale, 1u ) -1u ;
1062+ (&tmp.width )[i] += coord-1u ;
10481063 }
10491064 return tmp;
10501065 }();
1051- outParam.bloomScale = bloomScale ;
1052- fftScratchSize = core::max (FFTClass::getOutputBufferSize (usingHalfFloatFFTStorage,kerDim ,colorChannelsFFT)*2u ,fftScratchSize);
1066+ outParam.bloomScale = kernelScale ;
1067+ fftScratchSize = core::max (FFTClass::getOutputBufferSize (usingHalfFloatFFTStorage,outParam. scaledKernelExtent ,colorChannelsFFT)*2u ,fftScratchSize);
10531068 fftScratchSize = core::max (FFTClass::getOutputBufferSize (usingHalfFloatFFTStorage,marginSrcDim,colorChannelsFFT),fftScratchSize);
1069+ // TODO: maybe move them to nested loop and compute JIT
10541070 {
10551071 auto * fftPushConstants = outParam.fftPushConstants ;
10561072 auto * fftDispatchInfo = outParam.fftDispatchInfo ;
@@ -1319,15 +1335,14 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
13191335 }
13201336
13211337 // kernel outputs
1322- const auto kerDim = kerImageView->getCreationParameters ().image ->getCreationParameters ().extent ;
1323- const VkExtent3D paddedKerDim = FFTClass::padDimensions (kerImageView->getCreationParameters ().image ->getCreationParameters ().extent );
1338+ auto paddedKernelExtent = FFTClass::padDimensions (param.scaledKernelExtent );
13241339 for (uint32_t i=0u ; i<colorChannelsFFT; i++)
13251340 {
13261341 video::IGPUImage::SCreationParams imageParams;
13271342 imageParams.flags = static_cast <asset::IImage::E_CREATE_FLAGS>(0u );
13281343 imageParams.type = asset::IImage::ET_2D;
13291344 imageParams.format = EF_R32G32_SFLOAT;
1330- imageParams.extent = { paddedKerDim .width ,paddedKerDim .height ,1u };
1345+ imageParams.extent = {paddedKernelExtent .width ,paddedKernelExtent .height ,1u };
13311346 imageParams.mipLevels = 1u ;
13321347 imageParams.arrayLayers = 1u ;
13331348 imageParams.samples = asset::IImage::ESCF_1_BIT;
@@ -1348,7 +1363,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
13481363 FFTClass::Parameters_t fftPushConstants[2 ];
13491364 FFTClass::DispatchInfo_t fftDispatchInfo[2 ];
13501365 const ISampler::E_TEXTURE_CLAMP fftPadding[2 ] = { ISampler::ETC_CLAMP_TO_BORDER,ISampler::ETC_CLAMP_TO_BORDER };
1351- const auto passes = FFTClass::buildParameters (false ,colorChannelsFFT,kerDim ,fftPushConstants,fftDispatchInfo,fftPadding);
1366+ const auto passes = FFTClass::buildParameters (false ,colorChannelsFFT,param. scaledKernelExtent ,fftPushConstants,fftDispatchInfo,fftPadding);
13521367
13531368 // the kernel's FFTs
13541369 {
@@ -1391,12 +1406,12 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
13911406 {
13921407 NormalizationPushConstants normalizationPC;
13931408 normalizationPC.stride = fftPushConstants[1 ].output_strides ;
1394- normalizationPC.bitreverse_shift .x = 32 -core::findMSB (paddedKerDim .width );
1395- normalizationPC.bitreverse_shift .y = 32 -core::findMSB (paddedKerDim .height );
1409+ normalizationPC.bitreverse_shift .x = 32 -core::findMSB (paddedKernelExtent .width );
1410+ normalizationPC.bitreverse_shift .y = 32 -core::findMSB (paddedKernelExtent .height );
13961411 normalizationPC.bitreverse_shift .z = 0 ;
13971412 driver->pushConstants (kernelNormalizationPipeline->getLayout (),ICPUSpecializedShader::ESS_COMPUTE,0u ,sizeof (normalizationPC),&normalizationPC);
1398- const uint32_t dispatchSizeX = (paddedKerDim .width -1u )/16u +1u ;
1399- const uint32_t dispatchSizeY = (paddedKerDim .height -1u )/16u +1u ;
1413+ const uint32_t dispatchSizeX = (paddedKernelExtent .width -1u )/16u +1u ;
1414+ const uint32_t dispatchSizeY = (paddedKernelExtent .height -1u )/16u +1u ;
14001415 driver->dispatch (dispatchSizeX,dispatchSizeY,colorChannelsFFT);
14011416 }
14021417 FFTClass::defaultBarrier ();
0 commit comments