diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_0_benchmark_VkFFT_single.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_0_benchmark_VkFFT_single.cpp index 98844e4..3a3b51f 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_0_benchmark_VkFFT_single.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_0_benchmark_VkFFT_single.cpp @@ -201,16 +201,16 @@ VkFFTResult sample_0_benchmark_VkFFT_single(VkGPU* vkGPU, uint64_t file_output, //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)(((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize); #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_1000_benchmark_VkFFT_single_2_4096.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_1000_benchmark_VkFFT_single_2_4096.cpp index c942598..a24e53b 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_1000_benchmark_VkFFT_single_2_4096.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_1000_benchmark_VkFFT_single_2_4096.cpp @@ -213,16 +213,16 @@ VkFFTResult sample_1000_benchmark_VkFFT_single_2_4096(VkGPU* vkGPU, uint64_t fil //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp index 8642acb..bc8f3d8 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp @@ -216,16 +216,16 @@ VkFFTResult sample_1001_benchmark_VkFFT_double_2_4096(VkGPU* vkGPU, uint64_t fil //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4; + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4; #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_1002_benchmark_VkFFT_half_2_4096.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_1002_benchmark_VkFFT_half_2_4096.cpp index c0883a7..79c3f36 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_1002_benchmark_VkFFT_half_2_4096.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_1002_benchmark_VkFFT_half_2_4096.cpp @@ -215,16 +215,16 @@ VkFFTResult sample_1002_benchmark_VkFFT_half_2_4096(VkGPU* vkGPU, uint64_t file_ //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp index b793dae..3ad4296 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp @@ -212,16 +212,16 @@ VkFFTResult sample_1003_benchmark_VkFFT_single_3d_2_512(VkGPU* vkGPU, uint64_t f //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_1004_benchmark_VkFFT_quadDoubleDouble_2_4096.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_1004_benchmark_VkFFT_quadDoubleDouble_2_4096.cpp index 08e9882..7ce63cb 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_1004_benchmark_VkFFT_quadDoubleDouble_2_4096.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_1004_benchmark_VkFFT_quadDoubleDouble_2_4096.cpp @@ -216,16 +216,16 @@ VkFFTResult sample_1004_benchmark_VkFFT_quadDoubleDouble_2_4096(VkGPU* vkGPU, ui //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4; + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4; #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp index fe1bf31..c3b53a0 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp @@ -239,16 +239,16 @@ VkFFTResult sample_100_benchmark_VkFFT_single_nd_dct(VkGPU* vkGPU, uint64_t file //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4; + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4; #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp index 29be650..14d646c 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp @@ -240,16 +240,16 @@ VkFFTResult sample_101_benchmark_VkFFT_double_nd_dct(VkGPU* vkGPU, uint64_t file //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4; + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4; #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp index f0028ce..0c08033 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp @@ -198,9 +198,9 @@ VkFFTResult sample_10_benchmark_VkFFT_single_multipleBuffers(VkGPU* vkGPU, uint6 //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / (numBuf * bufferSize[0]) > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / (numBuf * bufferSize[0]); - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4; + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4; if (num_iter == 0) num_iter = 1; - if (vkGPU->physicalDeviceProperties.vendorID != 0x8086) num_iter *= 5; + if (vkGPU->physicalDeviceProperties.vendorID != VKFFT_VENDOR_INTEL) num_iter *= 5; double totTime = 0; VkFFTLaunchParams launchParams = {}; resFFT = performVulkanFFTiFFT(vkGPU, &app, &launchParams, num_iter, &totTime); diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_1_benchmark_VkFFT_double.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_1_benchmark_VkFFT_double.cpp index bb42d9d..67a1322 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_1_benchmark_VkFFT_double.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_1_benchmark_VkFFT_double.cpp @@ -206,16 +206,16 @@ VkFFTResult sample_1_benchmark_VkFFT_double(VkGPU* vkGPU, uint64_t file_output, //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4; + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4; #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_2_benchmark_VkFFT_half.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_2_benchmark_VkFFT_half.cpp index 8572198..632cb90 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_2_benchmark_VkFFT_half.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_2_benchmark_VkFFT_half.cpp @@ -95,7 +95,7 @@ VkFFTResult sample_2_benchmark_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, FI //PARAMETERS THAT CAN BE ADJUSTED FOR SPECIFIC GPU's - this configuration is by no means final form #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) { + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) { if (n > 22)//128byte coalescing has a limit of 2^24 max size configuration.coalescedMemory = 64; else @@ -104,7 +104,7 @@ VkFFTResult sample_2_benchmark_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, FI #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) { + if (vendorID == VKFFT_VENDOR_INTEL) { if (n > 22)//128byte coalescing has a limit of 2^24 max size configuration.coalescedMemory = 64; else @@ -114,7 +114,7 @@ VkFFTResult sample_2_benchmark_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, FI ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) { + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) { if (n > 22)//128byte coalescing has a limit of 2^24 max size configuration.coalescedMemory = 64; else @@ -236,11 +236,11 @@ VkFFTResult sample_2_benchmark_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, FI //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4; + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4; #elif(VKFFT_BACKEND==3) - if (vendorID == 0x8086) num_iter /= 4; + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4; #elif(VKFFT_BACKEND==4) - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_3_benchmark_VkFFT_single_3d.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_3_benchmark_VkFFT_single_3d.cpp index 931a148..6647e00 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_3_benchmark_VkFFT_single_3d.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_3_benchmark_VkFFT_single_3d.cpp @@ -211,16 +211,16 @@ VkFFTResult sample_3_benchmark_VkFFT_single_3d(VkGPU* vkGPU, uint64_t file_outpu //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4; + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4; #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp index 507af15..0f23f7a 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp @@ -222,16 +222,16 @@ VkFFTResult sample_4_benchmark_VkFFT_single_3d_zeropadding(VkGPU* vkGPU, uint64_ //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4; + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4; #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp index 9a39d4c..82f616f 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp @@ -206,16 +206,16 @@ VkFFTResult sample_5_benchmark_VkFFT_single_disableReorderFourStep(VkGPU* vkGPU, //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4; + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4; #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp index 82b87a1..cf8d969 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp @@ -205,16 +205,16 @@ VkFFTResult sample_6_benchmark_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_outp //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4; + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4; #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp index 9005213..e833eee 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp @@ -206,16 +206,16 @@ VkFFTResult sample_7_benchmark_VkFFT_single_Bluestein(VkGPU* vkGPU, uint64_t fil //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4; + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4; #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp index 35eea3f..8a8981a 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp @@ -207,16 +207,16 @@ VkFFTResult sample_8_benchmark_VkFFT_double_Bluestein(VkGPU* vkGPU, uint64_t fil //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4; + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4; #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_9_benchmark_VkFFT_quadDoubleDouble.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_9_benchmark_VkFFT_quadDoubleDouble.cpp index 36ca6a7..4122ea5 100644 --- a/benchmark_scripts/vkFFT_scripts/src/sample_9_benchmark_VkFFT_quadDoubleDouble.cpp +++ b/benchmark_scripts/vkFFT_scripts/src/sample_9_benchmark_VkFFT_quadDoubleDouble.cpp @@ -206,16 +206,16 @@ VkFFTResult sample_9_benchmark_VkFFT_quadDoubleDouble(VkGPU* vkGPU, uint64_t fil //Submit FFT+iFFT. uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize; #if(VKFFT_BACKEND==0) - if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4; + if (vkGPU->physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4; #elif(VKFFT_BACKEND==3) cl_uint vendorID; clGetDeviceInfo(vkGPU->device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, 0); - if (vendorID == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (vendorID == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #elif(VKFFT_BACKEND==4) ze_device_properties_t device_properties; res = zeDeviceGetProperties(vkGPU->device, &device_properties); if (res != 0) return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; - if (device_properties.vendorId == 0x8086) num_iter /= 4;//smaller benchmark for Intel GPUs + if (device_properties.vendorId == VKFFT_VENDOR_INTEL) num_iter /= 4;//smaller benchmark for Intel GPUs #endif if (num_iter == 0) num_iter = 1; double totTime = 0; diff --git a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h index 488bad4..68736ff 100644 --- a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h +++ b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h @@ -33,7 +33,7 @@ static inline VkFFTResult initializeBluesteinAutoPadding(VkFFTApplication* app) VkFFTResult resFFT = VKFFT_SUCCESS; if (!app->configuration.useCustomBluesteinPaddingPattern) { switch (app->configuration.vendorID) { - case 0x10DE://NVIDIA + case VKFFT_VENDOR_NVIDIA: if (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory || app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) { app->configuration.autoCustomBluesteinPaddingPattern = 49; } @@ -55,7 +55,7 @@ static inline VkFFTResult initializeBluesteinAutoPadding(VkFFTApplication* app) app->configuration.paddedSizes = (pfUINT*)malloc(app->configuration.autoCustomBluesteinPaddingPattern * sizeof(pfUINT)); if (!app->configuration.paddedSizes) return VKFFT_ERROR_MALLOC_FAILED; switch (app->configuration.vendorID) { - case 0x10DE://Nvidia + case VKFFT_VENDOR_NVIDIA: if (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory || app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) { app->configuration.primeSizes[0] = 17; app->configuration.paddedSizes[0] = 36; @@ -483,20 +483,20 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf VkPhysicalDeviceProperties physicalDeviceProperties = { 0 }; vkGetPhysicalDeviceProperties(app->configuration.physicalDevice[0], &physicalDeviceProperties); app->configuration.maxThreadsNum = physicalDeviceProperties.limits.maxComputeWorkGroupInvocations; - if (physicalDeviceProperties.vendorID == 0x8086) app->configuration.maxThreadsNum = 256; //Intel fix + if ((VkFFTVendor)physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) app->configuration.maxThreadsNum = 256; //Intel fix app->configuration.maxComputeWorkGroupCount[0] = physicalDeviceProperties.limits.maxComputeWorkGroupCount[0]; app->configuration.maxComputeWorkGroupCount[1] = physicalDeviceProperties.limits.maxComputeWorkGroupCount[1]; app->configuration.maxComputeWorkGroupCount[2] = physicalDeviceProperties.limits.maxComputeWorkGroupCount[2]; app->configuration.maxComputeWorkGroupSize[0] = physicalDeviceProperties.limits.maxComputeWorkGroupSize[0]; app->configuration.maxComputeWorkGroupSize[1] = physicalDeviceProperties.limits.maxComputeWorkGroupSize[1]; app->configuration.maxComputeWorkGroupSize[2] = physicalDeviceProperties.limits.maxComputeWorkGroupSize[2]; - //if ((physicalDeviceProperties.vendorID == 0x8086) && (!app->configuration.doublePrecision) && (!app->configuration.doublePrecisionFloatMemory)) app->configuration.halfThreads = 1; + //if ((physicalDeviceProperties.vendorID == VKFFT_VENDOR_INTEL) && (!app->configuration.doublePrecision) && (!app->configuration.doublePrecisionFloatMemory)) app->configuration.halfThreads = 1; app->configuration.sharedMemorySize = physicalDeviceProperties.limits.maxComputeSharedMemorySize; - app->configuration.vendorID = physicalDeviceProperties.vendorID; + app->configuration.vendorID = (VkFFTVendor)physicalDeviceProperties.vendorID; if (inputLaunchConfiguration.pipelineCache != 0) app->configuration.pipelineCache = inputLaunchConfiguration.pipelineCache; app->configuration.useRaderUintLUT = 1; switch (physicalDeviceProperties.vendorID) { - case 0x10DE://NVIDIA + case VKFFT_VENDOR_NVIDIA: app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 64 : 32;//the coalesced memory is equal to 32 bytes between L2 and VRAM. app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory || app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) ? 1 : -1; app->configuration.warpSize = 32; @@ -505,7 +505,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf app->configuration.registerBoost4Step = 1; app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision) ? 4194305 : 4194305; break; - case 0x8086://INTEL + case VKFFT_VENDOR_INTEL: app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 128 : 64; app->configuration.useLUT = 1; app->configuration.warpSize = 32; @@ -514,7 +514,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf app->configuration.registerBoost4Step = 1; app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision || app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) ? 262144 : 524288; break; - case 0x1002://AMD + case VKFFT_VENDOR_AMD: app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 64 : 32; app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory || app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) ? 1 : -1; app->configuration.warpSize = 64; @@ -649,7 +649,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf app->configuration.registerBoost = 1; app->configuration.registerBoost4Step = 1; app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision || app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) ? 4194305 : 4194305; - app->configuration.vendorID = 0x10DE; + app->configuration.vendorID = VKFFT_VENDOR_NVIDIA; #elif(VKFFT_BACKEND==2) hipError_t res = hipSuccess; if (inputLaunchConfiguration.device == 0) { @@ -754,7 +754,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf app->configuration.registerBoost = 1; app->configuration.registerBoost4Step = 1; app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision || app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) ? 1048576 : 2097152; - app->configuration.vendorID = 0x1002; + app->configuration.vendorID = VKFFT_VENDOR_AMD; #elif(VKFFT_BACKEND==3) cl_int res = 0; if (inputLaunchConfiguration.device == 0) { @@ -807,7 +807,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf app->configuration.maxComputeWorkGroupCount[0] = UINT64_MAX; app->configuration.maxComputeWorkGroupCount[1] = UINT64_MAX; app->configuration.maxComputeWorkGroupCount[2] = UINT64_MAX; - //if ((vendorID == 0x8086) && (!app->configuration.doublePrecision) && (!app->configuration.doublePrecisionFloatMemory)) app->configuration.halfThreads = 1; + //if ((vendorID == VKFFT_VENDOR_INTEL) && (!app->configuration.doublePrecision) && (!app->configuration.doublePrecisionFloatMemory)) app->configuration.halfThreads = 1; cl_ulong sharedMemorySize; res = clGetDeviceInfo(app->configuration.device[0], CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &sharedMemorySize, 0); if (res != 0) { @@ -815,10 +815,10 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE; } app->configuration.sharedMemorySize = sharedMemorySize; - app->configuration.vendorID = vendorID; + app->configuration.vendorID = (VkFFTVendor)vendorID; app->configuration.useRaderUintLUT = 1; switch (vendorID) { - case 0x10DE://NVIDIA + case VKFFT_VENDOR_NVIDIA: app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 64 : 32;//the coalesced memory is equal to 32 bytes between L2 and VRAM. app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory || app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) ? 1 : -1; app->configuration.warpSize = 32; @@ -828,7 +828,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision || app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) ? 4194305 : 4194305; app->configuration.sharedMemorySize -= 0x10;//reserved by system break; - case 0x8086://INTEL + case VKFFT_VENDOR_INTEL: app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 128 : 64; app->configuration.useLUT = 1; app->configuration.warpSize = 32; @@ -837,7 +837,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf app->configuration.registerBoost4Step = 1; app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision || app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) ? 262144 : 524288; break; - case 0x1002://AMD + case VKFFT_VENDOR_AMD: app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 64 : 32; app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory || app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) ? 1 : -1; app->configuration.warpSize = 64; @@ -889,7 +889,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf app->configuration.maxComputeWorkGroupCount[0] = compute_properties.maxGroupCountX; app->configuration.maxComputeWorkGroupCount[1] = compute_properties.maxGroupCountY; app->configuration.maxComputeWorkGroupCount[2] = compute_properties.maxGroupCountZ; - //if ((vendorID == 0x8086) && (!app->configuration.doublePrecision) && (!app->configuration.doublePrecisionFloatMemory)) app->configuration.halfThreads = 1; + //if ((vendorID == VKFFT_VENDOR_INTEL) && (!app->configuration.doublePrecision) && (!app->configuration.doublePrecisionFloatMemory)) app->configuration.halfThreads = 1; app->configuration.sharedMemorySize = compute_properties.maxSharedLocalMemory; app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 128 : 64; @@ -899,7 +899,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf app->configuration.registerBoost = (app->configuration.sharedMemorySize >= 65536) ? 1 : 2; app->configuration.registerBoost4Step = 1; app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision || app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) ? 262144 : 524288; - app->configuration.vendorID = 0x8086; + app->configuration.vendorID = VKFFT_VENDOR_INTEL; app->configuration.useRaderUintLUT = 1; #elif(VKFFT_BACKEND==5) if (inputLaunchConfiguration.device == 0) { @@ -958,7 +958,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf app->configuration.registerBoost = 1; app->configuration.registerBoost4Step = 1; app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision || app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) ? 262144 : 524288; - app->configuration.vendorID = 0x1027f00; + app->configuration.vendorID = VKFFT_VENDOR_APPLE; dummy_state->release(); function->release(); @@ -1212,10 +1212,10 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf } else{ app->configuration.fixMinRaderPrimeMult = 17; switch (app->configuration.vendorID) { - case 0x10DE://NVIDIA + case VKFFT_VENDOR_NVIDIA: app->configuration.fixMaxRaderPrimeMult = 89; break; - case 0x1002://AMD profile + case VKFFT_VENDOR_AMD: app->configuration.fixMaxRaderPrimeMult = 89; break; default: @@ -1227,7 +1227,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf if (inputLaunchConfiguration.fixMaxRaderPrimeMult != 0) app->configuration.fixMaxRaderPrimeMult = inputLaunchConfiguration.fixMaxRaderPrimeMult; switch (app->configuration.vendorID) { - case 0x1002://AMD profile + case VKFFT_VENDOR_AMD: if (app->configuration.quadDoubleDoublePrecision || app->configuration.quadDoubleDoublePrecisionDoubleMemory) app->configuration.fixMinRaderPrimeFFT = 19; else if (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_HostFunctions/vkFFT_AxisBlockSplitter.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_HostFunctions/vkFFT_AxisBlockSplitter.h index 5a80f16..11b1db7 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_HostFunctions/vkFFT_AxisBlockSplitter.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_HostFunctions/vkFFT_AxisBlockSplitter.h @@ -125,7 +125,7 @@ static inline VkFFTResult VkFFTSplitAxisBlock(VkFFTApplication* app, VkFFTPlan* if ((scale > 1) && ((axis->specializationConstants.fftDim.data.i * axis->groupedBatch * scale <= maxSequenceLengthSharedMemory))) axis->groupedBatch *= scale; axis->axisBlock[0] = ((pfUINT)axis->specializationConstants.stageStartSize.data.i > axis->groupedBatch) ? axis->groupedBatch : (pfUINT)axis->specializationConstants.stageStartSize.data.i; - if (app->configuration.vendorID == 0x10DE) { + if (app->configuration.vendorID == VKFFT_VENDOR_NVIDIA) { while ((axis->axisBlock[1] * axis->axisBlock[0] >= 2 * app->configuration.aimThreads) && (axis->axisBlock[0] > maxBatchCoalesced)) { axis->axisBlock[0] /= 2; if (axis->axisBlock[0] < maxBatchCoalesced) axis->axisBlock[0] = maxBatchCoalesced; @@ -209,7 +209,7 @@ static inline VkFFTResult VkFFTSplitAxisBlock(VkFFTApplication* app, VkFFTPlan* //axis->groupedBatch = 8; //shared memory bank conflict resolve //#if(VKFFT_BACKEND!=2)//for some reason, hip doesn't get performance increase from having variable shared memory strides. - if (app->configuration.vendorID == 0x10DE) { + if (app->configuration.vendorID == VKFFT_VENDOR_NVIDIA) { if (FFTPlan->numAxisUploads[axis_id] == 2) { if ((axis_upload_id > 0) || (axis->specializationConstants.fftDim.data.i <= 512)) { if ((pfUINT)(axis->specializationConstants.fftDim.data.i * (64 / axis->specializationConstants.complexSize)) <= maxSequenceLengthSharedMemory) { @@ -327,7 +327,7 @@ static inline VkFFTResult VkFFTSplitAxisBlock(VkFFTApplication* app, VkFFTPlan* r2cmult = 1; } if ((FFTPlan->numAxisUploads[0] == 1) && ((pfUINT)pfceil(FFTPlan->actualFFTSizePerAxis[axis_id][1] / (double)r2cmult) < axis->axisBlock[1])) axis->axisBlock[1] = (pfUINT)pfceil(FFTPlan->actualFFTSizePerAxis[axis_id][1] / (double)r2cmult); - if (app->configuration.vendorID == 0x10DE) { + if (app->configuration.vendorID == VKFFT_VENDOR_NVIDIA) { while ((axis->axisBlock[1] * axis->axisBlock[0] >= 2 * app->configuration.aimThreads) && (axis->axisBlock[1] > maxBatchCoalesced)) { axis->axisBlock[1] /= 2; if (axis->axisBlock[1] < maxBatchCoalesced) axis->axisBlock[1] = maxBatchCoalesced; @@ -394,7 +394,7 @@ static inline VkFFTResult VkFFTSplitAxisBlock(VkFFTApplication* app, VkFFTPlan* if ((scale > 1) && ((axis->specializationConstants.fftDim.data.i * axis->groupedBatch * scale <= maxSequenceLengthSharedMemory))) axis->groupedBatch *= scale; axis->axisBlock[0] = ((pfUINT)axis->specializationConstants.stageStartSize.data.i > axis->groupedBatch) ? axis->groupedBatch : axis->specializationConstants.stageStartSize.data.i; - if (app->configuration.vendorID == 0x10DE) { + if (app->configuration.vendorID == VKFFT_VENDOR_NVIDIA) { while ((axis->axisBlock[1] * axis->axisBlock[0] >= 2 * app->configuration.aimThreads) && (axis->axisBlock[0] > maxBatchCoalesced)) { axis->axisBlock[0] /= 2; if (axis->axisBlock[0] < maxBatchCoalesced) axis->axisBlock[0] = maxBatchCoalesced; @@ -444,7 +444,7 @@ static inline VkFFTResult VkFFTSplitAxisBlock(VkFFTApplication* app, VkFFTPlan* } axis->axisBlock[0] = (FFTPlan->actualFFTSizePerAxis[axis_id][0] > axis->groupedBatch) ? axis->groupedBatch : FFTPlan->actualFFTSizePerAxis[axis_id][0]; - if (app->configuration.vendorID == 0x10DE) { + if (app->configuration.vendorID == VKFFT_VENDOR_NVIDIA) { while ((axis->axisBlock[1] * axis->axisBlock[0] >= 2 * app->configuration.aimThreads) && (axis->axisBlock[0] > maxBatchCoalesced)) { axis->axisBlock[0] /= 2; if (axis->axisBlock[0] < maxBatchCoalesced) axis->axisBlock[0] = maxBatchCoalesced; diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_HostFunctions/vkFFT_Scheduler.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_HostFunctions/vkFFT_Scheduler.h index 1c81317..4de0ec6 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_HostFunctions/vkFFT_Scheduler.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_HostFunctions/vkFFT_Scheduler.h @@ -2653,7 +2653,7 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl locAxisSplit[0] = FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]; } if (numPasses == 2) { - if (isPowOf2 && (!((app->configuration.vendorID == 0x10DE) && (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] > 262144)))) { + if (isPowOf2 && (!((app->configuration.vendorID == VKFFT_VENDOR_NVIDIA) && (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] > 262144)))) { if ((axis_id == nonStridedAxisId) && ((!app->configuration.reorderFourStep) || (app->useBluesteinFFT[axis_id]))) { int maxPow8SharedMemory = (int)pow(8, ((int)log2(maxSequenceLengthSharedMemory)) / 3); //unit stride @@ -2749,7 +2749,7 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl } } if (numPasses == 3) { - if (isPowOf2 && (!((app->configuration.vendorID == 0x10DE) && (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] > 262144)))) { + if (isPowOf2 && (!((app->configuration.vendorID == VKFFT_VENDOR_NVIDIA) && (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] > 262144)))) { int maxPow8Strided = (int)pow(8, ((int)log2(maxSingleSizeStrided)) / 3); if ((axis_id == nonStridedAxisId) && ((!app->configuration.reorderFourStep) || (app->useBluesteinFFT[axis_id]))) { //unit stride diff --git a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h index 67ed7d7..7b9490b 100644 --- a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h +++ b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h @@ -90,6 +90,14 @@ struct PfContainer{ int size; // bytes allcoated in name }; +typedef enum VkFFTVendor +{ + VKFFT_VENDOR_AMD = 0x1002, + VKFFT_VENDOR_APPLE = 0x1027f00, + VKFFT_VENDOR_INTEL = 0x8086, + VKFFT_VENDOR_NVIDIA = 0x10de +} VkFFTVendor; + typedef struct { //WHDCN layout @@ -303,7 +311,7 @@ typedef struct { pfINT maxTempLength; //specify how big can be buffer used for intermediate string sprintfs be (in char). Default 5000 chars. If code segfaults for some reason - try increasing this number. pfUINT autoCustomBluesteinPaddingPattern; // default value for useCustomBluesteinPaddingPattern pfUINT useRaderUintLUT; // allocate additional LUT to store g_pow - pfUINT vendorID; // vendorID 0x10DE - NVIDIA, 0x8086 - Intel, 0x1002 - AMD, etc. + VkFFTVendor vendorID; // vendorID #if(VKFFT_BACKEND==0) VkDeviceMemory tempBufferDeviceMemory;//Filled at app creation VkCommandBuffer* commandBuffer;//Filled at app execution