Skip to content

Commit e26c5c4

Browse files
committed
Add updates for AVX512 version of POV-RAY
1 parent ca89731 commit e26c5c4

File tree

79 files changed

+4052
-260
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+4052
-260
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
Notes for AVX512 Windows build
2+
==============================
3+
4+
The Visual Studio version was updated from VS2015 to VS2022 to enable support for the AVX512 version.
5+
1. Set the configuration in solution file to Release-AVX512 | x64
6+
2. Select the 'Generic POV-Ray > povbase' project and expand 'Backend Headers', then open the
7+
file `build.h` (source/base/build.h) listed within it. In that file, set the `BUILT_BY` flag to the name
8+
and email of the person building the code, and comment out the #error directive (line 129).
9+
3. In `syspovconfig.h` (windows/povconfig/syspovconfig.h), uncomment the `#define _CONSOLE` line (line 56).
10+
The AVX512 version was developed with the console version.
11+
The GUI build has been skipped in the solution file.
12+
**Note:** (Presently with the updated code the GUI project is skipped for building,
13+
as the cmedit64.dll and povcmax64.dll from official windows distribution are
14+
incompatible with VS2022. The console version alone is available to build and test).
15+
4. Build the solution; the POV-Ray examples can then be run from the vs2022/bin64 folder with povconsole-avx512.exe.
16+
```
17+
General command example - povconsole-avx512.exe +Ibenchmark.pov
18+
Single worker thread - povconsole-avx512.exe +WT1 benchmark.pov
19+
Output image - benchmark.png
20+
```
21+
5. Results for the AVX512 version have been attached in the same folder.
22+
23+
Notes for UNIX build
24+
====================
25+
26+
Dependencies for unix build
27+
```
28+
libboost-dev
29+
libboost-date-time-dev
30+
libboost-thread-dev
31+
libz-dev
32+
libpng-dev
33+
libjpeg-dev
34+
libtiff-dev
35+
libopenexr-dev
36+
pkg-config (if it is not already installed)
37+
```
38+
39+
Steps :
40+
Generating configure and building the code :
41+
```
42+
% cd unix/
43+
% ./prebuild.sh
44+
% cd ../
45+
% ./configure COMPILED_BY="your name <email@address>"
46+
% make
47+
```
48+
49+
To build with icpc :
50+
```
51+
% source /opt/intel/oneapi/setvars.sh
52+
% cd unix/
53+
% ./prebuild.sh
54+
% cd ../
55+
% ./configure COMPILED_BY="your name <email@address>" CXX=icpc
56+
% make
57+
```
58+
59+
Sample commands (inside the unix folder) :
60+
```
61+
General command example - ./povray +Ibenchmark.pov
62+
Single worker thread - ./povray +WT1 benchmark.pov
63+
Output image - benchmark.png
64+
```
65+
Binary file not shown.

platform/x86/avx512/avx512noise.cpp

Lines changed: 1435 additions & 0 deletions
Large diffs are not rendered by default.

platform/x86/avx512/avx512noise.h

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
//******************************************************************************
2+
///
3+
/// @file platform/x86/avx512/avx512noise.h
4+
///
5+
/// This file contains declarations related to implementations of the noise
6+
/// generator optimized for the AVX512 instruction set.
7+
///
8+
/// @copyright
9+
/// @parblock
10+
///
11+
/// Persistence of Vision Ray Tracer ('POV-Ray') version 3.8.
12+
/// Copyright 1991-2017 Persistence of Vision Raytracer Pty. Ltd.
13+
///
14+
/// POV-Ray is free software: you can redistribute it and/or modify
15+
/// it under the terms of the GNU Affero General Public License as
16+
/// published by the Free Software Foundation, either version 3 of the
17+
/// License, or (at your option) any later version.
18+
///
19+
/// POV-Ray is distributed in the hope that it will be useful,
20+
/// but WITHOUT ANY WARRANTY; without even the implied warranty of
21+
/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22+
/// GNU Affero General Public License for more details.
23+
///
24+
/// You should have received a copy of the GNU Affero General Public License
25+
/// along with this program. If not, see <http://www.gnu.org/licenses/>.
26+
///
27+
/// ----------------------------------------------------------------------------
28+
///
29+
/// POV-Ray is based on the popular DKB raytracer version 2.12.
30+
/// DKBTrace was originally written by David K. Buck.
31+
/// DKBTrace Ver 2.0-2.12 were written by David K. Buck & Aaron A. Collins.
32+
///
33+
/// @endparblock
34+
///
35+
//******************************************************************************
36+
37+
#ifndef POVRAY_AVX512NOISE_H
38+
#define POVRAY_AVX512NOISE_H
39+
40+
#include "core/configcore.h"
41+
#include "core/math/vector.h"
42+
43+
#ifdef TRY_OPTIMIZED_NOISE_AVX512
44+
45+
/// Declarations for the AVX512-optimized noise generator.
/// Everything here is only visible when TRY_OPTIMIZED_NOISE_AVX512 is defined.
namespace pov
46+
{
47+
48+
/// Flag defined outside this header; its address is stored as the `enabled` field of the
/// "avx512-mcw" entry in the optimized-noise table (platform/x86/optimizednoise.cpp).
extern const bool kAVX512NoiseEnabled;
49+
/// Initialization hook; registered as the `init` field of the "avx512-mcw" table entry.
void AVX512NoiseInit();
50+
51+
/// Optimized Noise function for a single input, for the AVX512 architecture.
/// Returns the noise value as a DBL.
52+
DBL AVX512Noise(const Vector3d& EPoint, int noise_generator);
53+
54+
/// Optimized DNoise function for a single input, for the AVX512 architecture.
/// Returns nothing; presumably the output is delivered through `result` -- TODO confirm.
55+
void AVX512DNoise(Vector3d& result, const Vector3d& EPoint);
56+
57+
/// Optimized Noise function for two inputs using AVX512 instructions.
/// Returns nothing; presumably the output is delivered through `value` -- TODO confirm.
/// NOTE(review): described as taking two inputs, yet the signature has a single `EPoint`
/// reference and a single `double&` -- confirm how the second input/output is passed.
58+
/// @author Optimized by MCW
59+
void AVX512Noise2D(const Vector3d& EPoint, int noise_generator, double& value);
60+
61+
/// Optimized DNoise function for two inputs using AVX512 instructions.
/// NOTE(review): same question as for AVX512Noise2D regarding where the second input lives.
62+
/// @author Optimized by MCW
63+
void AVX512DNoise2D(Vector3d& result, const Vector3d& EPoint);
64+
65+
/// Optimized Noise function for 8 multiples of a single input using AVX512 instructions.
/// Returns a single DBL.
66+
/// @author Optimized by MCW
67+
DBL AVX512Noise8D(const Vector3d& EPoint, int noise_generator);
68+
69+
}
70+
71+
#endif // TRY_OPTIMIZED_NOISE_AVX512
72+
73+
#endif // POVRAY_AVX512NOISE_H

platform/x86/cpuid.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ static unsigned long long getXCR0()
114114
#define CPUID_00000001_ECX_AVX_MASK (0x1 << 28)
115115
#define CPUID_00000001_EDX_SSE2_MASK (0x1 << 26)
116116
#define CPUID_00000007_EBX_AVX2_MASK (0x1 << 5)
117+
// Bit 16 of EBX for CPUID leaf 7: the AVX-512 Foundation (AVX512F) feature flag.
// NOTE(review): other AVX-512 subsets (e.g. DQ, BW, VL) are reported by separate bits that
// are not covered by this mask -- confirm the AVX512 noise code only needs AVX512F.
#define CPUID_00000007_EBX_AVX512_MASK (0x1 << 16)
117118
#define CPUID_80000001_ECX_FMA4_MASK (0x1 << 16)
118119

119120
// Masks for relevant XCR0 register bits.
@@ -170,6 +171,7 @@ struct CPUIDInfo
170171
bool sse2 : 1;
171172
bool avx : 1;
172173
bool avx2 : 1;
174+
bool avx512 : 1;
173175
bool fma3 : 1;
174176
bool fma4 : 1;
175177
#if POV_CPUINFO_DEBUG
@@ -184,6 +186,7 @@ CPUIDInfo::CPUIDInfo() :
184186
sse2(false),
185187
avx(false),
186188
avx2(false),
189+
avx512(false),
187190
fma3(false),
188191
fma4(false),
189192
vendorId(kCPUVendor_Unrecognized)
@@ -220,6 +223,11 @@ CPUIDInfo::CPUIDInfo() :
220223
CPUID(info, 0x7);
221224
avx2 = ((info[CPUID_EBX] & CPUID_00000007_EBX_AVX2_MASK) != 0);
222225
}
226+
if (maxLeaf >= 0x7)
227+
{
228+
CPUID(info, 0x7);
229+
avx512 = ((info[CPUID_EBX] & CPUID_00000007_EBX_AVX512_MASK) != 0);
230+
}
223231
CPUID(info, 0x80000000);
224232
int maxLeafExt = info[CPUID_EAX];
225233
if (maxLeafExt >= (int)0x80000001)
@@ -233,6 +241,7 @@ struct OSInfo
233241
{
234242
bool xcr0_sse : 1;
235243
bool xcr0_avx : 1;
244+
bool xcr0_avx512 : 1;
236245
OSInfo(const CPUIDInfo& cpuinfo);
237246
};
238247

@@ -278,6 +287,16 @@ bool CPUInfo::SupportsAVX()
278287
&& gpData->osInfo.xcr0_avx;
279288
}
280289

290+
/// Test whether CPU and OS support AVX512.
///
/// Returns true only if the CPUID-derived flags `osxsave`, `avx`, `avx2` and `avx512` are
/// all set and the OS-side flags `xcr0_sse` and `xcr0_avx` are set as well.
///
/// NOTE(review): `OSInfo::xcr0_avx512` is declared but not consulted here, so the OS-side
/// test is the same one used for plain AVX. AVX-512 additionally requires the OS to enable
/// the opmask and ZMM state components in XCR0 (bits 5-7 per the Intel SDM); confirm that
/// `xcr0_avx512` is populated by the OSInfo constructor and add it to this check.
bool CPUInfo::SupportsAVX512()
291+
{
292+
return gpData->cpuidInfo.osxsave
293+
&& gpData->cpuidInfo.avx
294+
&& gpData->cpuidInfo.avx2
295+
&& gpData->cpuidInfo.avx512
296+
&& gpData->osInfo.xcr0_sse
297+
&& gpData->osInfo.xcr0_avx;
298+
}
299+
281300
bool CPUInfo::SupportsAVX2()
282301
{
283302
return gpData->cpuidInfo.osxsave
@@ -329,6 +348,8 @@ std::string CPUInfo::GetFeatures()
329348
features.push_back("AVX");
330349
if (SupportsAVX2())
331350
features.push_back("AVX2");
351+
if (SupportsAVX512())
352+
features.push_back("AVX512");
332353
if (SupportsFMA3())
333354
features.push_back("FMA3");
334355
if (SupportsFMA4())
@@ -356,6 +377,8 @@ std::string CPUInfo::GetDetails()
356377
cpuidFeatures.push_back("AVX");
357378
if (gpData->cpuidInfo.avx2)
358379
cpuidFeatures.push_back("AVX2");
380+
if (gpData->cpuidInfo.avx512)
381+
cpuidFeatures.push_back("AVX512");
359382
if (gpData->cpuidInfo.fma3)
360383
cpuidFeatures.push_back("FMA");
361384
if (gpData->cpuidInfo.fma4)
@@ -371,6 +394,8 @@ std::string CPUInfo::GetDetails()
371394

372395
if (gpData->osInfo.xcr0_avx)
373396
xcr0Features.push_back("AVX");
397+
if (gpData->osInfo.xcr0_avx)
398+
xcr0Features.push_back("AVX512");
374399
if (gpData->osInfo.xcr0_sse)
375400
xcr0Features.push_back("SSE");
376401

platform/x86/cpuid.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ class CPUInfo
4545
static bool SupportsSSE2(); ///< Test whether CPU and OS support SSE2.
4646
static bool SupportsAVX(); ///< Test whether CPU and OS support AVX.
4747
static bool SupportsAVX2(); ///< Test whether CPU and OS support AVX2.
48+
static bool SupportsAVX512(); ///< Test whether CPU and OS support AVX512.
4849
static bool SupportsFMA3(); ///< Test whether CPU and OS support FMA3.
4950
static bool SupportsFMA4(); ///< Test whether CPU and OS support FMA4.
5051
static bool IsIntel(); ///< Test whether CPU is genuine Intel product.

platform/x86/optimizednoise.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@
3939

4040
#include "core/material/noise.h"
4141

42+
#ifdef TRY_OPTIMIZED_NOISE_AVX512
43+
#include "avx512/avx512noise.h"
44+
#endif
45+
4246
#ifdef TRY_OPTIMIZED_NOISE_AVX2FMA3
4347
#include "avx2fma3/avx2fma3noise.h"
4448
#endif
@@ -65,19 +69,48 @@ namespace pov
6569
/// Support predicate for the AVX noise implementations.
/// Forwards the answer of CPUInfo::SupportsAVX().
static bool AVXSupported()
{
    const bool hasAVX = CPUInfo::SupportsAVX();
    return hasAVX;
}
6670
/// Support predicate for the AVX/FMA4 noise implementation.
/// True only when CPUInfo reports both AVX and FMA4; FMA4 is not queried if AVX is missing.
static bool AVXFMA4Supported()
{
    if (!CPUInfo::SupportsAVX())
        return false;
    return CPUInfo::SupportsFMA4();
}
6771
/// Support predicate for the AVX2/FMA3 noise implementation.
/// True only when CPUInfo reports both AVX2 and FMA3; FMA3 is not queried if AVX2 is missing.
static bool AVX2FMA3Supported()
{
    if (!CPUInfo::SupportsAVX2())
        return false;
    return CPUInfo::SupportsFMA3();
}
72+
/// Support predicate for the AVX512 noise implementation.
/// Forwards the answer of CPUInfo::SupportsAVX512().
static bool AVX512Supported()
{
    const bool hasAVX512 = CPUInfo::SupportsAVX512();
    return hasAVX512;
}
6873

6974
/// List of optimized noise implementations.
7075
///
7176
/// @note
7277
/// Entries must be listed in descending order of preference.
7378
///
7479
OptimizedNoiseInfo gaOptimizedNoiseInfo[] = {
80+
#ifdef TRY_OPTIMIZED_NOISE_AVX512
81+
{
82+
"avx512-mcw", // name,
83+
"hand-optimized by MCW", // info,
84+
AVX512Noise, // noise,
85+
AVX512DNoise, // dNoise,
86+
AVX512Noise2D, // noise2D,
87+
AVX512DNoise2D, // dNoise2D,
88+
AVX512Noise8D, // noise8D,
89+
DTurbulenceAVX512, // DTurbulence
90+
Initialize_WavesAVX512, // Initalize Waves
91+
TurbulenceAVX512, // Turbulence
92+
wrinklesAVX512, // wrinkles
93+
true, // value to set versions of WrinklesPattern and GranitePattern
94+
&kAVX512NoiseEnabled, // enabled,
95+
AVX512Supported, // supported,
96+
nullptr, // recommended,
97+
AVX512NoiseInit // init
98+
},
99+
#endif
75100
#ifdef TRY_OPTIMIZED_NOISE_AVX2FMA3
76101
{
77102
"avx2fma3-intel", // name,
78103
"hand-optimized by Intel", // info,
79104
AVX2FMA3Noise, // noise,
80105
AVX2FMA3DNoise, // dNoise,
106+
nullptr, // noise2D
107+
nullptr, // dnoise2D,
108+
nullptr, // noise8D,
109+
DTurbulenceAVX, // DTurbulence
110+
Initialize_WavesAVX, // Initalize Waves
111+
TurbulenceAVX, // Turbulence
112+
wrinklesAVX, // wrinkles
113+
false, // value to set versions of WrinklesPattern and GranitePattern
81114
&kAVX2FMA3NoiseEnabled, // enabled,
82115
AVX2FMA3Supported, // supported,
83116
CPUInfo::IsIntel, // recommended,
@@ -90,6 +123,14 @@ OptimizedNoiseInfo gaOptimizedNoiseInfo[] = {
90123
"hand-optimized by AMD, 2017-04 update", // info,
91124
AVXFMA4Noise, // noise,
92125
AVXFMA4DNoise, // dNoise,
126+
nullptr, // noise2D
127+
nullptr, // dnoise2D,
128+
nullptr, // noise8D,
129+
DTurbulenceAVX, // DTurbulence
130+
Initialize_WavesAVX, // Initalize Waves
131+
TurbulenceAVX, // Turbulence
132+
wrinklesAVX, // wrinkles
133+
false, // value to set versions of WrinklesPattern and GranitePattern
93134
&kAVXFMA4NoiseEnabled, // enabled,
94135
AVXFMA4Supported, // supported,
95136
nullptr, // recommended,
@@ -102,6 +143,14 @@ OptimizedNoiseInfo gaOptimizedNoiseInfo[] = {
102143
"hand-optimized by Intel", // info,
103144
AVXNoise, // noise,
104145
AVXDNoise, // dNoise,
146+
nullptr, // noise2D
147+
nullptr, // dnoise2D,
148+
nullptr, // noise8D,
149+
DTurbulenceAVX, // DTurbulence
150+
Initialize_WavesAVX, // Initalize Waves
151+
TurbulenceAVX, // Turbulence
152+
wrinklesAVX, // wrinkles
153+
false, // value to set versions of WrinklesPattern and GranitePattern
105154
&kAVXNoiseEnabled, // enabled,
106155
AVXSupported, // supported,
107156
CPUInfo::IsIntel, // recommended,
@@ -114,6 +163,14 @@ OptimizedNoiseInfo gaOptimizedNoiseInfo[] = {
114163
"compiler-optimized", // info,
115164
AVXPortableNoise, // noise,
116165
AVXPortableDNoise, // dNoise,
166+
nullptr, // noise2D
167+
nullptr, // dnoise2D,
168+
nullptr, // noise8D,
169+
DTurbulenceAVX, // DTurbulence
170+
Initialize_WavesAVX, // Initalize Waves
171+
TurbulenceAVX, // Turbulence
172+
wrinklesAVX, // wrinkles
173+
false, // value to set versions of WrinklesPattern and GranitePattern
117174
&kAVXPortableNoiseEnabled, // enabled,
118175
AVXSupported, // supported,
119176
nullptr, // recommended,

0 commit comments

Comments
 (0)