diff --git a/CMakeLists.txt b/CMakeLists.txt index 29b152c..aa821e2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,4 +6,6 @@ if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() +add_compile_options(-O3 -ffast-math -march=native -mtune=native -fopenmp ) + add_executable(main main.cpp) diff --git a/main.cpp b/main.cpp index cf6369b..01f6c98 100644 --- a/main.cpp +++ b/main.cpp @@ -4,11 +4,13 @@ #include #include -float frand() { +static float frand() +{ return (float)rand() / RAND_MAX * 2 - 1; } -struct Star { +struct alignas(32) Star +{ float px, py, pz; float vx, vy, vz; float mass; @@ -16,11 +18,17 @@ struct Star { std::vector stars; -void init() { - for (int i = 0; i < 48; i++) { +void init() +{ + for (size_t i = 0; i < 48; i++) + { stars.push_back({ - frand(), frand(), frand(), - frand(), frand(), frand(), + frand(), + frand(), + frand(), + frand(), + frand(), + frand(), frand() + 1, }); } @@ -29,45 +37,58 @@ void init() { float G = 0.001; float eps = 0.001; float dt = 0.01; +float eps2 = eps * eps; +float mm = G * dt; -void step() { - for (auto &star: stars) { - for (auto &other: stars) { +void step() +{ +#pragma GCC ivdep + // #pragma omp simd + for (auto &star : stars) + { + for (auto &other : stars) + { float dx = other.px - star.px; float dy = other.py - star.py; float dz = other.pz - star.pz; - float d2 = dx * dx + dy * dy + dz * dz + eps * eps; - d2 *= sqrt(d2); - star.vx += dx * other.mass * G * dt / d2; - star.vy += dy * other.mass * G * dt / d2; - star.vz += dz * other.mass * G * dt / d2; + float d2 = dx * dx + dy * dy + dz * dz + eps2; + d2 *= std::sqrt(d2); + star.vx += dx * other.mass * mm / d2; + star.vy += dy * other.mass * mm / d2; + star.vz += dz * other.mass * mm / d2; } } - for (auto &star: stars) { + for (auto &star : stars) + + { star.px += star.vx * dt; star.py += star.vy * dt; star.pz += star.vz * dt; } } -float calc() { +float calc() +{ float energy = 0; - for (auto &star: stars) { + for (auto &star : stars) + { float v2 = star.vx * star.vx + star.vy * star.vy + star.vz * star.vz; - energy += star.mass * v2 / 2; - for (auto &other: stars) { + energy += star.mass * v2 * 0.5f; + for (auto &other : stars) + { float dx = other.px - star.px; float dy = other.py - star.py; float dz = other.pz - star.pz; - float d2 = dx * dx + dy * dy + dz * dz + eps * eps; - energy -= other.mass * star.mass * G / sqrt(d2) / 2; + float d2 = dx * dx + dy * dy + dz * dz + eps2; + energy -= other.mass * star.mass * G / std::sqrt(d2) * 0.5f; } } return energy; } template -long benchmark(Func const &func) { +long benchmark(Func const &func) +{ auto t0 = std::chrono::steady_clock::now(); func(); auto t1 = std::chrono::steady_clock::now(); @@ -75,13 +96,14 @@ long benchmark(Func const &func) { return dt.count(); } -int main() { +int main() +{ init(); printf("Initial energy: %f\n", calc()); - auto dt = benchmark([&] { - for (int i = 0; i < 100000; i++) - step(); - }); + auto dt = benchmark([&] + { + for (size_t i = 0; i < 100000; i++) + step(); }); printf("Final energy: %f\n", calc()); printf("Time elapsed: %ld ms\n", dt); return 0;