From f3f199713e63857437eb3cf4b678274080123ce1 Mon Sep 17 00:00:00 2001
From: Stephan Schulz
Date: Tue, 20 Jul 2021 19:58:32 +0200
Subject: [PATCH 01/64] add vim temp files to gitignore

---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index a53d962..35aea67 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-*.mp4
\ No newline at end of file
+*.mp4
+*.swp

From 7a5cc305dfbb548c7c4ee03dc77f8d4406710006 Mon Sep 17 00:00:00 2001
From: Stephan Schulz
Date: Tue, 20 Jul 2021 19:59:15 +0200
Subject: [PATCH 02/64] add initial rough integration of all lb

---
 CMakeLists.txt        |  1 +
 src/CMakeLists.txt    |  3 ++-
 src/ExaMPM_Solver.hpp | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e087c98..5a01ab7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,7 @@ if( NOT Cabana_ENABLE_CAJITA )
   message( FATAL_ERROR "Cabana must be compiled with Cajita" )
 endif()
 find_package(Silo REQUIRED)
+find_package(ALL 0.9.1 REQUIRED)

 # library
 add_subdirectory(src)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1dd8405..14e9e0d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -20,7 +20,8 @@ add_library(exampm ${SOURCES})
 target_link_libraries(exampm
   Cabana::cabanacore
   Cabana::Cajita
-  Silo::silo )
+  Silo::silo
+  ALL::ALL )

 target_include_directories(exampm
   PUBLIC
diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp
index 83a45f2..845aeeb 100644
--- a/src/ExaMPM_Solver.hpp
+++ b/src/ExaMPM_Solver.hpp
@@ -20,6 +20,8 @@

 #include <Cajita.hpp>

+#include <ALL.hpp>
+
 #include <Kokkos_Core.hpp>

 #include <memory>

@@ -74,6 +76,23 @@ class Solver : public SolverBase
             bulk_modulus, density, gamma, kappa );

         MPI_Comm_rank( comm, &_rank );
+
+        _liball = std::make_shared<ALL::ALL<double, double>>(ALL::STAGGERED, 3, 0);
+        auto global_grid = _mesh->localGrid()->globalGrid();
+        std::vector<int> block_id(3,0);
+        for(std::size_t i=0; i<3; ++i)
+            block_id.at(i) = global_grid.dimBlockId(i);
+        std::vector<int> blocks_per_dim(3,0);
+        for(std::size_t i=0; i<3; ++i)
+            blocks_per_dim.at(i) = global_grid.dimNumBlock(i);
+        _liball->setProcGridParams(block_id, blocks_per_dim);
+        std::vector<double> min_domain_size(3,0);
+        for(std::size_t i=0; i<3; ++i)
+            min_domain_size.at(i) = 3.*_mesh->cellSize();
+        _liball->setMinDomainSize(min_domain_size);
+        _liball->setCommunicator(MPI_COMM_WORLD);
+        _liball->setProcTag(_rank);
+        _liball->setup();
     }

     void solve( const double t_final, const int write_freq ) override
@@ -96,9 +115,20 @@ class Solver : public SolverBase
             TimeIntegrator::step( ExecutionSpace(), *_pm, delta_t, _gravity,
                                   _bc );

+            auto min_index = _mesh->minDomainGlobalNodeIndex();
+            auto max_index = _mesh->maxDomainGlobalNodeIndex();
+            std::vector<ALL::Point<double>> vertices(2, ALL::Point<double>(3));
+            for(std::size_t d=0; d<3; ++d)
+                vertices.at(0)[d] = min_index[d];
+            for(std::size_t d=0; d<3; ++d)
+                vertices.at(1)[d] = max_index[d];
+            _liball->setVertices(vertices);
+            _liball->setWork(0.);
+
             _pm->communicateParticles( _halo_min );

             if ( 0 == t % write_freq )
+            {
                 SiloParticleWriter::writeTimeStep(
                     _mesh->localGrid()->globalGrid(),
                     t+1,
@@ -106,6 +136,8 @@ class Solver : public SolverBase
                     _pm->get( Location::Particle(), Field::Position() ),
                     _pm->get( Location::Particle(), Field::Velocity() ),
                     _pm->get( Location::Particle(), Field::J() ) );
+                _liball->printVTKoutlines(t);
+            }

             time += delta_t;
         }
@@ -120,6 +152,7 @@ class Solver : public SolverBase
     std::shared_ptr<Mesh<MemorySpace>> _mesh;
     std::shared_ptr<ProblemManager<MemorySpace>> _pm;
     int _rank;
+    std::shared_ptr<ALL::ALL<double, double>> _liball;
 };
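
A note on the wiring introduced in the patch above: ALL is driven in the same short cycle on every balancing step. The sketch below restates that cycle using only calls that appear in this patch series (setVertices, setWork, balance, getVertices); liball, low_corner, high_corner and local_work are placeholder names for this sketch, not identifiers from the patches:

    // Minimal sketch of one ALL balancing cycle (assumes an initialized
    // ALL::ALL<double, double> named "liball", set up as in the constructor above).
    std::vector<ALL::Point<double>> verts( 2, ALL::Point<double>( 3 ) );
    for ( std::size_t d = 0; d < 3; ++d )
    {
        verts.at( 0 )[d] = low_corner[d];  // placeholder: current lower domain corner
        verts.at( 1 )[d] = high_corner[d]; // placeholder: current upper domain corner
    }
    liball->setVertices( verts );  // describe the current decomposition
    liball->setWork( local_work ); // placeholder: e.g. the local particle count
    liball->balance();             // compute shifted domain boundaries
    auto updated = liball->getVertices(); // corners of the new local domain

Later patches in this series map the updated vertices back onto whole Cajita cells by rounding each corner to the nearest cell boundary (std::rint against the cell size), which keeps the balanced domains aligned with the background grid.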
//---------------------------------------------------------------------------// From cf45af4aeb39ad2046c37fbe22a53098a02ff23a Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Tue, 20 Jul 2021 21:23:28 +0200 Subject: [PATCH 03/64] Create proper domain output using all lb --- src/ExaMPM_Solver.hpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 845aeeb..1b490e2 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -78,13 +78,14 @@ class Solver : public SolverBase MPI_Comm_rank( comm, &_rank ); _liball = std::make_shared>(ALL::STAGGERED, 3, 0); - auto global_grid = _mesh->localGrid()->globalGrid(); + // For some reason only(!) the following line causes the code to crash + //auto global_grid = _mesh->localGrid()->globalGrid(); std::vector block_id(3,0); for(std::size_t i=0; i<3; ++i) - block_id.at(i) = global_grid.dimBlockId(i); + block_id.at(i) = _mesh->localGrid()->globalGrid().dimBlockId(i); std::vector blocks_per_dim(3,0); for(std::size_t i=0; i<3; ++i) - blocks_per_dim.at(i) = global_grid.dimNumBlock(i); + blocks_per_dim.at(i) = _mesh->localGrid()->globalGrid().dimNumBlock(i); _liball->setProcGridParams(block_id, blocks_per_dim); std::vector min_domain_size(3,0); for(std::size_t i=0; i<3; ++i) @@ -115,13 +116,13 @@ class Solver : public SolverBase TimeIntegrator::step( ExecutionSpace(), *_pm, delta_t, _gravity, _bc ); - auto min_index = _mesh->minDomainGlobalNodeIndex(); - auto max_index = _mesh->maxDomainGlobalNodeIndex(); + const auto& local_mesh = + Cajita::createLocalMesh( *(_mesh->localGrid()) ); std::vector> vertices(2, ALL::Point(3)); for(std::size_t d=0; d<3; ++d) - vertices.at(0)[d] = min_index[d]; + vertices.at(0)[d] = local_mesh.lowCorner(Cajita::Own(), d); for(std::size_t d=0; d<3; ++d) - vertices.at(1)[d] = max_index[d]; + vertices.at(1)[d] = local_mesh.highCorner(Cajita::Own(), d); _liball->setVertices(vertices); _liball->setWork(0.); From fb54b27d93030d74b43cc120424eb33fe140b2cd Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Wed, 21 Jul 2021 16:33:31 +0200 Subject: [PATCH 04/64] retrieve local grid dimensions and location and output via liball vtk output --- src/ExaMPM_Solver.hpp | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 1b490e2..f992b03 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -98,13 +98,13 @@ class Solver : public SolverBase void solve( const double t_final, const int write_freq ) override { - SiloParticleWriter::writeTimeStep( - _mesh->localGrid()->globalGrid(), - 0, - 0.0, - _pm->get( Location::Particle(), Field::Position() ), - _pm->get( Location::Particle(), Field::Velocity() ), - _pm->get( Location::Particle(), Field::J() ) ); + //SiloParticleWriter::writeTimeStep( + // _mesh->localGrid()->globalGrid(), + // 0, + // 0.0, + // _pm->get( Location::Particle(), Field::Position() ), + // _pm->get( Location::Particle(), Field::Velocity() ), + // _pm->get( Location::Particle(), Field::J() ) ); int num_step = t_final / _dt; double delta_t = t_final / num_step; @@ -116,13 +116,17 @@ class Solver : public SolverBase TimeIntegrator::step( ExecutionSpace(), *_pm, delta_t, _gravity, _bc ); - const auto& local_mesh = - Cajita::createLocalMesh( *(_mesh->localGrid()) ); + //const auto& local_mesh = + // Cajita::createLocalMesh( *(_mesh->localGrid()) ); std::vector> vertices(2, ALL::Point(3)); + //for(std::size_t d=0; d<3; ++d) + // 
vertices.at(0)[d] = local_mesh.lowCorner(Cajita::Own(), d); + //for(std::size_t d=0; d<3; ++d) + // vertices.at(1)[d] = local_mesh.highCorner(Cajita::Own(), d); for(std::size_t d=0; d<3; ++d) - vertices.at(0)[d] = local_mesh.lowCorner(Cajita::Own(), d); + vertices.at(0)[d] = _mesh->localGrid()->globalGrid().globalOffset(d) * _mesh->cellSize(); for(std::size_t d=0; d<3; ++d) - vertices.at(1)[d] = local_mesh.highCorner(Cajita::Own(), d); + vertices.at(1)[d] = vertices.at(0)[d] + _mesh->localGrid()->globalGrid().ownedNumCell(d) * _mesh->cellSize(); _liball->setVertices(vertices); _liball->setWork(0.); @@ -130,13 +134,13 @@ class Solver : public SolverBase if ( 0 == t % write_freq ) { - SiloParticleWriter::writeTimeStep( - _mesh->localGrid()->globalGrid(), - t+1, - time, - _pm->get( Location::Particle(), Field::Position() ), - _pm->get( Location::Particle(), Field::Velocity() ), - _pm->get( Location::Particle(), Field::J() ) ); + //SiloParticleWriter::writeTimeStep( + // _mesh->localGrid()->globalGrid(), + // t+1, + // time, + // _pm->get( Location::Particle(), Field::Position() ), + // _pm->get( Location::Particle(), Field::Velocity() ), + // _pm->get( Location::Particle(), Field::J() ) ); _liball->printVTKoutlines(t); } From aeace1e6c0e6251c9949e99e788847561e3e7bf0 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Wed, 21 Jul 2021 20:03:18 +0200 Subject: [PATCH 05/64] very dirty incorporation of lb --- examples/dam_break.cpp | 2 +- src/ExaMPM_Mesh.hpp | 69 +++++++++++++++++++++++++++++++++++++++--- src/ExaMPM_Solver.hpp | 38 +++++++++++++---------- 3 files changed, 88 insertions(+), 21 deletions(-) diff --git a/examples/dam_break.cpp b/examples/dam_break.cpp index 0a5ee60..8111f64 100644 --- a/examples/dam_break.cpp +++ b/examples/dam_break.cpp @@ -90,7 +90,7 @@ void damBreak( const double cell_size, // little movement in Y. int comm_size; MPI_Comm_size( MPI_COMM_WORLD, &comm_size ); - std::array ranks_per_dim = { 1, comm_size, 1 }; + std::array ranks_per_dim = { 1, 1, comm_size }; Cajita::ManualPartitioner partitioner( ranks_per_dim ); // Material properties. diff --git a/src/ExaMPM_Mesh.hpp b/src/ExaMPM_Mesh.hpp index 0ede7f1..b7d030c 100644 --- a/src/ExaMPM_Mesh.hpp +++ b/src/ExaMPM_Mesh.hpp @@ -40,10 +40,17 @@ class Mesh Mesh( const Kokkos::Array& global_bounding_box, const std::array& global_num_cell, const std::array& periodic, - const Cajita::BlockPartitioner<3>& partitioner, + const Cajita::ManualBlockPartitioner<3>& partitioner, //todo(sschulz): should work with Cajita::BlockPartitioner<3>! const int halo_cell_width, const int minimum_halo_cell_width, MPI_Comm comm ) + : _global_bounding_box( global_bounding_box ) + , _global_num_cell( global_num_cell ) + , _periodic( periodic ) + , _partitioner( partitioner ) + , _halo_cell_width( halo_cell_width ) + , _minimum_halo_cell_width( minimum_halo_cell_width ) + , _comm( comm ) { // Make a copy of the global number of cells so we can modify it. std::array num_cell = global_num_cell; @@ -99,20 +106,64 @@ class Mesh global_low_corner, global_high_corner, num_cell ); // Build the global grid. - auto global_grid = Cajita::createGlobalGrid( + _global_grid = Cajita::createGlobalGrid( comm, global_mesh, periodic, partitioner ); // Build the local grid. 
int halo_width = std::max( minimum_halo_cell_width, halo_cell_width ); - _local_grid = Cajita::createLocalGrid( global_grid, halo_width ); + _local_grid = Cajita::createLocalGrid( _global_grid, halo_width ); } + void updateGeometry( + std::array& local_offset, + std::array& local_num_cell ) + { + // Create global mesh bounds. + std::array global_low_corner = { _global_bounding_box[0], + _global_bounding_box[1], + _global_bounding_box[2] }; + std::array global_high_corner = { _global_bounding_box[3], + _global_bounding_box[4], + _global_bounding_box[5] }; + // For dimensions that are not periodic we pad by the minimum halo + // cell width to allow for projections outside of the domain. + double cell_size = _local_grid->globalGrid().globalMesh().cellSize(0); + for ( int d = 0; d < 3; ++d ) + { + if ( !_periodic[d] ) + { + global_low_corner[d] -= cell_size*_minimum_halo_cell_width; + global_high_corner[d] += cell_size*_minimum_halo_cell_width; + } + } + + // Create the global mesh. + auto global_mesh = Cajita::createUniformGlobalMesh( + global_low_corner, global_high_corner, _global_num_cell ); + + // Build the global grid. + _global_grid = Cajita::createGlobalGrid( + _comm, global_mesh, _periodic, _partitioner, local_offset, local_num_cell ); + + // Build the local grid. + int halo_width = std::max( _minimum_halo_cell_width, _halo_cell_width ); + _local_grid = Cajita::createLocalGrid( _global_grid, halo_width ); + // todo(sschulz): Can we just reassing without causing a memory leak? + } + + // Get the local grid. - const std::shared_ptr>>& localGrid() const + const std::shared_ptr>>& localGrid() { return _local_grid; } + // Get the global grid. + const std::shared_ptr>>& globalGrid() + { + return _global_grid; + } + // Get the cell size. double cellSize() const { @@ -133,8 +184,18 @@ class Mesh public: + Kokkos::Array _global_bounding_box; + std::array _global_num_cell; + std::array _periodic; + Cajita::ManualBlockPartitioner<3> _partitioner; + int _halo_cell_width; + int _minimum_halo_cell_width; + MPI_Comm _comm; + std::shared_ptr< Cajita::LocalGrid>> _local_grid; + std::shared_ptr< + Cajita::GlobalGrid>> _global_grid; Kokkos::Array _min_domain_global_node_index; Kokkos::Array _max_domain_global_node_index; diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index f992b03..89400e8 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -48,7 +48,7 @@ class Solver : public SolverBase const Kokkos::Array& global_bounding_box, const std::array& global_num_cell, const std::array& periodic, - const Cajita::BlockPartitioner<3>& partitioner, + const Cajita::ManualBlockPartitioner<3>& partitioner, //todo(sschulz): should work with Cajita::BlockPartitioner<3>! const int halo_cell_width, const InitFunc& create_functor, const int particles_per_cell, @@ -77,15 +77,15 @@ class Solver : public SolverBase MPI_Comm_rank( comm, &_rank ); - _liball = std::make_shared>(ALL::STAGGERED, 3, 0); + _liball = std::make_shared>(ALL::TENSOR, 3, 0); // For some reason only(!) 
the following line causes the code to crash //auto global_grid = _mesh->localGrid()->globalGrid(); std::vector block_id(3,0); for(std::size_t i=0; i<3; ++i) - block_id.at(i) = _mesh->localGrid()->globalGrid().dimBlockId(i); + block_id.at(i) = _mesh->globalGrid()->dimBlockId(i); std::vector blocks_per_dim(3,0); for(std::size_t i=0; i<3; ++i) - blocks_per_dim.at(i) = _mesh->localGrid()->globalGrid().dimNumBlock(i); + blocks_per_dim.at(i) = _mesh->globalGrid()->dimNumBlock(i); _liball->setProcGridParams(block_id, blocks_per_dim); std::vector min_domain_size(3,0); for(std::size_t i=0; i<3; ++i) @@ -106,6 +106,13 @@ class Solver : public SolverBase // _pm->get( Location::Particle(), Field::Velocity() ), // _pm->get( Location::Particle(), Field::J() ) ); + std::vector> lb_vertices(2, ALL::Point(3)); + for(std::size_t d=0; d<3; ++d) + lb_vertices.at(0)[d] = _mesh->localGrid()->globalGrid().globalOffset(d) * _mesh->cellSize(); + for(std::size_t d=0; d<3; ++d) + lb_vertices.at(1)[d] = lb_vertices.at(0)[d] + _mesh->localGrid()->globalGrid().ownedNumCell(d) * _mesh->cellSize(); + _liball->setVertices(lb_vertices); + int num_step = t_final / _dt; double delta_t = t_final / num_step; double time = 0.0; @@ -116,19 +123,17 @@ class Solver : public SolverBase TimeIntegrator::step( ExecutionSpace(), *_pm, delta_t, _gravity, _bc ); - //const auto& local_mesh = - // Cajita::createLocalMesh( *(_mesh->localGrid()) ); - std::vector> vertices(2, ALL::Point(3)); - //for(std::size_t d=0; d<3; ++d) - // vertices.at(0)[d] = local_mesh.lowCorner(Cajita::Own(), d); - //for(std::size_t d=0; d<3; ++d) - // vertices.at(1)[d] = local_mesh.highCorner(Cajita::Own(), d); + _liball->setWork(_pm->numParticle()); + _liball->balance(); + std::vector> updated_vertices = _liball->getVertices(); for(std::size_t d=0; d<3; ++d) - vertices.at(0)[d] = _mesh->localGrid()->globalGrid().globalOffset(d) * _mesh->cellSize(); + _mesh->globalGrid()->setGlobalOffset(d, + std::rint( updated_vertices.at(0)[d] / _mesh->cellSize() )); for(std::size_t d=0; d<3; ++d) - vertices.at(1)[d] = vertices.at(0)[d] + _mesh->localGrid()->globalGrid().ownedNumCell(d) * _mesh->cellSize(); - _liball->setVertices(vertices); - _liball->setWork(0.); + _mesh->globalGrid()->setOwnedNumCell(d, + std::rint( (updated_vertices.at(1)[d] - updated_vertices.at(0)[d])/_mesh->cellSize() )); + _liball->setVertices(updated_vertices); + _pm->communicateParticles( _halo_min ); @@ -146,6 +151,7 @@ class Solver : public SolverBase time += delta_t; } + printf("Finished\n"); } private: @@ -169,7 +175,7 @@ createSolver( const std::string& device, const Kokkos::Array& global_bounding_box, const std::array& global_num_cell, const std::array& periodic, - const Cajita::BlockPartitioner<3>& partitioner, + const Cajita::ManualBlockPartitioner<3>& partitioner, const int halo_cell_width, const InitFunc& create_functor, const int particles_per_cell, From 09df0c91f61c053da635469ed7555ee33cdf18c9 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Thu, 22 Jul 2021 17:09:05 +0200 Subject: [PATCH 06/64] make sure the created arrays are also updated --- src/ExaMPM_ProblemManager.hpp | 36 +++++++++++++++++++++++++++++++++++ src/ExaMPM_Solver.hpp | 1 + 2 files changed, 37 insertions(+) diff --git a/src/ExaMPM_ProblemManager.hpp b/src/ExaMPM_ProblemManager.hpp index b070e1b..e8e8598 100644 --- a/src/ExaMPM_ProblemManager.hpp +++ b/src/ExaMPM_ProblemManager.hpp @@ -135,6 +135,42 @@ class ProblemManager Cajita::FullHaloPattern() ); } + void updateMesh( const std::shared_ptr& mesh ) + { + auto 
node_vector_layout = + Cajita::createArrayLayout( _mesh->localGrid(), 3, Cajita::Node() ); + auto node_scalar_layout = + Cajita::createArrayLayout( _mesh->localGrid(), 1, Cajita::Node() ); + auto cell_scalar_layout = + Cajita::createArrayLayout( _mesh->localGrid(), 1, Cajita::Cell() ); + + _momentum = + Cajita::createArray( "momentum", node_vector_layout ); + _mass = + Cajita::createArray( "mass", node_scalar_layout ); + _force = + Cajita::createArray( "force", node_vector_layout ); + _velocity = + Cajita::createArray( "velocity", node_vector_layout ); + _position_correction = + Cajita::createArray( "position_correction", node_vector_layout ); + _density = + Cajita::createArray( "density", cell_scalar_layout ); + + _mark = + Cajita::createArray( "mark", cell_scalar_layout ); + + _node_vector_halo = + Cajita::createHalo( *node_vector_layout, + Cajita::FullHaloPattern() ); + _node_scalar_halo = + Cajita::createHalo( *node_scalar_layout, + Cajita::FullHaloPattern() ); + _cell_scalar_halo = + Cajita::createHalo( *cell_scalar_layout, + Cajita::FullHaloPattern() ); + } + std::size_t numParticle() const { return _particles.size(); diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 89400e8..22a2ad3 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -133,6 +133,7 @@ class Solver : public SolverBase _mesh->globalGrid()->setOwnedNumCell(d, std::rint( (updated_vertices.at(1)[d] - updated_vertices.at(0)[d])/_mesh->cellSize() )); _liball->setVertices(updated_vertices); + _pm->updateMesh(_mesh); _pm->communicateParticles( _halo_min ); From 09f370497aecad62f2ab30c9393f041615df5a2a Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Thu, 22 Jul 2021 21:31:13 +0200 Subject: [PATCH 07/64] add primitive VTK writer for domains --- src/ExaMPM_Solver.hpp | 12 ++++ src/ExaMPM_VTKDomainWriter.hpp | 121 +++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 src/ExaMPM_VTKDomainWriter.hpp diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 22a2ad3..a19d82c 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -63,6 +64,7 @@ class Solver : public SolverBase , _gravity( gravity ) , _bc( bc ) , _halo_min( 3 ) + , _comm( comm ) { _mesh = std::make_shared>( global_bounding_box, global_num_cell, periodic, partitioner, @@ -148,6 +150,15 @@ class Solver : public SolverBase // _pm->get( Location::Particle(), Field::Velocity() ), // _pm->get( Location::Particle(), Field::J() ) ); _liball->printVTKoutlines(t); + std::array vertices; + for(std::size_t d=0; d<3; ++d) + vertices[d] = _mesh->mutGlobalGrid().globalOffset(d) * _mesh->cellSize(); + for(std::size_t d=3; d<6; ++d) + vertices[d] = vertices[d-3] + _mesh->mutGlobalGrid().ownedNumCell(d) * _mesh->cellSize(); + printf(">> %d, %g %g %g | %g %g %g\n", _rank, + vertices[0], vertices[1], vertices[2], + vertices[3], vertices[4], vertices[5]); + VTKDomainWriter::writeDomain(_comm, t, vertices); } time += delta_t; @@ -161,6 +172,7 @@ class Solver : public SolverBase double _gravity; BoundaryCondition _bc; int _halo_min; + MPI_Comm _comm; std::shared_ptr> _mesh; std::shared_ptr> _pm; int _rank; diff --git a/src/ExaMPM_VTKDomainWriter.hpp b/src/ExaMPM_VTKDomainWriter.hpp new file mode 100644 index 0000000..3a70ce2 --- /dev/null +++ b/src/ExaMPM_VTKDomainWriter.hpp @@ -0,0 +1,121 @@ +/**************************************************************************** + * Copyright (c) 2018-2020 by the ExaMPM authors * + * All rights reserved. 
*
+ *                                                                          *
+ * This file is part of the ExaMPM library. ExaMPM is distributed under a  *
+ * BSD 3-clause license. For the licensing terms see the LICENSE file in   *
+ * the top-level directory.                                                *
+ *                                                                          *
+ * SPDX-License-Identifier: BSD-3-Clause                                    *
+ ****************************************************************************/
+
+#ifndef EXAMPM_VTKDOMAINWRITER_HPP
+#define EXAMPM_VTKDOMAINWRITER_HPP
+#include <mpi.h>
+
+#include <sstream>
+
+namespace ExaMPM
+{
+namespace VTKDomainWriter
+{
+// Create filename
+void writeDomainParallelFile(MPI_Comm comm, int time_step)
+{
+    // Should only be called from a single rank
+    int size;
+    MPI_Comm_size(comm, &size);
+    std::stringstream filename;
+    filename << "domain" << "_" << time_step << ".pvtu";
+    FILE* file = fopen(filename.str().c_str(), "w");
+    fprintf(file, "<?xml version=\"1.0\"?>\n");
+    fprintf(file, "<VTKFile type=\"PUnstructuredGrid\" version=\"1.0\" byte_order=\"LittleEndian\">\n");
+    fprintf(file, "<PUnstructuredGrid>\n");
+    fprintf(file, "\t<PCellData>\n");
+    fprintf(file, "\t\t<PDataArray type=\"Int32\" Name=\"rank\"/>\n");
+    fprintf(file, "\t</PCellData>\n");
+    fprintf(file, "\t<PPoints>\n");
+    fprintf(file, "\t\t<PDataArray type=\"Float64\" Name=\"Points\" NumberOfComponents=\"3\"/>\n");
+    fprintf(file, "\t</PPoints>\n");
+    for(std::size_t i=0; i<size; ++i)
+        fprintf(file, "\t<Piece Source=\"domain_%d_%zu.vtu\"/>\n", time_step, i);
+    fprintf(file, "</PUnstructuredGrid>\n");
+    fprintf(file, "</VTKFile>\n");
+    fclose(file);
+}
+// Write VTU for domain (low corner, high corner)
+void writeDomain(MPI_Comm comm, int time_step, std::array<double, 6>& domain_vertices)
+{
+    int rank;
+    MPI_Comm_rank(comm, &rank);
+    if(rank==1)
+        writeDomainParallelFile(comm, time_step);
+    std::stringstream filename;
+    // todo(sschulz): properly format, according to max rank
+    filename << "domain" << "_" << time_step << "_" << rank << ".vtu";
+    FILE* file = fopen(filename.str().c_str(), "w");
+    fprintf(file, "<?xml version=\"1.0\"?>\n");
+    fprintf(file, "<VTKFile type=\"UnstructuredGrid\" version=\"1.0\" byte_order=\"LittleEndian\">\n");
+    fprintf(file, "<UnstructuredGrid>\n");
+    std::array<double, 24> vertices;
+    vertices[0*3+0] = domain_vertices[0];
+    vertices[2*3+0] = domain_vertices[0];
+    vertices[4*3+0] = domain_vertices[0];
+    vertices[6*3+0] = domain_vertices[0];
+    vertices[0*3+1] = domain_vertices[1];
+    vertices[1*3+1] = domain_vertices[1];
+    vertices[4*3+1] = domain_vertices[1];
+    vertices[5*3+1] = domain_vertices[1];
+    vertices[0*3+2] = domain_vertices[2];
+    vertices[1*3+2] = domain_vertices[2];
+    vertices[2*3+2] = domain_vertices[2];
+    vertices[3*3+2] = domain_vertices[2];
+    vertices[1*3+0] = domain_vertices[3];
+    vertices[3*3+0] = domain_vertices[3];
+    vertices[5*3+0] = domain_vertices[3];
+    vertices[7*3+0] = domain_vertices[3];
+    vertices[2*3+1] = domain_vertices[4];
+    vertices[3*3+1] = domain_vertices[4];
+    vertices[6*3+1] = domain_vertices[4];
+    vertices[7*3+1] = domain_vertices[4];
+    vertices[4*3+2] = domain_vertices[5];
+    vertices[5*3+2] = domain_vertices[5];
+    vertices[6*3+2] = domain_vertices[5];
+    vertices[7*3+2] = domain_vertices[5];
+    std::array<int, 8> connectivity = {0,1,2,3,4,5,6,7};
+    fprintf(file, "<Piece NumberOfPoints=\"8\" NumberOfCells=\"1\">\n");
+    fprintf(file, "\t<PointData>\n");
+    fprintf(file, "\t</PointData>\n");
+    fprintf(file, "\t<CellData>\n");
+    fprintf(file, "\t\t<DataArray type=\"Int32\" Name=\"rank\">\n");
+    fprintf(file, "%d", rank);
+    fprintf(file, "\n\t\t</DataArray>\n");
+    fprintf(file, "\t</CellData>\n");
+    fprintf(file, "\t<Points>\n");
+    fprintf(file, "\t\t<DataArray type=\"Float64\" Name=\"Points\" NumberOfComponents=\"3\">\n");
+    for(const double &vert : vertices)
+        fprintf(file, "%g ", vert);
+    fprintf(file, "\n\t\t</DataArray>\n");
+    fprintf(file, "\t</Points>\n");
+    fprintf(file, "\t<Cells>\n");
+    fprintf(file, "\t\t<DataArray type=\"Int32\" Name=\"connectivity\">\n");
+    for(const int &conn : connectivity)
+        fprintf(file, "%d ", conn);
+    fprintf(file, "\n\t\t</DataArray>\n");
+    fprintf(file, "\t\t<DataArray type=\"Int32\" Name=\"offsets\">\n");
+    fprintf(file, "8\n");
+    fprintf(file, "\n\t\t</DataArray>\n");
+    fprintf(file, "\t\t<DataArray type=\"UInt8\" Name=\"types\">\n");
+    fprintf(file, "11\n");
+    fprintf(file, "\n\t\t</DataArray>\n");
+    fprintf(file, "\t</Cells>\n");
+    fprintf(file, "</Piece>\n");
+    fprintf(file, "</UnstructuredGrid>\n");
+    fprintf(file, "</VTKFile>\n");
+    fclose(file);
+}
+
+// Write PVTU
+} // end namespace VTKDomainWriter
+} // end namespace ExaMPM
+#endif //
EXAMPM_VTKDOMAINWRITER_HPP From fad1d7329b2310e4ec58fd786f2e6c0bc70cf066 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Thu, 22 Jul 2021 21:31:51 +0200 Subject: [PATCH 08/64] use the mutable version of global grid and some debug output --- src/ExaMPM_Mesh.hpp | 8 ++++---- src/ExaMPM_Solver.hpp | 15 +++++++++++---- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/ExaMPM_Mesh.hpp b/src/ExaMPM_Mesh.hpp index b7d030c..b8d9453 100644 --- a/src/ExaMPM_Mesh.hpp +++ b/src/ExaMPM_Mesh.hpp @@ -153,15 +153,15 @@ class Mesh // Get the local grid. - const std::shared_ptr>>& localGrid() + const std::shared_ptr>>& localGrid() const { return _local_grid; } - // Get the global grid. - const std::shared_ptr>>& globalGrid() + // Get the mutable global grid. + Cajita::GlobalGrid>& mutGlobalGrid() const { - return _global_grid; + return _local_grid->mutGlobalGrid(); } // Get the cell size. diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index a19d82c..caeaf91 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -84,10 +84,10 @@ class Solver : public SolverBase //auto global_grid = _mesh->localGrid()->globalGrid(); std::vector block_id(3,0); for(std::size_t i=0; i<3; ++i) - block_id.at(i) = _mesh->globalGrid()->dimBlockId(i); + block_id.at(i) = _mesh->mutGlobalGrid().dimBlockId(i); std::vector blocks_per_dim(3,0); for(std::size_t i=0; i<3; ++i) - blocks_per_dim.at(i) = _mesh->globalGrid()->dimNumBlock(i); + blocks_per_dim.at(i) = _mesh->mutGlobalGrid().dimNumBlock(i); _liball->setProcGridParams(block_id, blocks_per_dim); std::vector min_domain_size(3,0); for(std::size_t i=0; i<3; ++i) @@ -114,6 +114,9 @@ class Solver : public SolverBase for(std::size_t d=0; d<3; ++d) lb_vertices.at(1)[d] = lb_vertices.at(0)[d] + _mesh->localGrid()->globalGrid().ownedNumCell(d) * _mesh->cellSize(); _liball->setVertices(lb_vertices); + printf(">> %d, %g %g %g | %g %g %g\n", _rank, + lb_vertices.at(0)[0],lb_vertices.at(0)[1],lb_vertices.at(0)[2], + lb_vertices.at(1)[0],lb_vertices.at(1)[1],lb_vertices.at(1)[2]); int num_step = t_final / _dt; double delta_t = t_final / num_step; @@ -129,12 +132,16 @@ class Solver : public SolverBase _liball->balance(); std::vector> updated_vertices = _liball->getVertices(); for(std::size_t d=0; d<3; ++d) - _mesh->globalGrid()->setGlobalOffset(d, + _mesh->mutGlobalGrid().setGlobalOffset(d, std::rint( updated_vertices.at(0)[d] / _mesh->cellSize() )); for(std::size_t d=0; d<3; ++d) - _mesh->globalGrid()->setOwnedNumCell(d, + _mesh->mutGlobalGrid().setOwnedNumCell(d, std::rint( (updated_vertices.at(1)[d] - updated_vertices.at(0)[d])/_mesh->cellSize() )); _liball->setVertices(updated_vertices); + printf(">> %d, balanced vertices: %g %g %g, %g %g %g\n", _rank, + updated_vertices.at(0)[0],updated_vertices.at(0)[1],updated_vertices.at(0)[2], + updated_vertices.at(1)[0],updated_vertices.at(1)[1],updated_vertices.at(1)[2]); + _pm->updateMesh(_mesh); From 408bab24ce430822f42b3b05a6b9e31077743a15 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Fri, 23 Jul 2021 13:18:08 +0200 Subject: [PATCH 09/64] reenable silo particle output --- src/ExaMPM_Solver.hpp | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index caeaf91..1ea877b 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -100,13 +100,14 @@ class Solver : public SolverBase void solve( const double t_final, const int write_freq ) override { - //SiloParticleWriter::writeTimeStep( - // 
_mesh->localGrid()->globalGrid(), - // 0, - // 0.0, - // _pm->get( Location::Particle(), Field::Position() ), - // _pm->get( Location::Particle(), Field::Velocity() ), - // _pm->get( Location::Particle(), Field::J() ) ); + std::string vtk_actual_domain_basename("domain"); + SiloParticleWriter::writeTimeStep( + _mesh->localGrid()->globalGrid(), + 0, + 0.0, + _pm->get( Location::Particle(), Field::Position() ), + _pm->get( Location::Particle(), Field::Velocity() ), + _pm->get( Location::Particle(), Field::J() ) ); std::vector> lb_vertices(2, ALL::Point(3)); for(std::size_t d=0; d<3; ++d) @@ -149,13 +150,13 @@ class Solver : public SolverBase if ( 0 == t % write_freq ) { - //SiloParticleWriter::writeTimeStep( - // _mesh->localGrid()->globalGrid(), - // t+1, - // time, - // _pm->get( Location::Particle(), Field::Position() ), - // _pm->get( Location::Particle(), Field::Velocity() ), - // _pm->get( Location::Particle(), Field::J() ) ); + SiloParticleWriter::writeTimeStep( + _mesh->localGrid()->globalGrid(), + t+1, + time, + _pm->get( Location::Particle(), Field::Position() ), + _pm->get( Location::Particle(), Field::Velocity() ), + _pm->get( Location::Particle(), Field::J() ) ); _liball->printVTKoutlines(t); std::array vertices; for(std::size_t d=0; d<3; ++d) From a7f86d58482d8023b5184a3519a097d91b221e8d Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Fri, 23 Jul 2021 13:19:11 +0200 Subject: [PATCH 10/64] remove debug output --- src/ExaMPM_Solver.hpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 1ea877b..0dc3986 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -115,9 +115,6 @@ class Solver : public SolverBase for(std::size_t d=0; d<3; ++d) lb_vertices.at(1)[d] = lb_vertices.at(0)[d] + _mesh->localGrid()->globalGrid().ownedNumCell(d) * _mesh->cellSize(); _liball->setVertices(lb_vertices); - printf(">> %d, %g %g %g | %g %g %g\n", _rank, - lb_vertices.at(0)[0],lb_vertices.at(0)[1],lb_vertices.at(0)[2], - lb_vertices.at(1)[0],lb_vertices.at(1)[1],lb_vertices.at(1)[2]); int num_step = t_final / _dt; double delta_t = t_final / num_step; @@ -139,13 +136,8 @@ class Solver : public SolverBase _mesh->mutGlobalGrid().setOwnedNumCell(d, std::rint( (updated_vertices.at(1)[d] - updated_vertices.at(0)[d])/_mesh->cellSize() )); _liball->setVertices(updated_vertices); - printf(">> %d, balanced vertices: %g %g %g, %g %g %g\n", _rank, - updated_vertices.at(0)[0],updated_vertices.at(0)[1],updated_vertices.at(0)[2], - updated_vertices.at(1)[0],updated_vertices.at(1)[1],updated_vertices.at(1)[2]); - _pm->updateMesh(_mesh); - _pm->communicateParticles( _halo_min ); if ( 0 == t % write_freq ) @@ -163,9 +155,6 @@ class Solver : public SolverBase vertices[d] = _mesh->mutGlobalGrid().globalOffset(d) * _mesh->cellSize(); for(std::size_t d=3; d<6; ++d) vertices[d] = vertices[d-3] + _mesh->mutGlobalGrid().ownedNumCell(d) * _mesh->cellSize(); - printf(">> %d, %g %g %g | %g %g %g\n", _rank, - vertices[0], vertices[1], vertices[2], - vertices[3], vertices[4], vertices[5]); VTKDomainWriter::writeDomain(_comm, t, vertices); } From 2befc540d003a9d693978e28c436af0d0a1cd080 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Fri, 23 Jul 2021 13:19:35 +0200 Subject: [PATCH 11/64] update VTKDomainWriter to allow for arbitrary basename --- src/ExaMPM_Solver.hpp | 6 +++--- src/ExaMPM_VTKDomainWriter.hpp | 11 ++++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 0dc3986..e7c004f 
100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -152,10 +152,10 @@ class Solver : public SolverBase _liball->printVTKoutlines(t); std::array vertices; for(std::size_t d=0; d<3; ++d) - vertices[d] = _mesh->mutGlobalGrid().globalOffset(d) * _mesh->cellSize(); + vertices[d] = static_cast(_mesh->mutGlobalGrid().globalOffset(d)) * _mesh->cellSize(); for(std::size_t d=3; d<6; ++d) - vertices[d] = vertices[d-3] + _mesh->mutGlobalGrid().ownedNumCell(d) * _mesh->cellSize(); - VTKDomainWriter::writeDomain(_comm, t, vertices); + vertices[d] = vertices[d-3] + static_cast(_mesh->mutGlobalGrid().ownedNumCell(d-3)) * _mesh->cellSize(); + VTKDomainWriter::writeDomain(_comm, t, vertices, vtk_actual_domain_basename); } time += delta_t; diff --git a/src/ExaMPM_VTKDomainWriter.hpp b/src/ExaMPM_VTKDomainWriter.hpp index 3a70ce2..5b6de3e 100644 --- a/src/ExaMPM_VTKDomainWriter.hpp +++ b/src/ExaMPM_VTKDomainWriter.hpp @@ -14,19 +14,20 @@ #include #include +#include namespace ExaMPM { namespace VTKDomainWriter { // Create filename -void writeDomainParallelFile(MPI_Comm comm, int time_step) +void writeDomainParallelFile(MPI_Comm comm, int time_step, std::string& basename) { // Should only be called from a single rank int size; MPI_Comm_size(comm, &size); std::stringstream filename; - filename << "domain" << "_" << time_step << ".pvtu"; + filename << basename << "_" << time_step << ".pvtu"; FILE* file = fopen(filename.str().c_str(), "w"); fprintf(file, "\n"); fprintf(file, "\n"); @@ -44,15 +45,15 @@ void writeDomainParallelFile(MPI_Comm comm, int time_step) fclose(file); } // Write VTU for domain (low corner, high corner) -void writeDomain(MPI_Comm comm, int time_step, std::array& domain_vertices) +void writeDomain(MPI_Comm comm, int time_step, std::array& domain_vertices, std::string& basename) { int rank; MPI_Comm_rank(comm, &rank); if(rank==1) - writeDomainParallelFile(comm, time_step); + writeDomainParallelFile(comm, time_step, basename); std::stringstream filename; // todo(sschulz): properly format, according to max rank - filename << "domain" << "_" << time_step << "_" << rank << ".vtu"; + filename << basename << "_" << time_step << "_" << rank << ".vtu"; FILE* file = fopen(filename.str().c_str(), "w"); fprintf(file, "\n"); fprintf(file, "\n"); From dba154b9e7221b8016fbc350b00e59b18f107b68 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Fri, 23 Jul 2021 13:21:58 +0200 Subject: [PATCH 12/64] output actual and load balancing domain structure --- src/ExaMPM_Solver.hpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index e7c004f..c510388 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -100,7 +100,8 @@ class Solver : public SolverBase void solve( const double t_final, const int write_freq ) override { - std::string vtk_actual_domain_basename("domain"); + std::string vtk_actual_domain_basename("domain_act"); + std::string vtk_lb_domain_basename("domain_lb"); SiloParticleWriter::writeTimeStep( _mesh->localGrid()->globalGrid(), 0, @@ -156,6 +157,11 @@ class Solver : public SolverBase for(std::size_t d=3; d<6; ++d) vertices[d] = vertices[d-3] + static_cast(_mesh->mutGlobalGrid().ownedNumCell(d-3)) * _mesh->cellSize(); VTKDomainWriter::writeDomain(_comm, t, vertices, vtk_actual_domain_basename); + for(std::size_t d=0; d<3; ++d) + vertices[d] = updated_vertices.at(0)[d]; + for(std::size_t d=3; d<6; ++d) + vertices[d] = updated_vertices.at(1)[d-3]; + VTKDomainWriter::writeDomain(_comm, t, vertices, 
vtk_lb_domain_basename); } time += delta_t; From d4877b85526f13468a1cf44b9cbdb478c19a9ee6 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Fri, 23 Jul 2021 13:24:55 +0200 Subject: [PATCH 13/64] VTKDomainWriter fix parallel file piece filename composition --- src/ExaMPM_VTKDomainWriter.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ExaMPM_VTKDomainWriter.hpp b/src/ExaMPM_VTKDomainWriter.hpp index 5b6de3e..1847338 100644 --- a/src/ExaMPM_VTKDomainWriter.hpp +++ b/src/ExaMPM_VTKDomainWriter.hpp @@ -39,7 +39,7 @@ void writeDomainParallelFile(MPI_Comm comm, int time_step, std::string& basename fprintf(file, "\t\t\n"); fprintf(file, "\t\n"); for(std::size_t i=0; i\n", time_step, i); + fprintf(file, "\t\n", basename.c_str(), time_step, i); fprintf(file, "\n"); fprintf(file, "\n"); fclose(file); From f0144690408b3b3ec9f2d5cc66729fe7fadfdca3 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Fri, 23 Jul 2021 13:38:24 +0200 Subject: [PATCH 14/64] to not miss cells, first convert lb vertices to cells, then apply to global grid --- src/ExaMPM_Solver.hpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index c510388..1da1b5e 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -130,12 +130,15 @@ class Solver : public SolverBase _liball->setWork(_pm->numParticle()); _liball->balance(); std::vector> updated_vertices = _liball->getVertices(); + std::array cell_index_lo, cell_index_hi; for(std::size_t d=0; d<3; ++d) - _mesh->mutGlobalGrid().setGlobalOffset(d, - std::rint( updated_vertices.at(0)[d] / _mesh->cellSize() )); + cell_index_lo[d] = std::rint( updated_vertices.at(0)[d] / _mesh->cellSize() ); for(std::size_t d=0; d<3; ++d) - _mesh->mutGlobalGrid().setOwnedNumCell(d, - std::rint( (updated_vertices.at(1)[d] - updated_vertices.at(0)[d])/_mesh->cellSize() )); + cell_index_hi[d] = std::rint( updated_vertices.at(1)[d] / _mesh->cellSize() ); + for(std::size_t d=0; d<3; ++d) + _mesh->mutGlobalGrid().setGlobalOffset(d, cell_index_lo[d]); + for(std::size_t d=0; d<3; ++d) + _mesh->mutGlobalGrid().setOwnedNumCell(d, (cell_index_hi[d]-cell_index_lo[d])); _liball->setVertices(updated_vertices); _pm->updateMesh(_mesh); From cb15e1f871f093777694b5a26d3f9b75c9b1875a Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Fri, 23 Jul 2021 13:47:38 +0200 Subject: [PATCH 15/64] ci: include ALL checkout and build --- .github/workflows/CI.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index aad7215..73c8996 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -42,6 +42,15 @@ jobs: cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/Cabana -DCMAKE_PREFIX_PATH="$HOME/kokkos" -DCabana_REQUIRE_${{ matrix.backend }}=ON cmake --build build --parallel 2 cmake --install build + - name: Checkout ALL + run: | + git clone --depth 1 --branch v0.9.1 https://gitlab.jsc.fz-juelich.de/SLMS/loadbalancing ALL + - name: Build ALL + working-directory: ALL + run: | + cmake -B build -DCMAKE_BUILD_TYPE=Release + cmake --build build --parallel 2 + cmake --install build - name: Checkout code uses: actions/checkout@v2.2.0 - name: Build From 5a9bf54aab3403b7b6cb222c1b4454d6897ad4d8 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Fri, 23 Jul 2021 13:49:31 +0200 Subject: [PATCH 16/64] disable ALL internal VTK output for now --- src/ExaMPM_Solver.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ExaMPM_Solver.hpp 
b/src/ExaMPM_Solver.hpp
index 1da1b5e..ef71b9b 100644
--- a/src/ExaMPM_Solver.hpp
+++ b/src/ExaMPM_Solver.hpp
@@ -153,7 +153,7 @@ class Solver : public SolverBase
                     _pm->get( Location::Particle(), Field::Position() ),
                     _pm->get( Location::Particle(), Field::Velocity() ),
                     _pm->get( Location::Particle(), Field::J() ) );
-                _liball->printVTKoutlines(t);
+                //_liball->printVTKoutlines(t);
                 std::array<double, 6> vertices;
                 for(std::size_t d=0; d<3; ++d)

From 9d0a2aa5de0422996525d021a4c4e53944ab3bb8 Mon Sep 17 00:00:00 2001
From: Stephan Schulz
Date: Fri, 23 Jul 2021 18:06:49 +0200
Subject: [PATCH 17/64] move load balancing interface into LoadBalancer

---
 src/ExaMPM_LoadBalancer.hpp | 115 ++++++++++++++++++++++++++++++++++++
 src/ExaMPM_Solver.hpp       |  63 ++-------------------
 2 files changed, 121 insertions(+), 57 deletions(-)
 create mode 100644 src/ExaMPM_LoadBalancer.hpp

diff --git a/src/ExaMPM_LoadBalancer.hpp b/src/ExaMPM_LoadBalancer.hpp
new file mode 100644
index 0000000..1bf729d
--- /dev/null
+++ b/src/ExaMPM_LoadBalancer.hpp
@@ -0,0 +1,115 @@
+/****************************************************************************
+ * Copyright (c) 2018-2020 by the ExaMPM authors                           *
+ * All rights reserved.                                                    *
+ *                                                                          *
+ * This file is part of the ExaMPM library. ExaMPM is distributed under a  *
+ * BSD 3-clause license. For the licensing terms see the LICENSE file in   *
+ * the top-level directory.                                                *
+ *                                                                          *
+ * SPDX-License-Identifier: BSD-3-Clause                                    *
+ ****************************************************************************/
+
+#ifndef EXAMPM_LOADBALANCER_HPP
+#define EXAMPM_LOADBALANCER_HPP
+
+#include <ExaMPM_Mesh.hpp>
+#include <ExaMPM_ProblemManager.hpp>
+#include <ExaMPM_VTKDomainWriter.hpp>
+
+#include <Cajita.hpp>
+
+#include <ALL.hpp>
+
+#include <memory>
+
+namespace ExaMPM
+{
+template <class MemorySpace>
+class LoadBalancer
+{
+  public:
+    using mesh_type = Mesh<MemorySpace>;
+    // todo(sschulz): Allow for arbitrary dimension
+    LoadBalancer( MPI_comm comm,
+                  const std::shared_ptr<mesh_type>& mesh )
+        : _comm( comm )
+        , _mesh( mesh )
+    {
+        MPI_Comm_rank( comm, &_rank );
+        _liball = std::make_shared<ALL::ALL<double, double>>(ALL::TENSOR, 3, 0);
+        // todo(sschulz): Check if the code still crashes.
+        // For some reason only(!)
+        // the following line causes the code to crash
+        //auto global_grid = _mesh->localGrid()->globalGrid();
+        std::vector<int> block_id(3,0);
+        for(std::size_t i=0; i<3; ++i)
+            block_id.at(i) = _mesh->mutGlobalGrid().dimBlockId(i);
+        std::vector<int> blocks_per_dim(3,0);
+        for(std::size_t i=0; i<3; ++i)
+            blocks_per_dim.at(i) = _mesh->mutGlobalGrid().dimNumBlock(i);
+        _liball->setProcGridParams(block_id, blocks_per_dim);
+        std::vector<double> min_domain_size(3,0);
+        for(std::size_t i=0; i<3; ++i)
+            min_domain_size.at(i) = 3.*_mesh->cellSize();
+        _liball->setMinDomainSize(min_domain_size);
+        _liball->setCommunicator(_comm);
+        _liball->setProcTag(_rank);
+        _liball->setup();
+        std::vector<ALL::Point<double>> lb_vertices(2, ALL::Point<double>(3));
+        for(std::size_t d=0; d<3; ++d)
+            lb_vertices.at(0)[d] = _mesh->localGrid()->globalGrid().globalOffset(d) * _mesh->cellSize();
+        for(std::size_t d=0; d<3; ++d)
+            lb_vertices.at(1)[d] = lb_vertices.at(0)[d] + _mesh->localGrid()->globalGrid().ownedNumCell(d) * _mesh->cellSize();
+        _liball->setVertices(lb_vertices);
+    }
+
+    // This will update the domain decomposition and also update the mesh
+    void balance(std::shared_ptr<ProblemManager<MemorySpace>> pm )
+    {
+        _liball->setWork(pm->numParticle());
+        _liball->balance();
+        std::vector<ALL::Point<double>> updated_vertices = _liball->getVertices();
+        std::array<int, 3> cell_index_lo, cell_index_hi;
+        for(std::size_t d=0; d<3; ++d)
+            cell_index_lo[d] = std::rint( updated_vertices.at(0)[d] / _mesh->cellSize() );
+        for(std::size_t d=0; d<3; ++d)
+            cell_index_hi[d] = std::rint( updated_vertices.at(1)[d] / _mesh->cellSize() );
+        for(std::size_t d=0; d<3; ++d)
+            _mesh->mutGlobalGrid().setGlobalOffset(d, cell_index_lo[d]);
+        for(std::size_t d=0; d<3; ++d)
+            _mesh->mutGlobalGrid().setOwnedNumCell(d, (cell_index_hi[d]-cell_index_lo[d]));
+        _liball->setVertices(updated_vertices);
+        pm->updateMesh(_mesh);
+    }
+
+    // Output the actual and internal load balancing grid to VTK files
+    void output(const int t) const
+    {
+        std::string vtk_actual_domain_basename("domain_act");
+        std::string vtk_lb_domain_basename("domain_lb");
+        // todo(sschulz): The official VTK routine seems to create mangled files on my system.
+        //_liball->printVTKoutlines(t);
+        std::array<double, 6> vertices;
+        for(std::size_t d=0; d<3; ++d)
+            vertices[d] = static_cast<double>(_mesh->mutGlobalGrid().globalOffset(d)) * _mesh->cellSize();
+        for(std::size_t d=3; d<6; ++d)
+            vertices[d] = vertices[d-3] + static_cast<double>(_mesh->mutGlobalGrid().ownedNumCell(d-3)) * _mesh->cellSize();
+        VTKDomainWriter::writeDomain(_comm, t, vertices, vtk_actual_domain_basename);
+        for(std::size_t d=0; d<3; ++d)
+            vertices[d] = updated_vertices.at(0)[d];
+        for(std::size_t d=3; d<6; ++d)
+            vertices[d] = updated_vertices.at(1)[d-3];
+        VTKDomainWriter::writeDomain(_comm, t, vertices, vtk_lb_domain_basename);
+    }
+
+
+  private:
+
+    MPI_Comm _comm;
+    std::shared_ptr<mesh_type>& _mesh;
+    std::shared_ptr<ALL::ALL<double, double>> _liball;
+    int _rank;
+
+}
+} // end namespace ExaMPM
+#endif // EXAMPM_LOADBALANCER_HPP

diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp
index ef71b9b..30e02af 100644
--- a/src/ExaMPM_Solver.hpp
+++ b/src/ExaMPM_Solver.hpp
@@ -17,14 +17,11 @@
 #include
 #include
 #include
-#include
+#include <ExaMPM_LoadBalancer.hpp>
 #include
-#include
-
 #include
-#include
 #include

@@ -64,7 +61,6 @@ class Solver : public SolverBase
         , _gravity( gravity )
         , _bc( bc )
         , _halo_min( 3 )
-        , _comm( comm )
     {
         _mesh = std::make_shared<Mesh<MemorySpace>>(
             global_bounding_box, global_num_cell, periodic, partitioner,
@@ -79,29 +75,12 @@ class Solver : public SolverBase

         MPI_Comm_rank( comm, &_rank );

-        _liball = std::make_shared<ALL::ALL<double, double>>(ALL::TENSOR, 3, 0);
-        // For some reason only(!)
the following line causes the code to crash - //auto global_grid = _mesh->localGrid()->globalGrid(); - std::vector block_id(3,0); - for(std::size_t i=0; i<3; ++i) - block_id.at(i) = _mesh->mutGlobalGrid().dimBlockId(i); - std::vector blocks_per_dim(3,0); - for(std::size_t i=0; i<3; ++i) - blocks_per_dim.at(i) = _mesh->mutGlobalGrid().dimNumBlock(i); - _liball->setProcGridParams(block_id, blocks_per_dim); - std::vector min_domain_size(3,0); - for(std::size_t i=0; i<3; ++i) - min_domain_size.at(i) = 3.*_mesh->cellSize(); - _liball->setMinDomainSize(min_domain_size); - _liball->setCommunicator(MPI_COMM_WORLD); - _liball->setProcTag(_rank); - _liball->setup(); + _lb = LoadBalancer(comm, _mesh); + } void solve( const double t_final, const int write_freq ) override { - std::string vtk_actual_domain_basename("domain_act"); - std::string vtk_lb_domain_basename("domain_lb"); SiloParticleWriter::writeTimeStep( _mesh->localGrid()->globalGrid(), 0, @@ -109,13 +88,8 @@ class Solver : public SolverBase _pm->get( Location::Particle(), Field::Position() ), _pm->get( Location::Particle(), Field::Velocity() ), _pm->get( Location::Particle(), Field::J() ) ); + _lb.output(0); - std::vector> lb_vertices(2, ALL::Point(3)); - for(std::size_t d=0; d<3; ++d) - lb_vertices.at(0)[d] = _mesh->localGrid()->globalGrid().globalOffset(d) * _mesh->cellSize(); - for(std::size_t d=0; d<3; ++d) - lb_vertices.at(1)[d] = lb_vertices.at(0)[d] + _mesh->localGrid()->globalGrid().ownedNumCell(d) * _mesh->cellSize(); - _liball->setVertices(lb_vertices); int num_step = t_final / _dt; double delta_t = t_final / num_step; @@ -127,20 +101,7 @@ class Solver : public SolverBase TimeIntegrator::step( ExecutionSpace(), *_pm, delta_t, _gravity, _bc ); - _liball->setWork(_pm->numParticle()); - _liball->balance(); - std::vector> updated_vertices = _liball->getVertices(); - std::array cell_index_lo, cell_index_hi; - for(std::size_t d=0; d<3; ++d) - cell_index_lo[d] = std::rint( updated_vertices.at(0)[d] / _mesh->cellSize() ); - for(std::size_t d=0; d<3; ++d) - cell_index_hi[d] = std::rint( updated_vertices.at(1)[d] / _mesh->cellSize() ); - for(std::size_t d=0; d<3; ++d) - _mesh->mutGlobalGrid().setGlobalOffset(d, cell_index_lo[d]); - for(std::size_t d=0; d<3; ++d) - _mesh->mutGlobalGrid().setOwnedNumCell(d, (cell_index_hi[d]-cell_index_lo[d])); - _liball->setVertices(updated_vertices); - _pm->updateMesh(_mesh); + _lb.balance(_pm); _pm->communicateParticles( _halo_min ); @@ -153,18 +114,7 @@ class Solver : public SolverBase _pm->get( Location::Particle(), Field::Position() ), _pm->get( Location::Particle(), Field::Velocity() ), _pm->get( Location::Particle(), Field::J() ) ); - //_liball->printVTKoutlines(t); - std::array vertices; - for(std::size_t d=0; d<3; ++d) - vertices[d] = static_cast(_mesh->mutGlobalGrid().globalOffset(d)) * _mesh->cellSize(); - for(std::size_t d=3; d<6; ++d) - vertices[d] = vertices[d-3] + static_cast(_mesh->mutGlobalGrid().ownedNumCell(d-3)) * _mesh->cellSize(); - VTKDomainWriter::writeDomain(_comm, t, vertices, vtk_actual_domain_basename); - for(std::size_t d=0; d<3; ++d) - vertices[d] = updated_vertices.at(0)[d]; - for(std::size_t d=3; d<6; ++d) - vertices[d] = updated_vertices.at(1)[d-3]; - VTKDomainWriter::writeDomain(_comm, t, vertices, vtk_lb_domain_basename); + _lb.output(t); } time += delta_t; @@ -178,7 +128,6 @@ class Solver : public SolverBase double _gravity; BoundaryCondition _bc; int _halo_min; - MPI_Comm _comm; std::shared_ptr> _mesh; std::shared_ptr> _pm; int _rank; From 
e83988bc8e549eecae569cb148e8297a91181e46 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Fri, 23 Jul 2021 18:08:38 +0200 Subject: [PATCH 18/64] remove spurious debug output --- src/ExaMPM_Solver.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 30e02af..9dd5570 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -119,7 +119,6 @@ class Solver : public SolverBase time += delta_t; } - printf("Finished\n"); } private: From 897ff8ad002e0a171db286974ba7c4c680d8bd67 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 26 Jul 2021 16:25:42 +0200 Subject: [PATCH 19/64] fix errors and re-enable vtk output --- src/ExaMPM_LoadBalancer.hpp | 9 +++++---- src/ExaMPM_Solver.hpp | 10 +++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/ExaMPM_LoadBalancer.hpp b/src/ExaMPM_LoadBalancer.hpp index 1bf729d..0be07fd 100644 --- a/src/ExaMPM_LoadBalancer.hpp +++ b/src/ExaMPM_LoadBalancer.hpp @@ -31,7 +31,7 @@ class LoadBalancer using mesh_type = Mesh; // todo(sschulz): Allow for arbitrary dimension - LoadBalancer( MPI_comm comm, + LoadBalancer( MPI_Comm comm, const std::shared_ptr& mesh ) : _comm( comm ) , _mesh( mesh ) @@ -87,8 +87,9 @@ class LoadBalancer { std::string vtk_actual_domain_basename("domain_act"); std::string vtk_lb_domain_basename("domain_lb"); + std::vector> updated_vertices = _liball->getVertices(); // todo(sschulz): The official VTK routine seems to create mangled files on my system. - //_liball->printVTKoutlines(t); + _liball->printVTKoutlines(t); std::array vertices; for(std::size_t d=0; d<3; ++d) vertices[d] = static_cast(_mesh->mutGlobalGrid().globalOffset(d)) * _mesh->cellSize(); @@ -106,10 +107,10 @@ class LoadBalancer private: MPI_Comm _comm; - std::shared_ptr& _mesh; + std::shared_ptr _mesh; std::shared_ptr> _liball; int _rank; -} +}; } // end namespace ExaMPM #endif // EXAMPM_LOADBALANCER_HPP diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 9dd5570..d3531c6 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -75,7 +75,7 @@ class Solver : public SolverBase MPI_Comm_rank( comm, &_rank ); - _lb = LoadBalancer(comm, _mesh); + _lb = std::make_shared>(comm, _mesh); } @@ -88,7 +88,7 @@ class Solver : public SolverBase _pm->get( Location::Particle(), Field::Position() ), _pm->get( Location::Particle(), Field::Velocity() ), _pm->get( Location::Particle(), Field::J() ) ); - _lb.output(0); + _lb->output(0); int num_step = t_final / _dt; @@ -101,7 +101,7 @@ class Solver : public SolverBase TimeIntegrator::step( ExecutionSpace(), *_pm, delta_t, _gravity, _bc ); - _lb.balance(_pm); + _lb->balance(_pm); _pm->communicateParticles( _halo_min ); @@ -114,7 +114,7 @@ class Solver : public SolverBase _pm->get( Location::Particle(), Field::Position() ), _pm->get( Location::Particle(), Field::Velocity() ), _pm->get( Location::Particle(), Field::J() ) ); - _lb.output(t); + _lb->output(t); } time += delta_t; @@ -130,7 +130,7 @@ class Solver : public SolverBase std::shared_ptr> _mesh; std::shared_ptr> _pm; int _rank; - std::shared_ptr> _liball; + std::shared_ptr> _lb; }; //---------------------------------------------------------------------------// From 41fe6927ac7b3c68dc6bf6a4d51c576a6e959a7b Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 26 Jul 2021 16:26:11 +0200 Subject: [PATCH 20/64] change decomposition to y direction --- examples/dam_break.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/dam_break.cpp b/examples/dam_break.cpp index 
8111f64..0a5ee60 100644 --- a/examples/dam_break.cpp +++ b/examples/dam_break.cpp @@ -90,7 +90,7 @@ void damBreak( const double cell_size, // little movement in Y. int comm_size; MPI_Comm_size( MPI_COMM_WORLD, &comm_size ); - std::array ranks_per_dim = { 1, 1, comm_size }; + std::array ranks_per_dim = { 1, comm_size, 1 }; Cajita::ManualPartitioner partitioner( ranks_per_dim ); // Material properties. From 9ab5cf116918bd57047a8a451a3d4cbf4546ee8a Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 26 Jul 2021 16:27:51 +0200 Subject: [PATCH 21/64] also enable ALL vertex output --- src/ExaMPM_LoadBalancer.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ExaMPM_LoadBalancer.hpp b/src/ExaMPM_LoadBalancer.hpp index 0be07fd..80ad390 100644 --- a/src/ExaMPM_LoadBalancer.hpp +++ b/src/ExaMPM_LoadBalancer.hpp @@ -90,6 +90,7 @@ class LoadBalancer std::vector> updated_vertices = _liball->getVertices(); // todo(sschulz): The official VTK routine seems to create mangled files on my system. _liball->printVTKoutlines(t); + _liball->printVTKvertices(t); std::array vertices; for(std::size_t d=0; d<3; ++d) vertices[d] = static_cast(_mesh->mutGlobalGrid().globalOffset(d)) * _mesh->cellSize(); From 25e21aaf3738ce05e93aa3cb7ab9c12649d36419 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 26 Jul 2021 16:28:52 +0200 Subject: [PATCH 22/64] Revert "also enable ALL vertex output" This reverts commit 9ab5cf116918bd57047a8a451a3d4cbf4546ee8a. --- src/ExaMPM_LoadBalancer.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ExaMPM_LoadBalancer.hpp b/src/ExaMPM_LoadBalancer.hpp index 80ad390..0be07fd 100644 --- a/src/ExaMPM_LoadBalancer.hpp +++ b/src/ExaMPM_LoadBalancer.hpp @@ -90,7 +90,6 @@ class LoadBalancer std::vector> updated_vertices = _liball->getVertices(); // todo(sschulz): The official VTK routine seems to create mangled files on my system. 
_liball->printVTKoutlines(t); - _liball->printVTKvertices(t); std::array vertices; for(std::size_t d=0; d<3; ++d) vertices[d] = static_cast(_mesh->mutGlobalGrid().globalOffset(d)) * _mesh->cellSize(); From 64b55bfc25a7867d1c35103f9e189abd21fbefc7 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 26 Jul 2021 16:43:34 +0200 Subject: [PATCH 23/64] try new CI workflow to update apt-get --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 73c8996..2c2efcf 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Install deps - run: sudo apt-get install libsilo-dev openmpi-bin libopenmpi-dev + run: sudo apt-get update && sudo apt-get install libsilo-dev openmpi-bin libopenmpi-dev - name: Checkout kokkos uses: actions/checkout@v2.2.0 with: From 9a7bda161e45a24c3d2728958d9c17e235e5c334 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Mon, 26 Jul 2021 09:29:07 -0600 Subject: [PATCH 24/64] CI: install ALL in home --- .github/workflows/CI.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 2c2efcf..21ccd13 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -48,13 +48,13 @@ jobs: - name: Build ALL working-directory: ALL run: | - cmake -B build -DCMAKE_BUILD_TYPE=Release + cmake -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$HOME/ALL cmake --build build --parallel 2 cmake --install build - name: Checkout code uses: actions/checkout@v2.2.0 - name: Build run: | - cmake -B build -DCMAKE_PREFIX_PATH="$HOME/kokkos;$HOME/Cabana" + cmake -B build -DCMAKE_PREFIX_PATH="$HOME/kokkos;$HOME/Cabana;$HOME/ALL" cmake --build build --parallel 2 cd build && ctest --output-on-failure From 5c11bf11868b6f4818d5d1f20c3adfe8fc5c490e Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 26 Jul 2021 17:41:11 +0200 Subject: [PATCH 25/64] use more recent ALL version --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 21ccd13..65349a7 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -44,7 +44,7 @@ jobs: cmake --install build - name: Checkout ALL run: | - git clone --depth 1 --branch v0.9.1 https://gitlab.jsc.fz-juelich.de/SLMS/loadbalancing ALL + git clone --depth 1 --branch master https://gitlab.jsc.fz-juelich.de/SLMS/loadbalancing ALL - name: Build ALL working-directory: ALL run: | From d53e661093da8fe2354c6f681085c644942bb909 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 26 Jul 2021 17:54:15 +0200 Subject: [PATCH 26/64] remove unneeded methods --- src/ExaMPM_Mesh.hpp | 61 +++---------------------------------------- src/ExaMPM_Solver.hpp | 4 +-- 2 files changed, 5 insertions(+), 60 deletions(-) diff --git a/src/ExaMPM_Mesh.hpp b/src/ExaMPM_Mesh.hpp index b8d9453..75ccb30 100644 --- a/src/ExaMPM_Mesh.hpp +++ b/src/ExaMPM_Mesh.hpp @@ -40,17 +40,10 @@ class Mesh Mesh( const Kokkos::Array& global_bounding_box, const std::array& global_num_cell, const std::array& periodic, - const Cajita::ManualBlockPartitioner<3>& partitioner, //todo(sschulz): should work with Cajita::BlockPartitioner<3>! 
+ const Cajita::BlockPartitioner<3>& partitioner, const int halo_cell_width, const int minimum_halo_cell_width, MPI_Comm comm ) - : _global_bounding_box( global_bounding_box ) - , _global_num_cell( global_num_cell ) - , _periodic( periodic ) - , _partitioner( partitioner ) - , _halo_cell_width( halo_cell_width ) - , _minimum_halo_cell_width( minimum_halo_cell_width ) - , _comm( comm ) { // Make a copy of the global number of cells so we can modify it. std::array num_cell = global_num_cell; @@ -106,51 +99,13 @@ class Mesh global_low_corner, global_high_corner, num_cell ); // Build the global grid. - _global_grid = Cajita::createGlobalGrid( + auto global_grid = Cajita::createGlobalGrid( comm, global_mesh, periodic, partitioner ); // Build the local grid. int halo_width = std::max( minimum_halo_cell_width, halo_cell_width ); - _local_grid = Cajita::createLocalGrid( _global_grid, halo_width ); + _local_grid = Cajita::createLocalGrid( global_grid, halo_width ); } - - void updateGeometry( - std::array& local_offset, - std::array& local_num_cell ) - { - // Create global mesh bounds. - std::array global_low_corner = { _global_bounding_box[0], - _global_bounding_box[1], - _global_bounding_box[2] }; - std::array global_high_corner = { _global_bounding_box[3], - _global_bounding_box[4], - _global_bounding_box[5] }; - // For dimensions that are not periodic we pad by the minimum halo - // cell width to allow for projections outside of the domain. - double cell_size = _local_grid->globalGrid().globalMesh().cellSize(0); - for ( int d = 0; d < 3; ++d ) - { - if ( !_periodic[d] ) - { - global_low_corner[d] -= cell_size*_minimum_halo_cell_width; - global_high_corner[d] += cell_size*_minimum_halo_cell_width; - } - } - - // Create the global mesh. - auto global_mesh = Cajita::createUniformGlobalMesh( - global_low_corner, global_high_corner, _global_num_cell ); - - // Build the global grid. - _global_grid = Cajita::createGlobalGrid( - _comm, global_mesh, _periodic, _partitioner, local_offset, local_num_cell ); - - // Build the local grid. - int halo_width = std::max( _minimum_halo_cell_width, _halo_cell_width ); - _local_grid = Cajita::createLocalGrid( _global_grid, halo_width ); - // todo(sschulz): Can we just reassing without causing a memory leak? - } - // Get the local grid. const std::shared_ptr>>& localGrid() const @@ -184,18 +139,8 @@ class Mesh public: - Kokkos::Array _global_bounding_box; - std::array _global_num_cell; - std::array _periodic; - Cajita::ManualBlockPartitioner<3> _partitioner; - int _halo_cell_width; - int _minimum_halo_cell_width; - MPI_Comm _comm; - std::shared_ptr< Cajita::LocalGrid>> _local_grid; - std::shared_ptr< - Cajita::GlobalGrid>> _global_grid; Kokkos::Array _min_domain_global_node_index; Kokkos::Array _max_domain_global_node_index; diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index d3531c6..4cc3197 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -46,7 +46,7 @@ class Solver : public SolverBase const Kokkos::Array& global_bounding_box, const std::array& global_num_cell, const std::array& periodic, - const Cajita::ManualBlockPartitioner<3>& partitioner, //todo(sschulz): should work with Cajita::BlockPartitioner<3>! 
+ const Cajita::BlockPartitioner<3>& partitioner, const int halo_cell_width, const InitFunc& create_functor, const int particles_per_cell, @@ -142,7 +142,7 @@ createSolver( const std::string& device, const Kokkos::Array& global_bounding_box, const std::array& global_num_cell, const std::array& periodic, - const Cajita::ManualBlockPartitioner<3>& partitioner, + const Cajita::BlockPartitioner<3>& partitioner, const int halo_cell_width, const InitFunc& create_functor, const int particles_per_cell, From 662eae5fa246b23b3fe16e64ad1dbe05c67cf533 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 26 Jul 2021 18:02:38 +0200 Subject: [PATCH 27/64] remove trailing whitespace --- src/ExaMPM_Mesh.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ExaMPM_Mesh.hpp b/src/ExaMPM_Mesh.hpp index 75ccb30..c3748b4 100644 --- a/src/ExaMPM_Mesh.hpp +++ b/src/ExaMPM_Mesh.hpp @@ -106,7 +106,7 @@ class Mesh int halo_width = std::max( minimum_halo_cell_width, halo_cell_width ); _local_grid = Cajita::createLocalGrid( global_grid, halo_width ); } - + // Get the local grid. const std::shared_ptr>>& localGrid() const { From c1e53ccc1e2956eedc36346acb3038b7ad7ce29d Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 26 Jul 2021 18:04:56 +0200 Subject: [PATCH 28/64] run clang-format --- .clang-format | 10 + src/ExaMPM_BoundaryConditions.hpp | 10 +- src/ExaMPM_DenseLinearAlgebra.hpp | 78 ++++---- src/ExaMPM_LoadBalancer.hpp | 131 +++++++------ src/ExaMPM_Mesh.hpp | 61 +++--- src/ExaMPM_ParticleInit.hpp | 93 +++++---- src/ExaMPM_ProblemManager.hpp | 281 ++++++++++++--------------- src/ExaMPM_SiloParticleWriter.hpp | 278 ++++++++++++-------------- src/ExaMPM_Solver.hpp | 183 ++++++----------- src/ExaMPM_TimeIntegrator.hpp | 185 +++++++++--------- src/ExaMPM_VTKDomainWriter.hpp | 174 +++++++++-------- src/ExaMPM_VelocityInterpolation.hpp | 140 ++++++------- 12 files changed, 754 insertions(+), 870 deletions(-) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..b8c0e2c --- /dev/null +++ b/.clang-format @@ -0,0 +1,10 @@ +BasedOnStyle: LLVM +--- +Language: Cpp +AlwaysBreakTemplateDeclarations: true +BreakBeforeBraces: Allman +BinPackParameters: true +IndentWidth: 4 +SpacesInParentheses: true +BreakConstructorInitializersBeforeComma: true +PointerAlignment: Left diff --git a/src/ExaMPM_BoundaryConditions.hpp b/src/ExaMPM_BoundaryConditions.hpp index f75d0b8..2c856aa 100644 --- a/src/ExaMPM_BoundaryConditions.hpp +++ b/src/ExaMPM_BoundaryConditions.hpp @@ -31,8 +31,8 @@ struct BoundaryType struct BoundaryCondition { KOKKOS_INLINE_FUNCTION - void operator()( const int gi, const int gj, const int gk, - double& ux, double& uy, double& uz ) const + void operator()( const int gi, const int gj, const int gk, double& ux, + double& uy, double& uz ) const { // Low x if ( gi <= min[0] ) @@ -125,9 +125,9 @@ struct BoundaryCondition } } - Kokkos::Array boundary; - Kokkos::Array min; - Kokkos::Array max; + Kokkos::Array boundary; + Kokkos::Array min; + Kokkos::Array max; }; //---------------------------------------------------------------------------// diff --git a/src/ExaMPM_DenseLinearAlgebra.hpp b/src/ExaMPM_DenseLinearAlgebra.hpp index 4ba0fcf..12586e3 100644 --- a/src/ExaMPM_DenseLinearAlgebra.hpp +++ b/src/ExaMPM_DenseLinearAlgebra.hpp @@ -20,45 +20,39 @@ namespace DenseLinearAlgebra { //---------------------------------------------------------------------------// // Compute the determinant of a 3x3 matrix. 
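// A quick sanity check for the helpers below (illustrative values only):
// inverse( m, m_inv ) followed by matMatMultiply( m, m_inv, c ) should
// reproduce the identity up to round-off, e.g.
//
//     double m[3][3] = { { 2, 0, 0 }, { 0, 4, 0 }, { 0, 0, 8 } };
//     // determinant( m ) == 64 and m_inv == diag( 0.5, 0.25, 0.125 ).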
-template -KOKKOS_INLINE_FUNCTION -Real determinant( const Real m[3][3] ) +template +KOKKOS_INLINE_FUNCTION Real determinant( const Real m[3][3] ) { - return - m[0][0] * m[1][1] * m[2][2] + - m[0][1] * m[1][2] * m[2][0] + - m[0][2] * m[1][0] * m[2][1] - - m[0][2] * m[1][1] * m[2][0] - - m[0][1] * m[1][0] * m[2][2] - - m[0][0] * m[1][2] * m[2][1]; + return m[0][0] * m[1][1] * m[2][2] + m[0][1] * m[1][2] * m[2][0] + + m[0][2] * m[1][0] * m[2][1] - m[0][2] * m[1][1] * m[2][0] - + m[0][1] * m[1][0] * m[2][2] - m[0][0] * m[1][2] * m[2][1]; } //---------------------------------------------------------------------------// // Compute the inverse of a 3x3 matrix with a precomputed determinant. -template -KOKKOS_INLINE_FUNCTION -void inverse( const Real m[3][3], Real det_m, Real m_inv[3][3] ) +template +KOKKOS_INLINE_FUNCTION void inverse( const Real m[3][3], Real det_m, + Real m_inv[3][3] ) { Real det_m_inv = 1.0 / det_m; - m_inv[0][0] = (m[1][1]*m[2][2] - m[1][2]*m[2][1]) * det_m_inv; - m_inv[0][1] = (m[0][2]*m[2][1] - m[0][1]*m[2][2]) * det_m_inv; - m_inv[0][2] = (m[0][1]*m[1][2] - m[0][2]*m[1][1]) * det_m_inv; + m_inv[0][0] = ( m[1][1] * m[2][2] - m[1][2] * m[2][1] ) * det_m_inv; + m_inv[0][1] = ( m[0][2] * m[2][1] - m[0][1] * m[2][2] ) * det_m_inv; + m_inv[0][2] = ( m[0][1] * m[1][2] - m[0][2] * m[1][1] ) * det_m_inv; - m_inv[1][0] = (m[1][2]*m[2][0] - m[1][0]*m[2][2]) * det_m_inv; - m_inv[1][1] = (m[0][0]*m[2][2] - m[0][2]*m[2][0]) * det_m_inv; - m_inv[1][2] = (m[0][2]*m[1][0] - m[0][0]*m[1][2]) * det_m_inv; + m_inv[1][0] = ( m[1][2] * m[2][0] - m[1][0] * m[2][2] ) * det_m_inv; + m_inv[1][1] = ( m[0][0] * m[2][2] - m[0][2] * m[2][0] ) * det_m_inv; + m_inv[1][2] = ( m[0][2] * m[1][0] - m[0][0] * m[1][2] ) * det_m_inv; - m_inv[2][0] = (m[1][0]*m[2][1] - m[1][1]*m[2][0]) * det_m_inv; - m_inv[2][1] = (m[0][1]*m[2][0] - m[0][0]*m[2][1]) * det_m_inv; - m_inv[2][2] = (m[0][0]*m[1][1] - m[0][1]*m[1][0]) * det_m_inv; + m_inv[2][0] = ( m[1][0] * m[2][1] - m[1][1] * m[2][0] ) * det_m_inv; + m_inv[2][1] = ( m[0][1] * m[2][0] - m[0][0] * m[2][1] ) * det_m_inv; + m_inv[2][2] = ( m[0][0] * m[1][1] - m[0][1] * m[1][0] ) * det_m_inv; } //---------------------------------------------------------------------------// // Compute the inverse of a 3x3 matrix. -template -KOKKOS_INLINE_FUNCTION -void inverse( const Real m[3][3], Real m_inv[3][3] ) +template +KOKKOS_INLINE_FUNCTION void inverse( const Real m[3][3], Real m_inv[3][3] ) { Real det_m = determinant( m ); inverse( m, det_m, m_inv ); @@ -66,9 +60,9 @@ void inverse( const Real m[3][3], Real m_inv[3][3] ) //---------------------------------------------------------------------------// // Matrix vector multiply. A*x = y -template -KOKKOS_INLINE_FUNCTION -void matVecMultiply( const Real a[3][3], const Real x[3], Real y[3] ) +template +KOKKOS_INLINE_FUNCTION void matVecMultiply( const Real a[3][3], const Real x[3], + Real y[3] ) { for ( int i = 0; i < 3; ++i ) { @@ -80,32 +74,32 @@ void matVecMultiply( const Real a[3][3], const Real x[3], Real y[3] ) //---------------------------------------------------------------------------// // Matrix Matrix multiply. 
A*B = C -template -KOKKOS_INLINE_FUNCTION -void matMatMultiply( const Real a[3][3], const Real b[3][3], Real c[3][3] ) +template +KOKKOS_INLINE_FUNCTION void matMatMultiply( const Real a[3][3], + const Real b[3][3], Real c[3][3] ) { - for ( int i = 0; i < 3; ++i ) - { + for ( int i = 0; i < 3; ++i ) + { for ( int j = 0; j < 3; ++j ) { c[i][j] = 0.0; for ( int k = 0; k < 3; ++k ) c[i][j] += a[i][k] * b[k][j]; } - } + } } //---------------------------------------------------------------------------// // Transpose Matrix A^{T} -template -KOKKOS_INLINE_FUNCTION -void transpose( const Real a[3][3], Real transpose_a[3][3] ) +template +KOKKOS_INLINE_FUNCTION void transpose( const Real a[3][3], + Real transpose_a[3][3] ) { - for ( int i = 0; i < 3; ++i ) - { - for ( int j = 0; j < 3; ++j ) - transpose_a[i][j] = a[j][i]; - } + for ( int i = 0; i < 3; ++i ) + { + for ( int j = 0; j < 3; ++j ) + transpose_a[i][j] = a[j][i]; + } } //---------------------------------------------------------------------------// diff --git a/src/ExaMPM_LoadBalancer.hpp b/src/ExaMPM_LoadBalancer.hpp index 0be07fd..06e4490 100644 --- a/src/ExaMPM_LoadBalancer.hpp +++ b/src/ExaMPM_LoadBalancer.hpp @@ -12,8 +12,8 @@ #ifndef EXAMPM_LOADBALANCER_HPP #define EXAMPM_LOADBALANCER_HPP -#include #include +#include #include #include @@ -24,93 +24,108 @@ namespace ExaMPM { -template +template class LoadBalancer { public: - using mesh_type = Mesh; // todo(sschulz): Allow for arbitrary dimension - LoadBalancer( MPI_Comm comm, - const std::shared_ptr& mesh ) + LoadBalancer( MPI_Comm comm, const std::shared_ptr& mesh ) : _comm( comm ) , _mesh( mesh ) { MPI_Comm_rank( comm, &_rank ); - _liball = std::make_shared>(ALL::TENSOR, 3, 0); + _liball = + std::make_shared>( ALL::TENSOR, 3, 0 ); // todo(sschulz): Check if the code still crashes. // For some reason only(!) the following line causes the code to crash - //auto global_grid = _mesh->localGrid()->globalGrid(); - std::vector block_id(3,0); - for(std::size_t i=0; i<3; ++i) - block_id.at(i) = _mesh->mutGlobalGrid().dimBlockId(i); - std::vector blocks_per_dim(3,0); - for(std::size_t i=0; i<3; ++i) - blocks_per_dim.at(i) = _mesh->mutGlobalGrid().dimNumBlock(i); - _liball->setProcGridParams(block_id, blocks_per_dim); - std::vector min_domain_size(3,0); - for(std::size_t i=0; i<3; ++i) - min_domain_size.at(i) = 3.*_mesh->cellSize(); - _liball->setMinDomainSize(min_domain_size); - _liball->setCommunicator(_comm); - _liball->setProcTag(_rank); + // auto global_grid = _mesh->localGrid()->globalGrid(); + std::vector block_id( 3, 0 ); + for ( std::size_t i = 0; i < 3; ++i ) + block_id.at( i ) = _mesh->mutGlobalGrid().dimBlockId( i ); + std::vector blocks_per_dim( 3, 0 ); + for ( std::size_t i = 0; i < 3; ++i ) + blocks_per_dim.at( i ) = _mesh->mutGlobalGrid().dimNumBlock( i ); + _liball->setProcGridParams( block_id, blocks_per_dim ); + std::vector min_domain_size( 3, 0 ); + for ( std::size_t i = 0; i < 3; ++i ) + min_domain_size.at( i ) = 3. 
* _mesh->cellSize(); + _liball->setMinDomainSize( min_domain_size ); + _liball->setCommunicator( _comm ); + _liball->setProcTag( _rank ); _liball->setup(); - std::vector> lb_vertices(2, ALL::Point(3)); - for(std::size_t d=0; d<3; ++d) - lb_vertices.at(0)[d] = _mesh->localGrid()->globalGrid().globalOffset(d) * _mesh->cellSize(); - for(std::size_t d=0; d<3; ++d) - lb_vertices.at(1)[d] = lb_vertices.at(0)[d] + _mesh->localGrid()->globalGrid().ownedNumCell(d) * _mesh->cellSize(); - _liball->setVertices(lb_vertices); + std::vector> lb_vertices( 2, + ALL::Point( 3 ) ); + for ( std::size_t d = 0; d < 3; ++d ) + lb_vertices.at( 0 )[d] = + _mesh->localGrid()->globalGrid().globalOffset( d ) * + _mesh->cellSize(); + for ( std::size_t d = 0; d < 3; ++d ) + lb_vertices.at( 1 )[d] = + lb_vertices.at( 0 )[d] + + _mesh->localGrid()->globalGrid().ownedNumCell( d ) * + _mesh->cellSize(); + _liball->setVertices( lb_vertices ); } // This will update the domain decomposition and also update the mesh - void balance(std::shared_ptr> pm ) + void balance( std::shared_ptr> pm ) { - _liball->setWork(pm->numParticle()); + _liball->setWork( pm->numParticle() ); _liball->balance(); - std::vector> updated_vertices = _liball->getVertices(); + std::vector> updated_vertices = + _liball->getVertices(); std::array cell_index_lo, cell_index_hi; - for(std::size_t d=0; d<3; ++d) - cell_index_lo[d] = std::rint( updated_vertices.at(0)[d] / _mesh->cellSize() ); - for(std::size_t d=0; d<3; ++d) - cell_index_hi[d] = std::rint( updated_vertices.at(1)[d] / _mesh->cellSize() ); - for(std::size_t d=0; d<3; ++d) - _mesh->mutGlobalGrid().setGlobalOffset(d, cell_index_lo[d]); - for(std::size_t d=0; d<3; ++d) - _mesh->mutGlobalGrid().setOwnedNumCell(d, (cell_index_hi[d]-cell_index_lo[d])); - _liball->setVertices(updated_vertices); - pm->updateMesh(_mesh); + for ( std::size_t d = 0; d < 3; ++d ) + cell_index_lo[d] = + std::rint( updated_vertices.at( 0 )[d] / _mesh->cellSize() ); + for ( std::size_t d = 0; d < 3; ++d ) + cell_index_hi[d] = + std::rint( updated_vertices.at( 1 )[d] / _mesh->cellSize() ); + for ( std::size_t d = 0; d < 3; ++d ) + _mesh->mutGlobalGrid().setGlobalOffset( d, cell_index_lo[d] ); + for ( std::size_t d = 0; d < 3; ++d ) + _mesh->mutGlobalGrid().setOwnedNumCell( + d, ( cell_index_hi[d] - cell_index_lo[d] ) ); + _liball->setVertices( updated_vertices ); + pm->updateMesh( _mesh ); } // Output the actual and internal load balancing grid to VTK files - void output(const int t) const + void output( const int t ) const { - std::string vtk_actual_domain_basename("domain_act"); - std::string vtk_lb_domain_basename("domain_lb"); - std::vector> updated_vertices = _liball->getVertices(); - // todo(sschulz): The official VTK routine seems to create mangled files on my system. - _liball->printVTKoutlines(t); + std::string vtk_actual_domain_basename( "domain_act" ); + std::string vtk_lb_domain_basename( "domain_lb" ); + std::vector> updated_vertices = + _liball->getVertices(); + // todo(sschulz): The official VTK routine seems to create mangled files + // on my system. 
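// Note on balance() above: ALL reports the updated domain corners as
// doubles, so they are snapped back onto the uniform grid before the
// global grid offsets are updated. With a hypothetical cell size of 0.25,
// a returned corner of 2.9999999999 maps to
// std::rint( 2.9999999999 / 0.25 ) == 12, robust to round-off.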
+ _liball->printVTKoutlines( t ); std::array vertices; - for(std::size_t d=0; d<3; ++d) - vertices[d] = static_cast(_mesh->mutGlobalGrid().globalOffset(d)) * _mesh->cellSize(); - for(std::size_t d=3; d<6; ++d) - vertices[d] = vertices[d-3] + static_cast(_mesh->mutGlobalGrid().ownedNumCell(d-3)) * _mesh->cellSize(); - VTKDomainWriter::writeDomain(_comm, t, vertices, vtk_actual_domain_basename); - for(std::size_t d=0; d<3; ++d) - vertices[d] = updated_vertices.at(0)[d]; - for(std::size_t d=3; d<6; ++d) - vertices[d] = updated_vertices.at(1)[d-3]; - VTKDomainWriter::writeDomain(_comm, t, vertices, vtk_lb_domain_basename); + for ( std::size_t d = 0; d < 3; ++d ) + vertices[d] = static_cast( + _mesh->mutGlobalGrid().globalOffset( d ) ) * + _mesh->cellSize(); + for ( std::size_t d = 3; d < 6; ++d ) + vertices[d] = vertices[d - 3] + + static_cast( + _mesh->mutGlobalGrid().ownedNumCell( d - 3 ) ) * + _mesh->cellSize(); + VTKDomainWriter::writeDomain( _comm, t, vertices, + vtk_actual_domain_basename ); + for ( std::size_t d = 0; d < 3; ++d ) + vertices[d] = updated_vertices.at( 0 )[d]; + for ( std::size_t d = 3; d < 6; ++d ) + vertices[d] = updated_vertices.at( 1 )[d - 3]; + VTKDomainWriter::writeDomain( _comm, t, vertices, + vtk_lb_domain_basename ); } - private: - MPI_Comm _comm; std::shared_ptr _mesh; std::shared_ptr> _liball; int _rank; - }; } // end namespace ExaMPM #endif // EXAMPM_LOADBALANCER_HPP diff --git a/src/ExaMPM_Mesh.hpp b/src/ExaMPM_Mesh.hpp index c3748b4..c921a4a 100644 --- a/src/ExaMPM_Mesh.hpp +++ b/src/ExaMPM_Mesh.hpp @@ -29,29 +29,26 @@ namespace ExaMPM \class Mesh \brief Logically and spatially uniform Cartesian mesh. */ -template +template class Mesh { public: - using memory_space = MemorySpace; // Construct a mesh. - Mesh( const Kokkos::Array& global_bounding_box, - const std::array& global_num_cell, - const std::array& periodic, + Mesh( const Kokkos::Array& global_bounding_box, + const std::array& global_num_cell, + const std::array& periodic, const Cajita::BlockPartitioner<3>& partitioner, - const int halo_cell_width, - const int minimum_halo_cell_width, + const int halo_cell_width, const int minimum_halo_cell_width, MPI_Comm comm ) { // Make a copy of the global number of cells so we can modify it. - std::array num_cell = global_num_cell; + std::array num_cell = global_num_cell; // Compute the cell size. double cell_size = - (global_bounding_box[3] - global_bounding_box[0]) / - num_cell[0]; + ( global_bounding_box[3] - global_bounding_box[0] ) / num_cell[0]; // Because the mesh is uniform check that the domain is evenly // divisible by the cell size in each dimension within round-off @@ -59,21 +56,20 @@ class Mesh for ( int d = 0; d < 3; ++d ) { double extent = num_cell[d] * cell_size; - if ( std::abs( - extent - (global_bounding_box[d+3]- - global_bounding_box[d]) ) > + if ( std::abs( extent - ( global_bounding_box[d + 3] - + global_bounding_box[d] ) ) > double( 10.0 ) * std::numeric_limits::epsilon() ) throw std::logic_error( "Extent not evenly divisible by uniform cell size" ); } // Create global mesh bounds. 
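// For non-periodic dimensions the corners built below are padded outward
// by minimum_halo_cell_width cells so particles can project slightly
// outside the physical domain: with cell_size = 0.1 and a minimum halo of
// 3 cells, a domain of [0, 1] becomes [-0.3, 1.3] in that dimension.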
- std::array global_low_corner = { global_bounding_box[0], - global_bounding_box[1], - global_bounding_box[2] }; - std::array global_high_corner = { global_bounding_box[3], - global_bounding_box[4], - global_bounding_box[5] }; + std::array global_low_corner = { global_bounding_box[0], + global_bounding_box[1], + global_bounding_box[2] }; + std::array global_high_corner = { global_bounding_box[3], + global_bounding_box[4], + global_bounding_box[5] }; for ( int d = 0; d < 3; ++d ) { _min_domain_global_node_index[d] = 0; @@ -86,9 +82,9 @@ class Mesh { if ( !periodic[d] ) { - global_low_corner[d] -= cell_size*minimum_halo_cell_width; - global_high_corner[d] += cell_size*minimum_halo_cell_width; - num_cell[d] += 2*minimum_halo_cell_width; + global_low_corner[d] -= cell_size * minimum_halo_cell_width; + global_high_corner[d] += cell_size * minimum_halo_cell_width; + num_cell[d] += 2 * minimum_halo_cell_width; _min_domain_global_node_index[d] += minimum_halo_cell_width; _max_domain_global_node_index[d] -= minimum_halo_cell_width; } @@ -99,8 +95,8 @@ class Mesh global_low_corner, global_high_corner, num_cell ); // Build the global grid. - auto global_grid = Cajita::createGlobalGrid( - comm, global_mesh, periodic, partitioner ); + auto global_grid = Cajita::createGlobalGrid( comm, global_mesh, + periodic, partitioner ); // Build the local grid. int halo_width = std::max( minimum_halo_cell_width, halo_cell_width ); @@ -108,7 +104,8 @@ class Mesh } // Get the local grid. - const std::shared_ptr>>& localGrid() const + const std::shared_ptr>>& + localGrid() const { return _local_grid; } @@ -122,28 +119,26 @@ class Mesh // Get the cell size. double cellSize() const { - return _local_grid->globalGrid().globalMesh().cellSize(0); + return _local_grid->globalGrid().globalMesh().cellSize( 0 ); } // Get the minimum node index in the domain. - Kokkos::Array minDomainGlobalNodeIndex() const + Kokkos::Array minDomainGlobalNodeIndex() const { return _min_domain_global_node_index; } // Get the maximum node index in the domain. - Kokkos::Array maxDomainGlobalNodeIndex() const + Kokkos::Array maxDomainGlobalNodeIndex() const { return _max_domain_global_node_index; } public: + std::shared_ptr>> _local_grid; - std::shared_ptr< - Cajita::LocalGrid>> _local_grid; - - Kokkos::Array _min_domain_global_node_index; - Kokkos::Array _max_domain_global_node_index; + Kokkos::Array _min_domain_global_node_index; + Kokkos::Array _max_domain_global_node_index; }; //---------------------------------------------------------------------------// diff --git a/src/ExaMPM_ParticleInit.hpp b/src/ExaMPM_ParticleInit.hpp index 478bfb0..8b29b61 100644 --- a/src/ExaMPM_ParticleInit.hpp +++ b/src/ExaMPM_ParticleInit.hpp @@ -14,8 +14,8 @@ #include -#include #include +#include #include #include @@ -24,7 +24,7 @@ namespace ExaMPM { //---------------------------------------------------------------------------// // Filter out empty particles that weren't created. -template +template void filterEmpties( const ExecutionSpace& exec_space, const int local_num_create, const CreationView& particle_created, @@ -34,35 +34,37 @@ void filterEmpties( const ExecutionSpace& exec_space, // Determine the empty particle positions in the compaction zone. 
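// Worked example of the two-pass compaction below, with illustrative data:
//     particle_created = { 1, 0, 1, 0, 1, 1 }, local_num_create = 4
//     first scan over [0, 4):  empties = { 1, 3 }  (holes in the keep zone)
//     second scan over [4, 6): tuples 4 and 5 are copied into slots 1 and 3
// so after resize( 4 ) the list is dense and holds only created particles.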
int num_particles = particles.size(); - Kokkos::View empties( - Kokkos::ViewAllocateWithoutInitializing("empties"), - std::min(num_particles-local_num_create,local_num_create) ); + Kokkos::View empties( + Kokkos::ViewAllocateWithoutInitializing( "empties" ), + std::min( num_particles - local_num_create, local_num_create ) ); Kokkos::parallel_scan( - Kokkos::RangePolicy(exec_space,0,local_num_create), - KOKKOS_LAMBDA( const int i, int& count, const bool final_pass ){ - if ( !particle_created(i) ) + Kokkos::RangePolicy( exec_space, 0, local_num_create ), + KOKKOS_LAMBDA( const int i, int& count, const bool final_pass ) { + if ( !particle_created( i ) ) { if ( final_pass ) { - empties(count) = i; + empties( count ) = i; } ++count; } - }); + } ); // Compact the list so the it only has real particles. Kokkos::parallel_scan( - Kokkos::RangePolicy(exec_space,local_num_create,num_particles), - KOKKOS_LAMBDA( const int i, int& count, const bool final_pass ){ - if ( particle_created(i) ) + Kokkos::RangePolicy( exec_space, local_num_create, + num_particles ), + KOKKOS_LAMBDA( const int i, int& count, const bool final_pass ) { + if ( particle_created( i ) ) { if ( final_pass ) { - particles.setTuple( empties(count), particles.getTuple(i) ); + particles.setTuple( empties( count ), + particles.getTuple( i ) ); } ++count; } - }); + } ); particles.resize( local_num_create ); particles.shrinkToFit(); } @@ -97,7 +99,8 @@ void filterEmpties( const ExecutionSpace& exec_space, filled with particles and resized to a size equal to the number of particles created. */ -template +template void initializeParticles( const ExecSpace& exec_space, const LocalGridType& local_grid, const int particles_per_cell_dim, @@ -119,15 +122,14 @@ void initializeParticles( const ExecSpace& exec_space, // Allocate enough space for the case the particles consume the entire // local grid. - int particles_per_cell = particles_per_cell_dim * - particles_per_cell_dim * + int particles_per_cell = particles_per_cell_dim * particles_per_cell_dim * particles_per_cell_dim; int num_particles = particles_per_cell * owned_cells.size(); particles.resize( num_particles ); // Creation status. - auto particle_created = Kokkos::View( - Kokkos::ViewAllocateWithoutInitializing("particle_created"), + auto particle_created = Kokkos::View( + Kokkos::ViewAllocateWithoutInitializing( "particle_created" ), num_particles ); // Initialize particles. @@ -135,29 +137,33 @@ void initializeParticles( const ExecSpace& exec_space, Kokkos::parallel_reduce( "init_particles_uniform", Cajita::createExecutionPolicy( owned_cells, exec_space ), - KOKKOS_LAMBDA( const int i, const int j, const int k, int& create_count ){ + KOKKOS_LAMBDA( const int i, const int j, const int k, + int& create_count ) { // Compute the owned local cell id. - int i_own = i - owned_cells.min(Dim::I); - int j_own = j - owned_cells.min(Dim::J); - int k_own = k - owned_cells.min(Dim::K); - int cell_id = i_own + owned_cells.extent(Dim::I) * ( - j_own + k_own * owned_cells.extent(Dim::J) ); + int i_own = i - owned_cells.min( Dim::I ); + int j_own = j - owned_cells.min( Dim::J ); + int k_own = k - owned_cells.min( Dim::K ); + int cell_id = + i_own + owned_cells.extent( Dim::I ) * + ( j_own + k_own * owned_cells.extent( Dim::J ) ); // Get the coordinates of the low cell node. - int low_node[3] = {i,j,k}; + int low_node[3] = { i, j, k }; double low_coords[3]; local_mesh.coordinates( Cajita::Node(), low_node, low_coords ); // Get the coordinates of the high cell node. 
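// With particles_per_cell_dim = 2 and a cell spanning [0.0, 0.1] in one
// dimension, the spacing computed below is 0.05 and the two particle
// coordinates are 0.5 * 0.05 + { 0, 1 } * 0.05 = { 0.025, 0.075 }, i.e.
// particles sit at the centers of a regular sub-lattice inside the cell.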
- int high_node[3] = {i+1,j+1,k+1}; + int high_node[3] = { i + 1, j + 1, k + 1 }; double high_coords[3]; local_mesh.coordinates( Cajita::Node(), high_node, high_coords ); // Compute the particle spacing in each dimension. - double spacing[3] = - { (high_coords[Dim::I] - low_coords[Dim::I]) / particles_per_cell_dim, - (high_coords[Dim::J] - low_coords[Dim::J]) / particles_per_cell_dim, - (high_coords[Dim::K] - low_coords[Dim::K]) / particles_per_cell_dim }; + double spacing[3] = { ( high_coords[Dim::I] - low_coords[Dim::I] ) / + particles_per_cell_dim, + ( high_coords[Dim::J] - low_coords[Dim::J] ) / + particles_per_cell_dim, + ( high_coords[Dim::K] - low_coords[Dim::K] ) / + particles_per_cell_dim }; // Particle coordinate. double px[3]; @@ -171,23 +177,24 @@ void initializeParticles( const ExecSpace& exec_space, for ( int kp = 0; kp < particles_per_cell_dim; ++kp ) { // Local particle id. - int pid = cell_id * particles_per_cell + - ip + particles_per_cell_dim * ( - jp + particles_per_cell_dim * kp ); + int pid = cell_id * particles_per_cell + ip + + particles_per_cell_dim * + ( jp + particles_per_cell_dim * kp ); // Set the particle position. - px[Dim::I] = 0.5 * spacing[Dim::I] + ip * spacing[Dim::I] + - low_coords[Dim::I]; - px[Dim::J] = 0.5 * spacing[Dim::J] + jp * spacing[Dim::J] + - low_coords[Dim::J]; - px[Dim::K] = 0.5 * spacing[Dim::K] + kp * spacing[Dim::K] + - low_coords[Dim::K]; + px[Dim::I] = 0.5 * spacing[Dim::I] + + ip * spacing[Dim::I] + low_coords[Dim::I]; + px[Dim::J] = 0.5 * spacing[Dim::J] + + jp * spacing[Dim::J] + low_coords[Dim::J]; + px[Dim::K] = 0.5 * spacing[Dim::K] + + kp * spacing[Dim::K] + low_coords[Dim::K]; // Create a new particle. - particle_created(pid) = create_functor( px, particle ); + particle_created( pid ) = + create_functor( px, particle ); // If we created a new particle insert it into the list. - if ( particle_created(pid) ) + if ( particle_created( pid ) ) { particles.setTuple( pid, particle ); ++create_count; diff --git a/src/ExaMPM_ProblemManager.hpp b/src/ExaMPM_ProblemManager.hpp index e8e8598..ae423dd 100644 --- a/src/ExaMPM_ProblemManager.hpp +++ b/src/ExaMPM_ProblemManager.hpp @@ -12,8 +12,8 @@ #ifndef EXAMPM_PROBLEMMANAGER_HPP #define EXAMPM_PROBLEMMANAGER_HPP -#include #include +#include #include @@ -27,69 +27,86 @@ namespace ExaMPM // Field locations namespace Location { -struct Cell {}; -struct Node {}; -struct Particle {}; +struct Cell +{ +}; +struct Node +{ +}; +struct Particle +{ +}; } // end namespace Location //---------------------------------------------------------------------------// // Fields. namespace Field { -struct Velocity {}; -struct Affine {}; -struct Position {}; -struct Mass {}; -struct Volume {}; -struct J {}; -struct Momentum {}; -struct Force {}; -struct PositionCorrection {}; -struct Density {}; -struct Mark {}; +struct Velocity +{ +}; +struct Affine +{ +}; +struct Position +{ +}; +struct Mass +{ +}; +struct Volume +{ +}; +struct J +{ +}; +struct Momentum +{ +}; +struct Force +{ +}; +struct PositionCorrection +{ +}; +struct Density +{ +}; +struct Mark +{ +}; } // end namespace Field. 
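// The empty structs above are tag types: overloads such as
// get( Location::Particle, Field::Position ) are selected purely by the
// tag arguments, so call sites read as
//
//     auto x_p = pm.get( Location::Particle(), Field::Position() );
//
// and any location/field combination without an overload is rejected at
// compile time.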
//---------------------------------------------------------------------------// -template +template class ProblemManager { public: - using memory_space = MemorySpace; using execution_space = typename memory_space::execution_space; - using particle_members = Cabana::MemberTypes; - using particle_list = Cabana::AoSoA; + using particle_members = + Cabana::MemberTypes; + using particle_list = Cabana::AoSoA; using particle_type = typename particle_list::tuple_type; - using node_array = Cajita::Array, - MemorySpace>; + using node_array = Cajita::Array, MemorySpace>; - using cell_array = Cajita::Array, - MemorySpace>; + using cell_array = Cajita::Array, MemorySpace>; using halo = Cajita::Halo; using mesh_type = Mesh; - template + template ProblemManager( const ExecutionSpace& exec_space, const std::shared_ptr& mesh, const InitFunc& create_functor, - const int particles_per_cell, - const double bulk_modulus, - const double rho, - const double gamma, - const double kappa ) + const int particles_per_cell, const double bulk_modulus, + const double rho, const double gamma, const double kappa ) : _mesh( mesh ) , _bulk_modulus( bulk_modulus ) , _rho( rho ) @@ -97,9 +114,8 @@ class ProblemManager , _kappa( kappa ) , _particles( "particles" ) { - initializeParticles( - exec_space, *(_mesh->localGrid()), particles_per_cell, - create_functor, _particles ); + initializeParticles( exec_space, *( _mesh->localGrid() ), + particles_per_cell, create_functor, _particles ); auto node_vector_layout = Cajita::createArrayLayout( _mesh->localGrid(), 3, Cajita::Node() ); @@ -108,31 +124,28 @@ class ProblemManager auto cell_scalar_layout = Cajita::createArrayLayout( _mesh->localGrid(), 1, Cajita::Cell() ); - _momentum = - Cajita::createArray( "momentum", node_vector_layout ); - _mass = - Cajita::createArray( "mass", node_scalar_layout ); - _force = - Cajita::createArray( "force", node_vector_layout ); - _velocity = - Cajita::createArray( "velocity", node_vector_layout ); - _position_correction = - Cajita::createArray( "position_correction", node_vector_layout ); - _density = - Cajita::createArray( "density", cell_scalar_layout ); - - _mark = - Cajita::createArray( "mark", cell_scalar_layout ); - - _node_vector_halo = - Cajita::createHalo( *node_vector_layout, - Cajita::FullHaloPattern() ); - _node_scalar_halo = - Cajita::createHalo( *node_scalar_layout, - Cajita::FullHaloPattern() ); - _cell_scalar_halo = - Cajita::createHalo( *cell_scalar_layout, - Cajita::FullHaloPattern() ); + _momentum = Cajita::createArray( + "momentum", node_vector_layout ); + _mass = Cajita::createArray( "mass", + node_scalar_layout ); + _force = Cajita::createArray( "force", + node_vector_layout ); + _velocity = Cajita::createArray( + "velocity", node_vector_layout ); + _position_correction = Cajita::createArray( + "position_correction", node_vector_layout ); + _density = Cajita::createArray( + "density", cell_scalar_layout ); + + _mark = Cajita::createArray( "mark", + cell_scalar_layout ); + + _node_vector_halo = Cajita::createHalo( + *node_vector_layout, Cajita::FullHaloPattern() ); + _node_scalar_halo = Cajita::createHalo( + *node_scalar_layout, Cajita::FullHaloPattern() ); + _cell_scalar_halo = Cajita::createHalo( + *cell_scalar_layout, Cajita::FullHaloPattern() ); } void updateMesh( const std::shared_ptr& mesh ) @@ -144,137 +157,110 @@ class ProblemManager auto cell_scalar_layout = Cajita::createArrayLayout( _mesh->localGrid(), 1, Cajita::Cell() ); - _momentum = - Cajita::createArray( "momentum", node_vector_layout ); - _mass = - 
Cajita::createArray( "mass", node_scalar_layout ); - _force = - Cajita::createArray( "force", node_vector_layout ); - _velocity = - Cajita::createArray( "velocity", node_vector_layout ); - _position_correction = - Cajita::createArray( "position_correction", node_vector_layout ); - _density = - Cajita::createArray( "density", cell_scalar_layout ); - - _mark = - Cajita::createArray( "mark", cell_scalar_layout ); - - _node_vector_halo = - Cajita::createHalo( *node_vector_layout, - Cajita::FullHaloPattern() ); - _node_scalar_halo = - Cajita::createHalo( *node_scalar_layout, - Cajita::FullHaloPattern() ); - _cell_scalar_halo = - Cajita::createHalo( *cell_scalar_layout, - Cajita::FullHaloPattern() ); + _momentum = Cajita::createArray( + "momentum", node_vector_layout ); + _mass = Cajita::createArray( "mass", + node_scalar_layout ); + _force = Cajita::createArray( "force", + node_vector_layout ); + _velocity = Cajita::createArray( + "velocity", node_vector_layout ); + _position_correction = Cajita::createArray( + "position_correction", node_vector_layout ); + _density = Cajita::createArray( + "density", cell_scalar_layout ); + + _mark = Cajita::createArray( "mark", + cell_scalar_layout ); + + _node_vector_halo = Cajita::createHalo( + *node_vector_layout, Cajita::FullHaloPattern() ); + _node_scalar_halo = Cajita::createHalo( + *node_scalar_layout, Cajita::FullHaloPattern() ); + _cell_scalar_halo = Cajita::createHalo( + *cell_scalar_layout, Cajita::FullHaloPattern() ); } - std::size_t numParticle() const - { - return _particles.size(); - } + std::size_t numParticle() const { return _particles.size(); } - const std::shared_ptr& mesh() const - { - return _mesh; - } + const std::shared_ptr& mesh() const { return _mesh; } - double bulkModulus() const - { - return _bulk_modulus; - } + double bulkModulus() const { return _bulk_modulus; } - double density() const - { - return _rho; - } + double density() const { return _rho; } - double gamma() const - { - return _gamma; - } + double gamma() const { return _gamma; } - double kappa() const - { - return _kappa; - } + double kappa() const { return _kappa; } typename particle_list::template member_slice_type<0> - get( Location::Particle, Field::Affine ) const + get( Location::Particle, Field::Affine ) const { return Cabana::slice<0>( _particles, "affine" ); } typename particle_list::template member_slice_type<1> - get( Location::Particle, Field::Velocity ) const + get( Location::Particle, Field::Velocity ) const { return Cabana::slice<1>( _particles, "velocity" ); } typename particle_list::template member_slice_type<2> - get( Location::Particle, Field::Position ) const + get( Location::Particle, Field::Position ) const { return Cabana::slice<2>( _particles, "position" ); } typename particle_list::template member_slice_type<3> - get( Location::Particle, Field::Mass) const + get( Location::Particle, Field::Mass ) const { return Cabana::slice<3>( _particles, "mass" ); } typename particle_list::template member_slice_type<4> - get( Location::Particle, Field::Volume ) const + get( Location::Particle, Field::Volume ) const { return Cabana::slice<4>( _particles, "volume" ); } typename particle_list::template member_slice_type<5> - get( Location::Particle, Field::J ) const + get( Location::Particle, Field::J ) const { return Cabana::slice<5>( _particles, "J" ); } - typename node_array::view_type - get( Location::Node, Field::Momentum ) const + typename node_array::view_type get( Location::Node, Field::Momentum ) const { return _momentum->view(); } - typename 
node_array::view_type - get( Location::Node, Field::Mass ) const + typename node_array::view_type get( Location::Node, Field::Mass ) const { return _mass->view(); } - typename node_array::view_type - get( Location::Node, Field::Force ) const + typename node_array::view_type get( Location::Node, Field::Force ) const { return _force->view(); } - typename node_array::view_type - get( Location::Node, Field::Velocity ) const + typename node_array::view_type get( Location::Node, Field::Velocity ) const { return _velocity->view(); } - typename node_array::view_type - get( Location::Node, Field::PositionCorrection ) const + typename node_array::view_type get( Location::Node, + Field::PositionCorrection ) const { return _position_correction->view(); } - typename cell_array::view_type - get( Location::Cell, Field::Density ) const + typename cell_array::view_type get( Location::Cell, Field::Density ) const { return _density->view(); } - typename cell_array::view_type - get( Location::Cell, Field::Mark ) const + typename cell_array::view_type get( Location::Cell, Field::Mark ) const { return _mark->view(); } @@ -282,22 +268,19 @@ class ProblemManager void scatter( Location::Node, Field::Momentum ) const { _node_vector_halo->scatter( execution_space(), - Cajita::ScatterReduce::Sum(), - *_momentum ); + Cajita::ScatterReduce::Sum(), *_momentum ); } void scatter( Location::Node, Field::Mass ) const { _node_scalar_halo->scatter( execution_space(), - Cajita::ScatterReduce::Sum(), - *_mass ); + Cajita::ScatterReduce::Sum(), *_mass ); } void scatter( Location::Node, Field::Force ) const { _node_vector_halo->scatter( execution_space(), - Cajita::ScatterReduce::Sum(), - *_force ); + Cajita::ScatterReduce::Sum(), *_force ); } void scatter( Location::Node, Field::PositionCorrection ) const @@ -310,15 +293,13 @@ class ProblemManager void scatter( Location::Cell, Field::Density ) const { _cell_scalar_halo->scatter( execution_space(), - Cajita::ScatterReduce::Sum(), - *_density ); + Cajita::ScatterReduce::Sum(), *_density ); } void scatter( Location::Cell, Field::Mark ) const { _cell_scalar_halo->scatter( execution_space(), - Cajita::ScatterReduce::Sum(), - *_mark ); + Cajita::ScatterReduce::Sum(), *_mark ); } void gather( Location::Node, Field::Velocity ) const @@ -333,16 +314,12 @@ class ProblemManager void communicateParticles( const int minimum_halo_width ) { - auto positions = get( Location::Particle(), Field::Position()) ; - Cajita::particleGridMigrate( - *(_mesh->localGrid()), - positions, - _particles, - minimum_halo_width ); + auto positions = get( Location::Particle(), Field::Position() ); + Cajita::particleGridMigrate( *( _mesh->localGrid() ), positions, + _particles, minimum_halo_width ); } private: - std::shared_ptr _mesh; double _bulk_modulus; double _rho; diff --git a/src/ExaMPM_SiloParticleWriter.hpp b/src/ExaMPM_SiloParticleWriter.hpp index bef070a..99a332a 100644 --- a/src/ExaMPM_SiloParticleWriter.hpp +++ b/src/ExaMPM_SiloParticleWriter.hpp @@ -37,170 +37,148 @@ namespace SiloParticleWriter // Silo Particle Field Writer. //---------------------------------------------------------------------------// // Format traits. 
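// SiloTraits maps C++ scalar types to Silo's DB_* type constants at
// compile time, so writeFieldsImpl can pass SiloTraits<T>::type() straight
// to DBPutPointvar. Supporting another scalar is one more specialization,
// e.g. (assuming Silo's DB_LONG constant):
//
//     template <>
//     struct SiloTraits<long>
//     {
//         static int type() { return DB_LONG; }
//     };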
-template +template struct SiloTraits; -template<> +template <> struct SiloTraits { - static int type() - { return DB_SHORT; } + static int type() { return DB_SHORT; } }; -template<> +template <> struct SiloTraits { - static int type() - { return DB_INT; } + static int type() { return DB_INT; } }; -template<> +template <> struct SiloTraits { - static int type() - { return DB_FLOAT; } + static int type() { return DB_FLOAT; } }; -template<> +template <> struct SiloTraits { - static int type() - { return DB_DOUBLE; } + static int type() { return DB_DOUBLE; } }; //---------------------------------------------------------------------------// // Rank-0 field -template +template void writeFieldsImpl( - DBfile* silo_file, - const std::string& mesh_name, - const SliceType& slice, + DBfile* silo_file, const std::string& mesh_name, const SliceType& slice, typename std::enable_if< - 2==SliceType::kokkos_view::traits::dimension::rank,int*>::type = 0 ) + 2 == SliceType::kokkos_view::traits::dimension::rank, int*>::type = 0 ) { // Reorder in a contiguous blocked format. Kokkos::View view( "field", slice.size() ); + typename SliceType::device_type> + view( "field", slice.size() ); Kokkos::parallel_for( "SiloParticleWriter::writeFieldRank0", - Kokkos::RangePolicy(0,slice.size()), - KOKKOS_LAMBDA( const int i ){ - view(i) = slice(i); - }); + Kokkos::RangePolicy( + 0, slice.size() ), + KOKKOS_LAMBDA( const int i ) { view( i ) = slice( i ); } ); // Mirror the field to the host. - auto host_view = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), view ); + auto host_view = + Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), view ); // Write the field. - DBPutPointvar1( silo_file, - slice.label().c_str(), - mesh_name.c_str(), - host_view.data(), - host_view.extent(0), + DBPutPointvar1( silo_file, slice.label().c_str(), mesh_name.c_str(), + host_view.data(), host_view.extent( 0 ), SiloTraits::type(), nullptr ); } // Rank-1 field -template +template void writeFieldsImpl( - DBfile* silo_file, - const std::string& mesh_name, - const SliceType& slice, + DBfile* silo_file, const std::string& mesh_name, const SliceType& slice, typename std::enable_if< - 3==SliceType::kokkos_view::traits::dimension::rank,int*>::type = 0 ) + 3 == SliceType::kokkos_view::traits::dimension::rank, int*>::type = 0 ) { // Reorder in a contiguous blocked format. - Kokkos::View - view( "field", slice.size(), slice.extent(2) ); + view( "field", slice.size(), slice.extent( 2 ) ); Kokkos::parallel_for( "SiloParticleWriter::writeFieldRank1", - Kokkos::RangePolicy(0,slice.size()), - KOKKOS_LAMBDA( const int i ){ - for ( std::size_t d0 = 0; d0 < slice.extent(2); ++d0 ) - view(i,d0) = slice(i,d0); - }); + Kokkos::RangePolicy( + 0, slice.size() ), + KOKKOS_LAMBDA( const int i ) { + for ( std::size_t d0 = 0; d0 < slice.extent( 2 ); ++d0 ) + view( i, d0 ) = slice( i, d0 ); + } ); // Mirror the field to the host. - auto host_view = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), view ); + auto host_view = + Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), view ); // Get the data pointers. - std::vector ptrs( host_view.extent(1) ); - for ( std::size_t d0 = 0; d0 < host_view.extent(1); ++d0 ) - ptrs[d0] = &host_view(0,d0); + std::vector ptrs( host_view.extent( 1 ) ); + for ( std::size_t d0 = 0; d0 < host_view.extent( 1 ); ++d0 ) + ptrs[d0] = &host_view( 0, d0 ); // Write the field. 
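// The write below goes through a LayoutLeft scratch view so that each
// vector component is contiguous in memory; ptrs holds one pointer per
// component, &host_view( 0, d0 ), matching the array-of-component-arrays
// layout DBPutPointvar consumes.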
- DBPutPointvar( silo_file, - slice.label().c_str(), - mesh_name.c_str(), - host_view.extent(1), - ptrs.data(), - host_view.extent(0), + DBPutPointvar( silo_file, slice.label().c_str(), mesh_name.c_str(), + host_view.extent( 1 ), ptrs.data(), host_view.extent( 0 ), SiloTraits::type(), nullptr ); } // Rank-2 field -template +template void writeFieldsImpl( - DBfile* silo_file, - const std::string& mesh_name, - const SliceType& slice, + DBfile* silo_file, const std::string& mesh_name, const SliceType& slice, typename std::enable_if< - 4==SliceType::kokkos_view::traits::dimension::rank,int*>::type = 0 ) + 4 == SliceType::kokkos_view::traits::dimension::rank, int*>::type = 0 ) { // Reorder in a contiguous blocked format. - Kokkos::View - view( "field", slice.size(), slice.extent(2), slice.extent(3) ); + view( "field", slice.size(), slice.extent( 2 ), slice.extent( 3 ) ); Kokkos::parallel_for( "SiloParticleWriter::writeFieldRank2", - Kokkos::RangePolicy(0,slice.size()), - KOKKOS_LAMBDA( const int i ){ - for ( std::size_t d0 = 0; d0 < slice.extent(2); ++d0 ) - for ( std::size_t d1 = 0; d1 < slice.extent(3); ++d1 ) - view(i,d0,d1) = slice(i,d0,d1); - }); + Kokkos::RangePolicy( + 0, slice.size() ), + KOKKOS_LAMBDA( const int i ) { + for ( std::size_t d0 = 0; d0 < slice.extent( 2 ); ++d0 ) + for ( std::size_t d1 = 0; d1 < slice.extent( 3 ); ++d1 ) + view( i, d0, d1 ) = slice( i, d0, d1 ); + } ); // Mirror the field to the host. - auto host_view = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), view ); + auto host_view = + Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), view ); // Get the data pointers. std::vector ptrs; - ptrs.reserve( host_view.extent(1) * host_view.extent(2) ); - for ( unsigned d0 = 0; d0 < host_view.extent(1); ++d0 ) - for ( unsigned d1 = 0; d1 < host_view.extent(2); ++d1 ) - ptrs.push_back( &host_view(0,d0,d1) ); + ptrs.reserve( host_view.extent( 1 ) * host_view.extent( 2 ) ); + for ( unsigned d0 = 0; d0 < host_view.extent( 1 ); ++d0 ) + for ( unsigned d1 = 0; d1 < host_view.extent( 2 ); ++d1 ) + ptrs.push_back( &host_view( 0, d0, d1 ) ); // Write the field. - DBPutPointvar( silo_file, - slice.label().c_str(), - mesh_name.c_str(), - host_view.extent(1) * host_view.extent(2), ptrs.data(), - host_view.extent(0), + DBPutPointvar( silo_file, slice.label().c_str(), mesh_name.c_str(), + host_view.extent( 1 ) * host_view.extent( 2 ), ptrs.data(), + host_view.extent( 0 ), SiloTraits::type(), nullptr ); } -template -void writeFields( DBfile* silo_file, - const std::string& mesh_name, +template +void writeFields( DBfile* silo_file, const std::string& mesh_name, const SliceType& slice ) { writeFieldsImpl( silo_file, mesh_name, slice ); } -template -void writeFields( DBfile* silo_file, - const std::string& mesh_name, - const SliceType& slice, - FieldSliceTypes&&... fields ) +template +void writeFields( DBfile* silo_file, const std::string& mesh_name, + const SliceType& slice, FieldSliceTypes&&... fields ) { writeFieldsImpl( silo_file, mesh_name, slice ); writeFields( silo_file, mesh_name, fields... 
); @@ -219,7 +197,7 @@ void* createFile( const char* file_name, const char* dir_name, void* user_data ) DBSetDir( silo_file, dir_name ); } - return (void*) silo_file; + return (void*)silo_file; } void* openFile( const char* file_name, const char* dir_name, @@ -233,19 +211,20 @@ void* openFile( const char* file_name, const char* dir_name, DBMkDir( silo_file, dir_name ); DBSetDir( silo_file, dir_name ); } - return (void*) silo_file; + return (void*)silo_file; } void closeFile( void* file, void* user_data ) { std::ignore = user_data; - DBfile *silo_file = (DBfile *) file; - if ( silo_file ) DBClose( silo_file ); + DBfile* silo_file = (DBfile*)file; + if ( silo_file ) + DBClose( silo_file ); } //---------------------------------------------------------------------------// // Get field names. -template +template void getFieldNamesImpl( std::vector& names, const SliceType& slice ) { @@ -253,17 +232,15 @@ void getFieldNamesImpl( std::vector& names, } // Get field names. -template -void getFieldNamesImpl( std::vector& names, - const SliceType& slice, +template +void getFieldNamesImpl( std::vector& names, const SliceType& slice, FieldSliceTypes&&... fields ) { getFieldNamesImpl( names, slice ); getFieldNamesImpl( names, fields... ); } - -template +template std::vector getFieldNames( FieldSliceTypes&&... fields ) { std::vector names; @@ -273,13 +250,10 @@ std::vector getFieldNames( FieldSliceTypes&&... fields ) //---------------------------------------------------------------------------// // Write a multimesh hierarchy. -template -void writeMultiMesh( PMPIO_baton_t* baton, - DBfile* silo_file, - const int comm_size, - const std::string& mesh_name, - const int time_step_index, - const double time, +template +void writeMultiMesh( PMPIO_baton_t* baton, DBfile* silo_file, + const int comm_size, const std::string& mesh_name, + const int time_step_index, const double time, FieldSliceTypes&&... fields ) { // Go to the root directory of the file. @@ -294,19 +268,19 @@ void writeMultiMesh( PMPIO_baton_t* baton, { std::stringstream bname; bname << "rank_" << r << "/" << mesh_name; - mb_names.push_back(bname.str()); + mb_names.push_back( bname.str() ); } else { std::stringstream bname; - bname << "particles_" << time_step_index << "_group_" - << group_rank << ".silo:/rank_" << r << "/" << mesh_name; - mb_names.push_back(bname.str()); + bname << "particles_" << time_step_index << "_group_" << group_rank + << ".silo:/rank_" << r << "/" << mesh_name; + mb_names.push_back( bname.str() ); } } - char** mesh_block_names = (char **) malloc(comm_size * sizeof(char*)); + char** mesh_block_names = (char**)malloc( comm_size * sizeof( char* ) ); for ( int r = 0; r < comm_size; ++r ) - mesh_block_names[r] = const_cast(mb_names[r].c_str()); + mesh_block_names[r] = const_cast( mb_names[r].c_str() ); std::vector mesh_block_types( comm_size, DB_POINTMESH ); @@ -315,7 +289,7 @@ void writeMultiMesh( PMPIO_baton_t* baton, // Create the field block names. 
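// Block names follow the PMPIO layout used for the mesh above: blocks that
// live in the master file (group 0) are referenced as "rank_<r>/<name>",
// while blocks in other groups become cross-file references of the form
// "particles_<step>_group_<g>.silo:/rank_<r>/<name>".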
int num_field = field_names.size(); - std::vector > fb_names( num_field ); + std::vector> fb_names( num_field ); for ( int f = 0; f < num_field; ++f ) { for ( int r = 0; r < comm_size; ++r ) @@ -325,16 +299,16 @@ void writeMultiMesh( PMPIO_baton_t* baton, { std::stringstream bname; bname << "rank_" << r << "/" << field_names[f]; - fb_names[f].push_back(bname.str()); + fb_names[f].push_back( bname.str() ); } else { std::stringstream bname; bname << "particles_" << time_step_index << "_group_" - << group_rank << ".silo:/rank_" - << r << "/" << field_names[f]; - fb_names[f].push_back(bname.str()); + << group_rank << ".silo:/rank_" << r << "/" + << field_names[f]; + fb_names[f].push_back( bname.str() ); } } } @@ -342,17 +316,18 @@ void writeMultiMesh( PMPIO_baton_t* baton, std::vector field_block_names( num_field ); for ( int f = 0; f < num_field; ++f ) { - field_block_names[f] = (char**) malloc( comm_size * sizeof(char*) ); + field_block_names[f] = (char**)malloc( comm_size * sizeof( char* ) ); for ( int r = 0; r < comm_size; ++r ) - field_block_names[f][r] = const_cast(fb_names[f][r].c_str()); + field_block_names[f][r] = + const_cast( fb_names[f][r].c_str() ); } std::vector field_block_types( comm_size, DB_POINTVAR ); // Create options. DBoptlist* options = DBMakeOptlist( 1 ); - DBAddOption( options, DBOPT_DTIME, (void*) &time ); - DBAddOption( options, DBOPT_CYCLE, (void*) &time_step_index ); + DBAddOption( options, DBOPT_DTIME, (void*)&time ); + DBAddOption( options, DBOPT_CYCLE, (void*)&time_step_index ); // Add the multiblock mesh. std::stringstream mbname; @@ -372,18 +347,17 @@ void writeMultiMesh( PMPIO_baton_t* baton, // Cleanup. free( mesh_block_names ); - for ( auto& f_name : field_block_names ) free( f_name ); + for ( auto& f_name : field_block_names ) + free( f_name ); DBFreeOptlist( options ); } //---------------------------------------------------------------------------// // Write a time step. -template +template void writeTimeStep( const GlobalGridType& global_grid, - const int time_step_index, - const double time, - const CoordSliceType& coords, - FieldSliceTypes&&... fields ) + const int time_step_index, const double time, + const CoordSliceType& coords, FieldSliceTypes&&... fields ) { // Pick a number of groups. We want to write approximately the N^3 blocks // to N^2 groups. Pick the block dimension with the largest number of @@ -391,14 +365,14 @@ void writeTimeStep( const GlobalGridType& global_grid, // input later with this behavior as the default. int num_group = 0; for ( int d = 0; d < 3; ++d ) - if ( global_grid.dimNumBlock(d) > num_group ) - num_group = global_grid.dimNumBlock(d); + if ( global_grid.dimNumBlock( d ) > num_group ) + num_group = global_grid.dimNumBlock( d ); // Create the parallel baton. int mpi_tag = 1948; PMPIO_baton_t* baton = - PMPIO_Init( num_group, PMPIO_WRITE, global_grid.comm(), - mpi_tag, createFile, openFile, closeFile, nullptr ); + PMPIO_Init( num_group, PMPIO_WRITE, global_grid.comm(), mpi_tag, + createFile, openFile, closeFile, nullptr ); // Compose a data file name. int comm_rank; @@ -412,47 +386,42 @@ void writeTimeStep( const GlobalGridType& global_grid, // The other groups write auxiliary files. else - file_name << "particles_" << time_step_index << "_group_" - << group_rank << ".silo"; + file_name << "particles_" << time_step_index << "_group_" << group_rank + << ".silo"; // Compose a directory name. std::stringstream dir_name; dir_name << "rank_" << comm_rank; // Wait for our turn to write to the file. 
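// PMPIO serializes the writers sharing a file: PMPIO_Init above split the
// ranks into num_group groups, and within a group the baton passes from
// rank to rank (PMPIO_HandOffBaton below), so each group produces one silo
// file containing a "rank_<r>" directory per writer.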
- DBfile* silo_file = (DBfile*) PMPIO_WaitForBaton( + DBfile* silo_file = (DBfile*)PMPIO_WaitForBaton( baton, file_name.str().c_str(), dir_name.str().c_str() ); // Reorder the coordinates in a blocked format. - Kokkos::View - view( "coords", coords.size(), coords.extent(2) ); + view( "coords", coords.size(), coords.extent( 2 ) ); Kokkos::parallel_for( "SiloParticleWriter::writeCoords", Kokkos::RangePolicy( - 0,coords.size()), - KOKKOS_LAMBDA( const int i ){ - for ( std::size_t d0 = 0; d0 < coords.extent(2); ++d0 ) - view(i,d0) = coords(i,d0); - }); + 0, coords.size() ), + KOKKOS_LAMBDA( const int i ) { + for ( std::size_t d0 = 0; d0 < coords.extent( 2 ); ++d0 ) + view( i, d0 ) = coords( i, d0 ); + } ); // Mirror the coordinates to the host. - auto host_coords = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), view ); + auto host_coords = + Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), view ); // Add the point mesh. std::string mesh_name = "particles"; - double* ptrs[3] = - {&host_coords(0,0), &host_coords(0,1), &host_coords(0,2)}; - DBPutPointmesh( - silo_file, - mesh_name.c_str(), - host_coords.extent(1), - ptrs, - host_coords.extent(0), - SiloTraits::type(), - nullptr ); + double* ptrs[3] = { &host_coords( 0, 0 ), &host_coords( 0, 1 ), + &host_coords( 0, 2 ) }; + DBPutPointmesh( silo_file, mesh_name.c_str(), host_coords.extent( 1 ), ptrs, + host_coords.extent( 0 ), + SiloTraits::type(), + nullptr ); // Add variables. writeFields( silo_file, mesh_name, fields... ); @@ -462,9 +431,8 @@ void writeTimeStep( const GlobalGridType& global_grid, int comm_size; MPI_Comm_size( global_grid.comm(), &comm_size ); if ( 0 == comm_rank && comm_size > 1 ) - writeMultiMesh( baton, silo_file, - comm_size, mesh_name, - time_step_index, time, fields... ); + writeMultiMesh( baton, silo_file, comm_size, mesh_name, time_step_index, + time, fields... ); // Hand off the baton. 
PMPIO_HandOffBaton( baton, silo_file ); diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 4cc3197..2531a70 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -12,12 +12,12 @@ #ifndef EXAMPM_SOLVER_HPP #define EXAMPM_SOLVER_HPP -#include -#include -#include #include -#include #include +#include +#include +#include +#include #include @@ -36,31 +36,24 @@ class SolverBase }; //---------------------------------------------------------------------------// -template +template class Solver : public SolverBase { public: - - template - Solver( MPI_Comm comm, - const Kokkos::Array& global_bounding_box, - const std::array& global_num_cell, - const std::array& periodic, + template + Solver( MPI_Comm comm, const Kokkos::Array& global_bounding_box, + const std::array& global_num_cell, + const std::array& periodic, const Cajita::BlockPartitioner<3>& partitioner, - const int halo_cell_width, - const InitFunc& create_functor, - const int particles_per_cell, - const double bulk_modulus, - const double density, - const double gamma, - const double kappa, - const double delta_t, - const double gravity, + const int halo_cell_width, const InitFunc& create_functor, + const int particles_per_cell, const double bulk_modulus, + const double density, const double gamma, const double kappa, + const double delta_t, const double gravity, const BoundaryCondition& bc ) - : _dt( delta_t ) - , _gravity( gravity ) - , _bc( bc ) - , _halo_min( 3 ) + : _dt( delta_t ) + , _gravity( gravity ) + , _bc( bc ) + , _halo_min( 3 ) { _mesh = std::make_shared>( global_bounding_box, global_num_cell, periodic, partitioner, @@ -75,21 +68,17 @@ class Solver : public SolverBase MPI_Comm_rank( comm, &_rank ); - _lb = std::make_shared>(comm, _mesh); - + _lb = std::make_shared>( comm, _mesh ); } void solve( const double t_final, const int write_freq ) override { SiloParticleWriter::writeTimeStep( - _mesh->localGrid()->globalGrid(), - 0, - 0.0, + _mesh->localGrid()->globalGrid(), 0, 0.0, _pm->get( Location::Particle(), Field::Position() ), _pm->get( Location::Particle(), Field::Velocity() ), _pm->get( Location::Particle(), Field::J() ) ); - _lb->output(0); - + _lb->output( 0 ); int num_step = t_final / _dt; double delta_t = t_final / num_step; @@ -97,32 +86,30 @@ class Solver : public SolverBase for ( int t = 0; t < num_step; ++t ) { if ( 0 == _rank && 0 == t % write_freq ) - printf( "Step %d / %d\n", t+1, num_step ); + printf( "Step %d / %d\n", t + 1, num_step ); - TimeIntegrator::step( ExecutionSpace(), *_pm, delta_t, _gravity, _bc ); + TimeIntegrator::step( ExecutionSpace(), *_pm, delta_t, _gravity, + _bc ); - _lb->balance(_pm); + _lb->balance( _pm ); _pm->communicateParticles( _halo_min ); if ( 0 == t % write_freq ) { SiloParticleWriter::writeTimeStep( - _mesh->localGrid()->globalGrid(), - t+1, - time, + _mesh->localGrid()->globalGrid(), t + 1, time, _pm->get( Location::Particle(), Field::Position() ), _pm->get( Location::Particle(), Field::Velocity() ), _pm->get( Location::Particle(), Field::J() ) ); - _lb->output(t); + _lb->output( t ); } - time += delta_t; + time += delta_t; } } private: - double _dt; double _gravity; BoundaryCondition _bc; @@ -135,113 +122,63 @@ class Solver : public SolverBase //---------------------------------------------------------------------------// // Creation method. 
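// A minimal call-site sketch for the factory below (argument values are
// placeholders):
//
//     auto solver = ExaMPM::createSolver(
//         "openmp", MPI_COMM_WORLD, global_box, global_num_cell, periodic,
//         partitioner, halo_cell_width, init_functor, particles_per_cell,
//         bulk_modulus, density, gamma, kappa, delta_t, gravity, bc );
//     solver->solve( t_final, write_freq );
//
// Unsupported device strings fall through to std::runtime_error, and each
// backend is compiled only when its KOKKOS_ENABLE_* macro is defined.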
-template +template std::shared_ptr -createSolver( const std::string& device, - MPI_Comm comm, - const Kokkos::Array& global_bounding_box, - const std::array& global_num_cell, - const std::array& periodic, +createSolver( const std::string& device, MPI_Comm comm, + const Kokkos::Array& global_bounding_box, + const std::array& global_num_cell, + const std::array& periodic, const Cajita::BlockPartitioner<3>& partitioner, - const int halo_cell_width, - const InitFunc& create_functor, - const int particles_per_cell, - const double bulk_modulus, - const double density, - const double gamma, - const double kappa, - const double delta_t, - const double gravity, + const int halo_cell_width, const InitFunc& create_functor, + const int particles_per_cell, const double bulk_modulus, + const double density, const double gamma, const double kappa, + const double delta_t, const double gravity, const BoundaryCondition& bc ) { - if ( 0 == device.compare("serial") ) + if ( 0 == device.compare( "serial" ) ) { #ifdef KOKKOS_ENABLE_SERIAL - return std::make_shared>( - comm, - global_bounding_box, - global_num_cell, - periodic, - partitioner, - halo_cell_width, - create_functor, - particles_per_cell, - bulk_modulus, - density, - gamma, - kappa, - delta_t, - gravity, - bc ); + return std::make_shared< + ExaMPM::Solver>( + comm, global_bounding_box, global_num_cell, periodic, partitioner, + halo_cell_width, create_functor, particles_per_cell, bulk_modulus, + density, gamma, kappa, delta_t, gravity, bc ); #else throw std::runtime_error( "Serial Backend Not Enabled" ); #endif } - else if ( 0 == device.compare("openmp") ) + else if ( 0 == device.compare( "openmp" ) ) { #ifdef KOKKOS_ENABLE_OPENMP - return std::make_shared>( - comm, - global_bounding_box, - global_num_cell, - periodic, - partitioner, - halo_cell_width, - create_functor, - particles_per_cell, - bulk_modulus, - density, - gamma, - kappa, - delta_t, - gravity, - bc ); + return std::make_shared< + ExaMPM::Solver>( + comm, global_bounding_box, global_num_cell, periodic, partitioner, + halo_cell_width, create_functor, particles_per_cell, bulk_modulus, + density, gamma, kappa, delta_t, gravity, bc ); #else throw std::runtime_error( "OpenMP Backend Not Enabled" ); #endif } - else if ( 0 == device.compare("cuda") ) + else if ( 0 == device.compare( "cuda" ) ) { #ifdef KOKKOS_ENABLE_CUDA - return std::make_shared>( - comm, - global_bounding_box, - global_num_cell, - periodic, - partitioner, - halo_cell_width, - create_functor, - particles_per_cell, - bulk_modulus, - density, - gamma, - kappa, - delta_t, - gravity, - bc ); + return std::make_shared< + ExaMPM::Solver>( + comm, global_bounding_box, global_num_cell, periodic, partitioner, + halo_cell_width, create_functor, particles_per_cell, bulk_modulus, + density, gamma, kappa, delta_t, gravity, bc ); #else throw std::runtime_error( "CUDA Backend Not Enabled" ); #endif } - else if ( 0 == device.compare("hip") ) + else if ( 0 == device.compare( "hip" ) ) { #ifdef KOKKOS_ENABLE_HIP - return std::make_shared>( - comm, - global_bounding_box, - global_num_cell, - periodic, - partitioner, - halo_cell_width, - create_functor, - particles_per_cell, - bulk_modulus, - density, - gamma, - kappa, - delta_t, - gravity, - bc ); + return std::make_shared>( + comm, global_bounding_box, global_num_cell, periodic, partitioner, + halo_cell_width, create_functor, particles_per_cell, bulk_modulus, + density, gamma, kappa, delta_t, gravity, bc ); #else throw std::runtime_error( "HIP Backend Not Enabled" ); #endif diff --git 
a/src/ExaMPM_TimeIntegrator.hpp b/src/ExaMPM_TimeIntegrator.hpp index 01fd623..0d8cabd 100644 --- a/src/ExaMPM_TimeIntegrator.hpp +++ b/src/ExaMPM_TimeIntegrator.hpp @@ -12,9 +12,9 @@ #ifndef EXAMPM_TIMEINTEGRATOR_HPP #define EXAMPM_TIMEINTEGRATOR_HPP -#include #include #include +#include #include @@ -28,9 +28,8 @@ namespace TimeIntegrator { //---------------------------------------------------------------------------// // Particle-to-grid. -template -void p2g( const ExecutionSpace& exec_space, - const ProblemManagerType& pm ) +template +void p2g( const ExecutionSpace& exec_space, const ProblemManagerType& pm ) { // Get the particle data we need. auto m_p = pm.get( Location::Particle(), Field::Mass() ); @@ -61,42 +60,43 @@ void p2g( const ExecutionSpace& exec_space, // Build the local mesh. auto local_mesh = - Cajita::createLocalMesh( *(pm.mesh()->localGrid()) ); + Cajita::createLocalMesh( *( pm.mesh()->localGrid() ) ); // Loop over particles. Kokkos::parallel_for( "p2g", Kokkos::RangePolicy( exec_space, 0, pm.numParticle() ), - KOKKOS_LAMBDA( const int p ){ + KOKKOS_LAMBDA( const int p ) { // Get the particle position. - double x[3] = { x_p(p,0), x_p(p,1), x_p(p,2) }; + double x[3] = { x_p( p, 0 ), x_p( p, 1 ), x_p( p, 2 ) }; // Setup interpolation to the nodes. - Cajita::SplineData sd; + Cajita::SplineData sd; Cajita::evaluateSpline( local_mesh, x, sd ); // Compute the pressure on the particle with an equation of // state. - double pressure = -bulk_mod * ( pow(j_p(p),-gamma) - 1.0 ); + double pressure = -bulk_mod * ( pow( j_p( p ), -gamma ) - 1.0 ); // Project the pressure gradient to the grid. - Cajita::P2G::gradient( -v_p(p)*j_p(p)*pressure, sd, f_i_sv ); + Cajita::P2G::gradient( -v_p( p ) * j_p( p ) * pressure, sd, + f_i_sv ); // Extract the particle velocity - double vel_p[3] = { u_p(p,0), u_p(p,1), u_p(p,2) }; + double vel_p[3] = { u_p( p, 0 ), u_p( p, 1 ), u_p( p, 2 ) }; // Extract the affine particle matrix. double aff_p[3][3]; for ( int d0 = 0; d0 < 3; ++d0 ) for ( int d1 = 0; d1 < 3; ++d1 ) - aff_p[d0][d1] = B_p(p,d0,d1); + aff_p[d0][d1] = B_p( p, d0, d1 ); // Project momentum to the grid. - APIC::p2g( m_p(p), vel_p, aff_p, sd, mu_i_sv ); + APIC::p2g( m_p( p ), vel_p, aff_p, sd, mu_i_sv ); // Project mass to the grid. - Cajita::P2G::value( m_p(p), sd, m_i_sv ); - }); + Cajita::P2G::value( m_p( p ), sd, m_i_sv ); + } ); // Complete local scatter. Kokkos::Experimental::contribute( m_i, m_i_sv ); @@ -111,11 +111,9 @@ void p2g( const ExecutionSpace& exec_space, //---------------------------------------------------------------------------// // Field solve. -template -void fieldSolve( const ExecutionSpace& exec_space, - const ProblemManagerType& pm, - const double delta_t, - const double gravity, +template +void fieldSolve( const ExecutionSpace& exec_space, const ProblemManagerType& pm, + const double delta_t, const double gravity, const BoundaryCondition& bc ) { // Get the views we need. @@ -131,45 +129,44 @@ void fieldSolve( const ExecutionSpace& exec_space, double mass_epsilon = 1.0e-12; // Compute the velocity. 
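For reference, the physics encoded in the p2g kernel above is easier to check in equation form. Reading the symbols off the code (`bulk_mod` $= K$, `gamma` $= \gamma$, `j_p` $= J_p$, `v_p` $= V_p$), the particle pressure follows a weakly compressible equation of state and the nodal force scatter is its volume-weighted spline gradient:

    \[
        p_p = -K \left( J_p^{-\gamma} - 1 \right), \qquad
        \mathbf{f}_i \mathrel{+}= -\, V_p\, J_p\, p_p\, \nabla w_{ip} ,
    \]

where $w_{ip}$ is the spline weight connecting particle $p$ to node $i$.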
- auto l2g = Cajita::IndexConversion::createL2G( - *(pm.mesh()->localGrid()), Cajita::Node() ); - auto local_nodes = - pm.mesh()->localGrid()->indexSpace( - Cajita::Ghost(), Cajita::Node(), Cajita::Local() ); + auto l2g = Cajita::IndexConversion::createL2G( *( pm.mesh()->localGrid() ), + Cajita::Node() ); + auto local_nodes = pm.mesh()->localGrid()->indexSpace( + Cajita::Ghost(), Cajita::Node(), Cajita::Local() ); Kokkos::parallel_for( - Cajita::createExecutionPolicy(local_nodes,exec_space), - KOKKOS_LAMBDA( const int li, const int lj, const int lk ){ + Cajita::createExecutionPolicy( local_nodes, exec_space ), + KOKKOS_LAMBDA( const int li, const int lj, const int lk ) { int gi, gj, gk; l2g( li, lj, lk, gi, gj, gk ); // Only compute velocity if a node has mass - u_i(li,lj,lk,0) = - ( m_i(li,lj,lk,0) > mass_epsilon ) - ? ( mu_i(li,lj,lk,0) + - delta_t * f_i(li,lj,lk,0) ) / m_i(li,lj,lk,0) - : 0.0; - u_i(li,lj,lk,1) = - ( m_i(li,lj,lk,0) > mass_epsilon ) - ? ( mu_i(li,lj,lk,1) + - delta_t * f_i(li,lj,lk,1) ) / m_i(li,lj,lk,0) - : 0.0; - u_i(li,lj,lk,2) = - ( m_i(li,lj,lk,0) > mass_epsilon ) - ? ( mu_i(li,lj,lk,2) + - delta_t * f_i(li,lj,lk,2) ) / m_i(li,lj,lk,0) - delta_g - : 0.0; + u_i( li, lj, lk, 0 ) = ( m_i( li, lj, lk, 0 ) > mass_epsilon ) + ? ( mu_i( li, lj, lk, 0 ) + + delta_t * f_i( li, lj, lk, 0 ) ) / + m_i( li, lj, lk, 0 ) + : 0.0; + u_i( li, lj, lk, 1 ) = ( m_i( li, lj, lk, 0 ) > mass_epsilon ) + ? ( mu_i( li, lj, lk, 1 ) + + delta_t * f_i( li, lj, lk, 1 ) ) / + m_i( li, lj, lk, 0 ) + : 0.0; + u_i( li, lj, lk, 2 ) = ( m_i( li, lj, lk, 0 ) > mass_epsilon ) + ? ( mu_i( li, lj, lk, 2 ) + + delta_t * f_i( li, lj, lk, 2 ) ) / + m_i( li, lj, lk, 0 ) - + delta_g + : 0.0; // Apply the boundary condition. - bc( gi, gj, gk, - u_i(li,lj,lk,0), u_i(li,lj,lk,1), u_i(li,lj,lk,2) ); - }); + bc( gi, gj, gk, u_i( li, lj, lk, 0 ), u_i( li, lj, lk, 1 ), + u_i( li, lj, lk, 2 ) ); + } ); } //---------------------------------------------------------------------------// // Grid-to-particle. -template -void g2p( const ExecutionSpace& exec_space, - const ProblemManagerType& pm, +template +void g2p( const ExecutionSpace& exec_space, const ProblemManagerType& pm, const double delta_t ) { // Get the particle data we need. @@ -194,9 +191,9 @@ void g2p( const ExecutionSpace& exec_space, // Build the local mesh. auto local_mesh = - Cajita::createLocalMesh( *(pm.mesh()->localGrid()) ); + Cajita::createLocalMesh( *( pm.mesh()->localGrid() ) ); auto cell_size = - pm.mesh()->localGrid()->globalGrid().globalMesh().cellSize(0); + pm.mesh()->localGrid()->globalGrid().globalMesh().cellSize( 0 ); auto cell_volume = cell_size * cell_size * cell_size; // Gather the data we need. @@ -206,12 +203,12 @@ void g2p( const ExecutionSpace& exec_space, Kokkos::parallel_for( "g2p", Kokkos::RangePolicy( exec_space, 0, pm.numParticle() ), - KOKKOS_LAMBDA( const int p ){ + KOKKOS_LAMBDA( const int p ) { // Get the particle position. - double x[3] = { x_p(p,0), x_p(p,1), x_p(p,2) }; + double x[3] = { x_p( p, 0 ), x_p( p, 1 ), x_p( p, 2 ) }; // Setup interpolation from the nodes. - Cajita::SplineData sd_i; + Cajita::SplineData sd_i; Cajita::evaluateSpline( local_mesh, x, sd_i ); // Update particle velocity. 
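Written out, the masked nodal velocity update in fieldSolve above is, with $\varepsilon = 10^{-12}$ the `mass_epsilon` guard and `delta_g` $= \Delta t\, g$ applied only to the $z$ component:

    \[
        \mathbf{u}_i =
        \begin{cases}
            \dfrac{\boldsymbol{\mu}_i + \Delta t\, \mathbf{f}_i}{m_i}
                - \Delta t\, g\, \hat{\mathbf{e}}_z , & m_i > \varepsilon \\
            \mathbf{0} , & \text{otherwise,}
        \end{cases}
    \]

followed by the boundary condition applied at the global node index obtained from the local-to-global conversion.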
@@ -219,10 +216,10 @@ void g2p( const ExecutionSpace& exec_space, double aff_p[3][3]; APIC::g2p( u_i, sd_i, vel_p, aff_p ); for ( int d = 0; d < 3; ++d ) - u_p(p,d) = vel_p[d]; + u_p( p, d ) = vel_p[d]; for ( int d0 = 0; d0 < 3; ++d0 ) for ( int d1 = 0; d1 < 3; ++d1 ) - B_p(p,d0,d1) = aff_p[d0][d1]; + B_p( p, d0, d1 ) = aff_p[d0][d1]; // Compute the velocity divergence (this is the trace of the // velocity gradient). @@ -230,25 +227,25 @@ void g2p( const ExecutionSpace& exec_space, Cajita::G2P::divergence( u_i, sd_i, div_u ); // Update the deformation gradient determinant. - j_p(p) *= exp( delta_t * div_u ); + j_p( p ) *= exp( delta_t * div_u ); // Move the particle for ( int d = 0; d < 3; ++d ) { x[d] += delta_t * vel_p[d]; - x_p(p,d) = x[d]; + x_p( p, d ) = x[d]; } // Project density to cell. - Cajita::SplineData sd_c1; + Cajita::SplineData sd_c1; Cajita::evaluateSpline( local_mesh, x, sd_c1 ); - Cajita::P2G::value( m_p(p) / cell_volume, sd_c1, r_c_sv ); + Cajita::P2G::value( m_p( p ) / cell_volume, sd_c1, r_c_sv ); // Mark cells. Indicates whether or not cells have particles. - Cajita::SplineData sd_c0; + Cajita::SplineData sd_c0; Cajita::evaluateSpline( local_mesh, x, sd_c0 ); Cajita::P2G::value( 1.0, sd_c0, k_c_sv ); - }); + } ); // Complete local scatter. Kokkos::Experimental::contribute( r_c, r_c_sv ); @@ -261,7 +258,7 @@ void g2p( const ExecutionSpace& exec_space, //---------------------------------------------------------------------------// // Correct particle positions. -template +template void correctParticlePositions( const ExecutionSpace& exec_space, const ProblemManagerType& pm, const double delta_t, @@ -287,35 +284,34 @@ void correctParticlePositions( const ExecutionSpace& exec_space, // Build the local mesh. auto local_mesh = - Cajita::createLocalMesh( *(pm.mesh()->localGrid()) ); + Cajita::createLocalMesh( *( pm.mesh()->localGrid() ) ); // Compute nodal correction. - auto local_cells = - pm.mesh()->localGrid()->indexSpace( - Cajita::Own(), Cajita::Cell(), Cajita::Local() ); + auto local_cells = pm.mesh()->localGrid()->indexSpace( + Cajita::Own(), Cajita::Cell(), Cajita::Local() ); Kokkos::parallel_for( "compute_position_correction", - Cajita::createExecutionPolicy(local_cells,exec_space), - KOKKOS_LAMBDA( const int i, const int j, const int k ){ + Cajita::createExecutionPolicy( local_cells, exec_space ), + KOKKOS_LAMBDA( const int i, const int j, const int k ) { // Get the cell center. - int idx[3] = {i,j,k}; + int idx[3] = { i, j, k }; double x[3]; local_mesh.coordinates( Cajita::Cell(), idx, x ); // Setup interpolation from cell center to nodes. - Cajita::SplineData sd_i; + Cajita::SplineData sd_i; Cajita::evaluateSpline( local_mesh, x, sd_i ); // Clamp the density outside the fluid. - double rho = ( k_c(i,j,k,0) > 0.0 ) - ? r_c(i,j,k,0) - : fmax( r_c(i,j,k,0), density ); + double rho = ( k_c( i, j, k, 0 ) > 0.0 ) + ? r_c( i, j, k, 0 ) + : fmax( r_c( i, j, k, 0 ), density ); // Compute correction. double correction = - -delta_t * delta_t * kappa * (1 - rho/density) / density; + -delta_t * delta_t * kappa * ( 1 - rho / density ) / density; Cajita::P2G::gradient( correction, sd_i, x_i_sv ); - }); + } ); // Complete local scatter. Kokkos::Experimental::contribute( x_i, x_i_sv ); @@ -328,47 +324,44 @@ void correctParticlePositions( const ExecutionSpace& exec_space, // Apply boundary condition to position correction. // Compute the velocity. 
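Two model updates are buried in the kernels above, in the order they appear: g2p integrates the deformation gradient determinant from the velocity divergence, and the correction pass turns the density deviation into a pressure-like nodal displacement,

    \[
        J_p \leftarrow J_p \exp\!\left( \Delta t\, \nabla \!\cdot \mathbf{u} \right),
        \qquad
        c = -\Delta t^2\, \kappa\, \frac{1 - \rho/\rho_0}{\rho_0} ,
    \]

where $\rho$ is the cell density (`r_c`), clamped from below by $\rho_0$ (`density`) outside the fluid where the `k_c` marker is zero, and $c$ is scattered to the nodes through the spline gradient before being gathered back at the particles as `delta_x`.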
-    auto l2g = Cajita::IndexConversion::createL2G(
-        *(pm.mesh()->localGrid()), Cajita::Node() );
-    auto local_nodes =
-        pm.mesh()->localGrid()->indexSpace(
-            Cajita::Ghost(), Cajita::Node(), Cajita::Local() );
+    auto l2g = Cajita::IndexConversion::createL2G( *( pm.mesh()->localGrid() ),
+                                                   Cajita::Node() );
+    auto local_nodes = pm.mesh()->localGrid()->indexSpace(
+        Cajita::Ghost(), Cajita::Node(), Cajita::Local() );
     Kokkos::parallel_for(
-        Cajita::createExecutionPolicy(local_nodes,exec_space),
-        KOKKOS_LAMBDA( const int li, const int lj, const int lk ){
+        Cajita::createExecutionPolicy( local_nodes, exec_space ),
+        KOKKOS_LAMBDA( const int li, const int lj, const int lk ) {
             int gi, gj, gk;
             l2g( li, lj, lk, gi, gj, gk );
-            bc( gi, gj, gk,
-                x_i(li,lj,lk,0), x_i(li,lj,lk,1), x_i(li,lj,lk,2) );
-        });
+            bc( gi, gj, gk, x_i( li, lj, lk, 0 ), x_i( li, lj, lk, 1 ),
+                x_i( li, lj, lk, 2 ) );
+        } );

     // Update particle positions.
     Kokkos::parallel_for(
         "correct_particles",
-        Kokkos::RangePolicy<ExecutionSpace>(exec_space,0,pm.numParticle()),
-        KOKKOS_LAMBDA( const int p ){
+        Kokkos::RangePolicy<ExecutionSpace>( exec_space, 0, pm.numParticle() ),
+        KOKKOS_LAMBDA( const int p ) {
             // Get the particle position.
-            double x[3] = { x_p(p,0), x_p(p,1), x_p(p,2) };
+            double x[3] = { x_p( p, 0 ), x_p( p, 1 ), x_p( p, 2 ) };

             // Setup interpolation from the nodes.
-            Cajita::SplineData sd_i;
+            Cajita::SplineData sd_i;
             Cajita::evaluateSpline( local_mesh, x, sd_i );

             // Correct the particle position.
             double delta_x[3];
             Cajita::G2P::value( x_i, sd_i, delta_x );
             for ( int d = 0; d < 3; ++d )
-                x_p(p,d) += delta_x[d];
-        });
+                x_p( p, d ) += delta_x[d];
+        } );
 }

 //---------------------------------------------------------------------------//
 // Take a time step.
-template <class ExecutionSpace, class ProblemManagerType>
-void step( const ExecutionSpace& exec_space,
-           const ProblemManagerType& pm,
-           const double delta_t,
-           const double gravity,
+template <class ExecutionSpace, class ProblemManagerType>
+void step( const ExecutionSpace& exec_space, const ProblemManagerType& pm,
+           const double delta_t, const double gravity,
            const BoundaryCondition& bc )
 {
     p2g( exec_space, pm );
diff --git a/src/ExaMPM_VTKDomainWriter.hpp b/src/ExaMPM_VTKDomainWriter.hpp
index 1847338..bf6986b 100644
--- a/src/ExaMPM_VTKDomainWriter.hpp
+++ b/src/ExaMPM_VTKDomainWriter.hpp
@@ -21,99 +21,111 @@ namespace ExaMPM
 namespace VTKDomainWriter
 {
 // Create filename
-void writeDomainParallelFile(MPI_Comm comm, int time_step, std::string& basename)
+void writeDomainParallelFile( MPI_Comm comm, int time_step,
+                              std::string& basename )
 {
     // Should only be called from a single rank
     int size;
-    MPI_Comm_size(comm, &size);
+    MPI_Comm_size( comm, &size );
     std::stringstream filename;
     filename << basename << "_" << time_step << ".pvtu";
-    FILE* file = fopen(filename.str().c_str(), "w");
-    fprintf(file, "<?xml version=\"1.0\"?>\n");
-    fprintf(file, "<VTKFile type=\"PUnstructuredGrid\" version=\"1.0\">\n");
-    fprintf(file, "<PUnstructuredGrid>\n");
-    fprintf(file, "\t<PCellData>\n");
-    fprintf(file, "\t\t<PDataArray type=\"Int32\" Name=\"rank\"/>\n");
-    fprintf(file, "\t</PCellData>\n");
-    fprintf(file, "\t<PPoints>\n");
-    fprintf(file, "\t\t<PDataArray type=\"Float64\" Name=\"Points\" NumberOfComponents=\"3\"/>\n");
-    fprintf(file, "\t</PPoints>\n");
-    for(std::size_t i=0; i<size; ++i)
-        fprintf(file, "\t<Piece Source=\"%s_%d_%d.vtu\"/>\n", basename.c_str(), time_step, i);
-    fprintf(file, "</PUnstructuredGrid>\n");
-    fprintf(file, "</VTKFile>\n");
-    fclose(file);
+    FILE* file = fopen( filename.str().c_str(), "w" );
+    fprintf( file, "<?xml version=\"1.0\"?>\n" );
+    fprintf( file, "<VTKFile type=\"PUnstructuredGrid\" version=\"1.0\">\n" );
+    fprintf( file, "<PUnstructuredGrid>\n" );
+    fprintf( file, "\t<PCellData>\n" );
+    fprintf( file, "\t\t<PDataArray type=\"Int32\" Name=\"rank\"/>\n" );
+    fprintf( file, "\t</PCellData>\n" );
+    fprintf( file, "\t<PPoints>\n" );
+    fprintf( file, "\t\t<PDataArray type=\"Float64\" Name=\"Points\" "
+                   "NumberOfComponents=\"3\"/>\n" );
+    fprintf( file, "\t</PPoints>\n" );
+    for ( std::size_t i = 0; i < size; ++i )
+        fprintf( file, "\t<Piece Source=\"%s_%d_%d.vtu\"/>\n", basename.c_str(),
+                 time_step, i );
+    fprintf( file, "</PUnstructuredGrid>\n" );
+    fprintf( file, "</VTKFile>\n" );
+    fclose( file );
 }
 // Write VTU for domain (low corner, high corner)
-void writeDomain(MPI_Comm comm, int time_step, std::array<double, 6>& domain_vertices, std::string& basename)
+void writeDomain( MPI_Comm comm, int time_step,
+                  std::array<double, 6>& domain_vertices,
+                  std::string& basename )
 {
     int rank;
-    MPI_Comm_rank(comm, &rank);
-    if(rank==1)
-        writeDomainParallelFile(comm, time_step, basename);
+    MPI_Comm_rank( comm, &rank );
+    if ( rank == 1 )
+        writeDomainParallelFile( comm, time_step, basename );
     std::stringstream filename;
     // todo(sschulz): properly format, according to max rank
     filename << basename << "_" << time_step << "_" << rank << ".vtu";
-    FILE* file = fopen(filename.str().c_str(), "w");
-    fprintf(file, "<?xml version=\"1.0\"?>\n");
-    fprintf(file, "<VTKFile type=\"UnstructuredGrid\" version=\"1.0\">\n");
-    fprintf(file, "<UnstructuredGrid>\n");
-    std::array<double, 24> vertices;
-    vertices[0*3+0] = domain_vertices[0];
-    vertices[2*3+0] = domain_vertices[0];
-    vertices[4*3+0] = domain_vertices[0];
-    vertices[6*3+0] = domain_vertices[0];
-    vertices[0*3+1] = domain_vertices[1];
-    vertices[1*3+1] = domain_vertices[1];
-    vertices[4*3+1] = domain_vertices[1];
-    vertices[5*3+1] = domain_vertices[1];
-    vertices[0*3+2] = domain_vertices[2];
-    vertices[1*3+2] = domain_vertices[2];
-    vertices[2*3+2] = domain_vertices[2];
-    vertices[3*3+2] = domain_vertices[2];
-    vertices[1*3+0] = domain_vertices[3];
-    vertices[3*3+0] = domain_vertices[3];
-    vertices[5*3+0] = domain_vertices[3];
-    vertices[7*3+0] = domain_vertices[3];
-    vertices[2*3+1] = domain_vertices[4];
-    vertices[3*3+1] = domain_vertices[4];
-    vertices[6*3+1] = domain_vertices[4];
-    vertices[7*3+1] = domain_vertices[4];
-    vertices[4*3+2] = domain_vertices[5];
-    vertices[5*3+2] = domain_vertices[5];
-    vertices[6*3+2] = domain_vertices[5];
-    vertices[7*3+2] = domain_vertices[5];
-    std::array<int, 8> connectivity = {0,1,2,3,4,5,6,7};
-    fprintf(file, "<Piece NumberOfPoints=\"8\" NumberOfCells=\"1\">\n");
-    fprintf(file, "\t<PointData>\n");
-    fprintf(file, "\t</PointData>\n");
-    fprintf(file, "\t<CellData>\n");
-    fprintf(file, "\t\t<DataArray type=\"Int32\" Name=\"rank\" format=\"ascii\">\n");
-    fprintf(file, "%d", rank);
-    fprintf(file, "\n\t\t</DataArray>\n");
-    fprintf(file, "\t</CellData>\n");
-    fprintf(file, "\t<Points>\n");
-    fprintf(file, "\t\t<DataArray type=\"Float64\" Name=\"Points\" NumberOfComponents=\"3\" format=\"ascii\">\n");
-    for(const double &vert : vertices)
-        fprintf(file, "%g ", vert);
-    fprintf(file, "\n\t\t</DataArray>\n");
-    fprintf(file, "\t</Points>\n");
-    fprintf(file, "\t<Cells>\n");
-    fprintf(file, "\t\t<DataArray type=\"Int32\" Name=\"connectivity\" format=\"ascii\">\n");
-    for(const int &conn : connectivity)
-        fprintf(file, "%d ", conn);
-    fprintf(file, "\n\t\t</DataArray>\n");
-    fprintf(file, "\t\t<DataArray type=\"Int32\" Name=\"offsets\" format=\"ascii\">\n");
-    fprintf(file, "8\n");
-    fprintf(file, "\n\t\t</DataArray>\n");
-    fprintf(file, "\t\t<DataArray type=\"UInt8\" Name=\"types\" format=\"ascii\">\n");
-    fprintf(file, "11\n");
-    fprintf(file, "\n\t\t</DataArray>\n");
-    fprintf(file, "\t</Cells>\n");
-    fprintf(file, "</Piece>\n");
-    fprintf(file, "</UnstructuredGrid>\n");
-    fprintf(file, "</VTKFile>\n");
-    fclose(file);
+    FILE* file = fopen( filename.str().c_str(), "w" );
+    fprintf( file, "<?xml version=\"1.0\"?>\n" );
+    fprintf( file, "<VTKFile type=\"UnstructuredGrid\" version=\"1.0\">\n" );
+    fprintf( file, "<UnstructuredGrid>\n" );
+    std::array<double, 24> vertices;
+    vertices[0 * 3 + 0] = domain_vertices[0];
+    vertices[2 * 3 + 0] = domain_vertices[0];
+    vertices[4 * 3 + 0] = domain_vertices[0];
+    vertices[6 * 3 + 0] = domain_vertices[0];
+    vertices[0 * 3 + 1] = domain_vertices[1];
+    vertices[1 * 3 + 1] = domain_vertices[1];
+    vertices[4 * 3 + 1] = domain_vertices[1];
+    vertices[5 * 3 + 1] = domain_vertices[1];
+    vertices[0 * 3 + 2] = domain_vertices[2];
+    vertices[1 * 3 + 2] = domain_vertices[2];
+    vertices[2 * 3 + 2] = domain_vertices[2];
+    vertices[3 * 3 + 2] = domain_vertices[2];
+    vertices[1 * 3 + 0] = domain_vertices[3];
+    vertices[3 * 3 + 0] = domain_vertices[3];
+    vertices[5 * 3 + 0] = domain_vertices[3];
+    vertices[7 * 3 + 0] = domain_vertices[3];
+    vertices[2 * 3 + 1] = domain_vertices[4];
+    vertices[3 * 3 + 1] = domain_vertices[4];
+    vertices[6 * 3 + 1] = domain_vertices[4];
+    vertices[7 * 3 + 1] = domain_vertices[4];
+    vertices[4 * 3 + 2] = domain_vertices[5];
+    vertices[5 * 3 + 2] = domain_vertices[5];
+    vertices[6 * 3 + 2] = domain_vertices[5];
+    vertices[7 * 3 + 2] = domain_vertices[5];
+    std::array<int, 8> connectivity = { 0, 1, 2, 3, 4, 5, 6, 7 };
+    fprintf( file, "<Piece NumberOfPoints=\"8\" NumberOfCells=\"1\">\n" );
+    fprintf( file, "\t<PointData>\n" );
+    fprintf( file, "\t</PointData>\n" );
+    fprintf( file, "\t<CellData>\n" );
+    fprintf( file, "\t\t<DataArray type=\"Int32\" Name=\"rank\" format=\"ascii\">\n" );
+    fprintf( file, "%d", rank );
+    fprintf( file, "\n\t\t</DataArray>\n" );
+    fprintf( file, "\t</CellData>\n" );
+    fprintf( file, "\t<Points>\n" );
+    fprintf( file, "\t\t<DataArray type=\"Float64\" Name=\"Points\" "
+                   "NumberOfComponents=\"3\" format=\"ascii\">\n" );
+    for ( const double& vert : vertices )
+        fprintf( file, "%g ", vert );
+    fprintf( file, "\n\t\t</DataArray>\n" );
+    fprintf( file, "\t</Points>\n" );
+    fprintf( file, "\t<Cells>\n" );
+    fprintf( file, "\t\t<DataArray type=\"Int32\" Name=\"connectivity\" "
+                   "format=\"ascii\">\n" );
+    for ( const int& conn : connectivity )
+        fprintf( file, "%d ", conn );
+    fprintf( file, "\n\t\t</DataArray>\n" );
+    fprintf( file, "\t\t<DataArray type=\"Int32\" Name=\"offsets\" format=\"ascii\">\n" );
+    fprintf( file, "8\n" );
+    fprintf( file, "\n\t\t</DataArray>\n" );
+    fprintf( file, "\t\t<DataArray type=\"UInt8\" Name=\"types\" format=\"ascii\">\n" );
+    fprintf( file, "11\n" );
+    fprintf( file, "\n\t\t</DataArray>\n" );
+    fprintf( file, "\t</Cells>\n" );
+    fprintf( file, "</Piece>\n" );
+    fprintf( file, "</UnstructuredGrid>\n" );
+    fprintf( file, "</VTKFile>\n" );
+    fclose( file );
 }

 // Write PVTU
diff --git a/src/ExaMPM_VelocityInterpolation.hpp b/src/ExaMPM_VelocityInterpolation.hpp
index 3e3a441..d6050fb 100644
--- a/src/ExaMPM_VelocityInterpolation.hpp
+++ b/src/ExaMPM_VelocityInterpolation.hpp
@@ -12,16 +12,16 @@
 #ifndef EXAMPM_VELOCITYINTERPOLATION_HPP
 #define EXAMPM_VELOCITYINTERPOLATION_HPP

-#include
 #include
+#include

 #include
 #include

 #include

-#include
 #include
+#include

 namespace ExaMPM
 {
@@ -29,22 +29,18 @@ namespace APIC
 {
 //---------------------------------------------------------------------------//
 // Inertial tensor scale factor.
-template <class SplineDataType>
-KOKKOS_INLINE_FUNCTION
-typename SplineDataType::scalar_type
-inertialScaling(
+template <class SplineDataType>
+KOKKOS_INLINE_FUNCTION typename SplineDataType::scalar_type inertialScaling(
     const SplineDataType& sd,
-    typename std::enable_if<(2==SplineDataType::order),void*>::type = 0)
+    typename std::enable_if<( 2 == SplineDataType::order ), void*>::type = 0 )
 {
     return 4.0 / ( sd.dx[0] * sd.dx[0] );
 }

-template <class SplineDataType>
-KOKKOS_INLINE_FUNCTION
-typename SplineDataType::scalar_type
-inertialScaling(
+template <class SplineDataType>
+KOKKOS_INLINE_FUNCTION typename SplineDataType::scalar_type inertialScaling(
     const SplineDataType& sd,
-    typename std::enable_if<(3==SplineDataType::order),void*>::type = 0)
+    typename std::enable_if<( 3 == SplineDataType::order ), void*>::type = 0 )
 {
     return 3.0 / ( sd.dx[0] * sd.dx[0] );
 }

@@ -52,17 +48,16 @@ inertialScaling(
 //---------------------------------------------------------------------------//
 // Interpolate particle momentum to the nodes.
(Second and Third order // splines) -template -KOKKOS_INLINE_FUNCTION -void p2g( - const typename MomentumView::original_value_type m_p, - const typename MomentumView::original_value_type u_p[3], - const typename MomentumView::original_value_type B_p[3][3], - const SplineDataType& sd, - const MomentumView& node_momentum, - typename std::enable_if< - (Cajita::isNode::value && - (SplineDataType::order==2 || SplineDataType::order==3)),void*>::type = 0 ) +template +KOKKOS_INLINE_FUNCTION void +p2g( const typename MomentumView::original_value_type m_p, + const typename MomentumView::original_value_type u_p[3], + const typename MomentumView::original_value_type B_p[3][3], + const SplineDataType& sd, const MomentumView& node_momentum, + typename std::enable_if< + ( Cajita::isNode::value && + ( SplineDataType::order == 2 || SplineDataType::order == 3 ) ), + void*>::type = 0 ) { static_assert( Cajita::P2G::is_scatter_view::value, "P2G requires a Kokkos::ScatterView" ); @@ -91,34 +86,29 @@ void p2g( DenseLinearAlgebra::matVecMultiply( B_p, distance, B_p_d ); // Weight times mass. - wm_ip = sd.w[Dim::I][i] * - sd.w[Dim::J][j] * - sd.w[Dim::K][k] * - m_p; + wm_ip = + sd.w[Dim::I][i] * sd.w[Dim::J][j] * sd.w[Dim::K][k] * m_p; // Interpolate particle momentum to the entity. for ( int d = 0; d < 3; ++d ) - momentum_access( sd.s[Dim::I][i], - sd.s[Dim::J][j], - sd.s[Dim::K][k], - d ) += + momentum_access( sd.s[Dim::I][i], sd.s[Dim::J][j], + sd.s[Dim::K][k], d ) += wm_ip * ( u_p[d] + D_p_inv * B_p_d[d] ); } } //---------------------------------------------------------------------------// // Interpolate particle momentum to the nodes. (First order splines) -template -KOKKOS_INLINE_FUNCTION -void p2g( - const typename MomentumView::original_value_type m_p, - const typename MomentumView::original_value_type u_p[3], - const typename MomentumView::original_value_type B_p[3][3], - const SplineDataType& sd, - const MomentumView& node_momentum, - typename std::enable_if< - (Cajita::isNode::value && - (SplineDataType::order==1)),void*>::type = 0 ) +template +KOKKOS_INLINE_FUNCTION void +p2g( const typename MomentumView::original_value_type m_p, + const typename MomentumView::original_value_type u_p[3], + const typename MomentumView::original_value_type B_p[3][3], + const SplineDataType& sd, const MomentumView& node_momentum, + typename std::enable_if< + ( Cajita::isNode::value && + ( SplineDataType::order == 1 ) ), + void*>::type = 0 ) { static_assert( Cajita::P2G::is_scatter_view::value, "P2G requires a Kokkos::ScatterView" ); @@ -135,48 +125,38 @@ void p2g( for ( int k = 0; k < SplineDataType::num_knot; ++k ) { // Weight times mass. - wm_ip = sd.w[Dim::I][i] * - sd.w[Dim::J][j] * - sd.w[Dim::K][k] * - m_p; + wm_ip = + sd.w[Dim::I][i] * sd.w[Dim::J][j] * sd.w[Dim::K][k] * m_p; // Weight gradient times mass. - gm_ip[0] = sd.g[Dim::I][i] * - sd.w[Dim::J][j] * - sd.w[Dim::K][k] * - m_p; - gm_ip[1] = sd.w[Dim::I][i] * - sd.g[Dim::J][j] * - sd.w[Dim::K][k] * - m_p; - gm_ip[2] = sd.w[Dim::I][i] * - sd.w[Dim::J][j] * - sd.g[Dim::K][k] * - m_p; + gm_ip[0] = + sd.g[Dim::I][i] * sd.w[Dim::J][j] * sd.w[Dim::K][k] * m_p; + gm_ip[1] = + sd.w[Dim::I][i] * sd.g[Dim::J][j] * sd.w[Dim::K][k] * m_p; + gm_ip[2] = + sd.w[Dim::I][i] * sd.w[Dim::J][j] * sd.g[Dim::K][k] * m_p; // Compute the action of B_p on the gradient. DenseLinearAlgebra::matVecMultiply( B_p, gm_ip, B_g_d ); // Interpolate particle momentum to the entity. 
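The `D_p_inv` factor used above is the inverse APIC inertia tensor, which reduces to a scalar on a uniform grid for quadratic and cubic B-splines, so the momentum scatter is

    \[
        D_p^{-1} = \frac{4}{\Delta x^2} \;\;(\text{order } 2), \qquad
        D_p^{-1} = \frac{3}{\Delta x^2} \;\;(\text{order } 3), \qquad
        (m\mathbf{u})_i \mathrel{+}= w_{ip}\, m_p
            \left( \mathbf{u}_p + D_p^{-1} B_p\, (\mathbf{x}_i - \mathbf{x}_p) \right),
    \]

matching the `inertialScaling` overloads earlier in this file and the `wm_ip * ( u_p[d] + D_p_inv * B_p_d[d] )` accumulation. The first-order overload, whose accumulation continues just below, instead folds $B_p$ against the mass-weighted weight gradient, as the code shows.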
for ( int d = 0; d < 3; ++d ) - momentum_access( sd.s[Dim::I][i], - sd.s[Dim::J][j], - sd.s[Dim::K][k], - d ) += wm_ip * u_p[d] + B_g_d[d]; + momentum_access( sd.s[Dim::I][i], sd.s[Dim::J][j], + sd.s[Dim::K][k], d ) += + wm_ip * u_p[d] + B_g_d[d]; } } //---------------------------------------------------------------------------// // Interpolate grid node velocity to the particle. -template -KOKKOS_INLINE_FUNCTION -void g2p( - const VelocityView& node_velocity, - const SplineDataType& sd, - typename VelocityView::value_type u_p[3], - typename VelocityView::value_type B_p[3][3], - typename std::enable_if< - Cajita::isNode::value,void*>::type = 0 ) +template +KOKKOS_INLINE_FUNCTION void +g2p( const VelocityView& node_velocity, const SplineDataType& sd, + typename VelocityView::value_type u_p[3], + typename VelocityView::value_type B_p[3][3], + typename std::enable_if< + Cajita::isNode::value, + void*>::type = 0 ) { using value_type = typename VelocityView::value_type; @@ -200,11 +180,8 @@ void g2p( // Update velocity. for ( int d = 0; d < 3; ++d ) u_p[d] += - w_ip * - node_velocity( sd.s[Dim::I][i], - sd.s[Dim::J][j], - sd.s[Dim::K][k], - d ); + w_ip * node_velocity( sd.s[Dim::I][i], sd.s[Dim::J][j], + sd.s[Dim::K][k], d ); // Physical distance to entity. distance[Dim::I] = sd.d[Dim::I][i]; @@ -214,12 +191,11 @@ void g2p( // Update affine matrix. for ( int d0 = 0; d0 < 3; ++d0 ) for ( int d1 = 0; d1 < 3; ++d1 ) - B_p[d0][d1] += w_ip * - node_velocity( sd.s[Dim::I][i], - sd.s[Dim::J][j], - sd.s[Dim::K][k], - d0 ) * - distance[d1]; + B_p[d0][d1] += + w_ip * + node_velocity( sd.s[Dim::I][i], sd.s[Dim::J][j], + sd.s[Dim::K][k], d0 ) * + distance[d1]; } } From ee32eebef2842443697462db208b35ce44afe508 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Mon, 26 Jul 2021 10:21:36 -0600 Subject: [PATCH 29/64] CI: use modified cabana for now. --- .github/workflows/CI.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 65349a7..ade6697 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -33,8 +33,8 @@ jobs: - name: Checkout Cabana uses: actions/checkout@v2.2.0 with: - repository: ECP-copa/Cabana - ref: master + repository: aetx/Cabana + ref: all_lb path: Cabana - name: Build Cabana working-directory: Cabana From d03eb083320983211d1682284321d29fc0b5e851 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 26 Jul 2021 18:58:48 +0200 Subject: [PATCH 30/64] disable ALL VTK output --- src/ExaMPM_LoadBalancer.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ExaMPM_LoadBalancer.hpp b/src/ExaMPM_LoadBalancer.hpp index 06e4490..c13e9b2 100644 --- a/src/ExaMPM_LoadBalancer.hpp +++ b/src/ExaMPM_LoadBalancer.hpp @@ -100,7 +100,7 @@ class LoadBalancer _liball->getVertices(); // todo(sschulz): The official VTK routine seems to create mangled files // on my system. 
-        _liball->printVTKoutlines( t );
+        // _liball->printVTKoutlines( t );
         std::array<double, 6> vertices;
         for ( std::size_t d = 0; d < 3; ++d )
             vertices[d] = static_cast<double>(

From 697ff1093cbb548c7c4ee03dc77f8d4406710006 Mon Sep 17 00:00:00 2001
From: Stephan Schulz
Date: Mon, 26 Jul 2021 19:00:05 +0200
Subject: [PATCH 31/64] fix warning in printf

---
 src/ExaMPM_VTKDomainWriter.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ExaMPM_VTKDomainWriter.hpp b/src/ExaMPM_VTKDomainWriter.hpp
index bf6986b..6e8b485 100644
--- a/src/ExaMPM_VTKDomainWriter.hpp
+++ b/src/ExaMPM_VTKDomainWriter.hpp
@@ -42,7 +42,7 @@ void writeDomainParallelFile( MPI_Comm comm, int time_step,
                    "NumberOfComponents=\"3\"/>\n" );
     fprintf( file, "\t</PPoints>\n" );
     for ( std::size_t i = 0; i < size; ++i )
-        fprintf( file, "\t<Piece Source=\"%s_%d_%d.vtu\"/>\n", basename.c_str(),
+        fprintf( file, "\t<Piece Source=\"%s_%d_%lu.vtu\"/>\n", basename.c_str(),
                  time_step, i );
     fprintf( file, "</PUnstructuredGrid>\n" );
     fprintf( file, "</VTKFile>\n" );

From 490b3df11faebacbb10ceab7f2b3b65876373f0d Mon Sep 17 00:00:00 2001
From: Stephan Schulz
Date: Tue, 27 Jul 2021 18:47:48 +0200
Subject: [PATCH 32/64] fix documentation in VTKDomainWriter

---
 src/ExaMPM_VTKDomainWriter.hpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/ExaMPM_VTKDomainWriter.hpp b/src/ExaMPM_VTKDomainWriter.hpp
index 6e8b485..6b92246 100644
--- a/src/ExaMPM_VTKDomainWriter.hpp
+++ b/src/ExaMPM_VTKDomainWriter.hpp
@@ -20,7 +20,7 @@ namespace ExaMPM
 {
 namespace VTKDomainWriter
 {
-// Create filename
+// Write PVTU
 void writeDomainParallelFile( MPI_Comm comm, int time_step,
                               std::string& basename )
 {
@@ -128,7 +128,6 @@ void writeDomain( MPI_Comm comm, int time_step,
     fclose( file );
 }

-// Write PVTU
 } // end namespace VTKDomainWriter
 } // end namespace ExaMPM
 #endif // EXAMPM_VTKDOMAINWRITER_HPP

From 47213d7d991dbe40afafd9e7511e73aaff439989 Mon Sep 17 00:00:00 2001
From: Stephan Schulz
Date: Wed, 28 Jul 2021 16:49:47 +0200
Subject: [PATCH 33/64] update constness

---
 src/ExaMPM_LoadBalancer.hpp | 2 +-
 src/ExaMPM_Mesh.hpp         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ExaMPM_LoadBalancer.hpp b/src/ExaMPM_LoadBalancer.hpp
index c13e9b2..55a68e3 100644
--- a/src/ExaMPM_LoadBalancer.hpp
+++ b/src/ExaMPM_LoadBalancer.hpp
@@ -30,7 +30,7 @@ class LoadBalancer
   public:
     using mesh_type = Mesh<MemorySpace>;
     // todo(sschulz): Allow for arbitrary dimension
-    LoadBalancer( MPI_Comm comm, const std::shared_ptr<mesh_type>& mesh )
+    LoadBalancer( MPI_Comm comm, std::shared_ptr<mesh_type>& mesh )
         : _comm( comm )
         , _mesh( mesh )
     {
diff --git a/src/ExaMPM_Mesh.hpp b/src/ExaMPM_Mesh.hpp
index c921a4a..795c01f 100644
--- a/src/ExaMPM_Mesh.hpp
+++ b/src/ExaMPM_Mesh.hpp
@@ -111,7 +111,7 @@ class Mesh
     }

     // Get the mutable global grid.
- Cajita::GlobalGrid>& mutGlobalGrid() const + Cajita::GlobalGrid>& mutGlobalGrid() { return _local_grid->mutGlobalGrid(); } From 00c9239ab32c7c4a58e3e71b757fd3deb4e0a75e Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Thu, 29 Jul 2021 17:19:47 +0200 Subject: [PATCH 34/64] update for cabana refactoring --- src/ExaMPM_LoadBalancer.hpp | 19 ++++++++++--------- src/ExaMPM_Mesh.hpp | 4 ++-- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/ExaMPM_LoadBalancer.hpp b/src/ExaMPM_LoadBalancer.hpp index 55a68e3..353fa0d 100644 --- a/src/ExaMPM_LoadBalancer.hpp +++ b/src/ExaMPM_LoadBalancer.hpp @@ -42,10 +42,10 @@ class LoadBalancer // auto global_grid = _mesh->localGrid()->globalGrid(); std::vector block_id( 3, 0 ); for ( std::size_t i = 0; i < 3; ++i ) - block_id.at( i ) = _mesh->mutGlobalGrid().dimBlockId( i ); + block_id.at( i ) = _mesh->globalGrid().dimBlockId( i ); std::vector blocks_per_dim( 3, 0 ); for ( std::size_t i = 0; i < 3; ++i ) - blocks_per_dim.at( i ) = _mesh->mutGlobalGrid().dimNumBlock( i ); + blocks_per_dim.at( i ) = _mesh->globalGrid().dimNumBlock( i ); _liball->setProcGridParams( block_id, blocks_per_dim ); std::vector min_domain_size( 3, 0 ); for ( std::size_t i = 0; i < 3; ++i ) @@ -83,10 +83,11 @@ class LoadBalancer cell_index_hi[d] = std::rint( updated_vertices.at( 1 )[d] / _mesh->cellSize() ); for ( std::size_t d = 0; d < 3; ++d ) - _mesh->mutGlobalGrid().setGlobalOffset( d, cell_index_lo[d] ); + _mesh->globalGrid().setGlobalOffset( d, cell_index_lo[d] ); + std::array num_cell; for ( std::size_t d = 0; d < 3; ++d ) - _mesh->mutGlobalGrid().setOwnedNumCell( - d, ( cell_index_hi[d] - cell_index_lo[d] ) ); + num_cell[d] = cell_index_hi[d] - cell_index_lo[d]; + _mesh->globalGrid().setOwnedNumCell( num_cell ); _liball->setVertices( updated_vertices ); pm->updateMesh( _mesh ); } @@ -103,13 +104,13 @@ class LoadBalancer // _liball->printVTKoutlines( t ); std::array vertices; for ( std::size_t d = 0; d < 3; ++d ) - vertices[d] = static_cast( - _mesh->mutGlobalGrid().globalOffset( d ) ) * - _mesh->cellSize(); + vertices[d] = + static_cast( _mesh->globalGrid().globalOffset( d ) ) * + _mesh->cellSize(); for ( std::size_t d = 3; d < 6; ++d ) vertices[d] = vertices[d - 3] + static_cast( - _mesh->mutGlobalGrid().ownedNumCell( d - 3 ) ) * + _mesh->globalGrid().ownedNumCell( d - 3 ) ) * _mesh->cellSize(); VTKDomainWriter::writeDomain( _comm, t, vertices, vtk_actual_domain_basename ); diff --git a/src/ExaMPM_Mesh.hpp b/src/ExaMPM_Mesh.hpp index 795c01f..09754d5 100644 --- a/src/ExaMPM_Mesh.hpp +++ b/src/ExaMPM_Mesh.hpp @@ -111,9 +111,9 @@ class Mesh } // Get the mutable global grid. - Cajita::GlobalGrid>& mutGlobalGrid() + Cajita::GlobalGrid>& globalGrid() { - return _local_grid->mutGlobalGrid(); + return _local_grid->globalGrid(); } // Get the cell size. 
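A detail of the balance step above that is easy to miss: ALL returns continuous domain vertices, and they are snapped back onto the uniform grid with `std::rint` before the offsets and owned-cell counts are updated, so nearest-face rounding (not truncation) decides cell ownership. A standalone illustration with invented numbers:

    #include <cmath>
    #include <cstdio>

    int main()
    {
        const double cell_size = 0.25;
        // Vertices as a load balancer might return them: close to, but not
        // exactly on, cell faces.
        const double lo = 1.0000000001, hi = 2.2499999999;
        const int lo_cell = static_cast<int>( std::rint( lo / cell_size ) ); // 4, not 3 or 5
        const int hi_cell = static_cast<int>( std::rint( hi / cell_size ) ); // 9 (2.25 / 0.25)
        std::printf( "offset %d, owned cells %d\n", lo_cell, hi_cell - lo_cell );
        return 0;
    }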
From 6e1389b42aeb1130a28cc9e9549de2709f093dbe Mon Sep 17 00:00:00 2001
From: Stephan Schulz
Date: Thu, 29 Jul 2021 17:20:03 +0200
Subject: [PATCH 35/64] update format

---
 src/ExaMPM_VTKDomainWriter.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ExaMPM_VTKDomainWriter.hpp b/src/ExaMPM_VTKDomainWriter.hpp
index 6b92246..b23c5d7 100644
--- a/src/ExaMPM_VTKDomainWriter.hpp
+++ b/src/ExaMPM_VTKDomainWriter.hpp
@@ -42,8 +42,8 @@ void writeDomainParallelFile( MPI_Comm comm, int time_step,
                    "NumberOfComponents=\"3\"/>\n" );
     fprintf( file, "\t</PPoints>\n" );
     for ( std::size_t i = 0; i < size; ++i )
-        fprintf( file, "\t<Piece Source=\"%s_%d_%lu.vtu\"/>\n", basename.c_str(),
-                 time_step, i );
+        fprintf( file, "\t<Piece Source=\"%s_%d_%lu.vtu\"/>\n",
+                 basename.c_str(), time_step, i );
     fprintf( file, "</PUnstructuredGrid>\n" );
     fprintf( file, "</VTKFile>\n" );

From f1c50b91ccdf10b8977025398c6d81c285974457 Mon Sep 17 00:00:00 2001
From: Stephan Schulz
Date: Thu, 29 Jul 2021 20:40:36 +0200
Subject: [PATCH 36/64] update to new cabana api

---
 src/ExaMPM_LoadBalancer.hpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/ExaMPM_LoadBalancer.hpp b/src/ExaMPM_LoadBalancer.hpp
index 353fa0d..efe073a 100644
--- a/src/ExaMPM_LoadBalancer.hpp
+++ b/src/ExaMPM_LoadBalancer.hpp
@@ -75,19 +75,17 @@ class LoadBalancer
         _liball->balance();
         std::vector<ALL::Point<double>> updated_vertices =
             _liball->getVertices();
-        std::array cell_index_lo, cell_index_hi;
+        std::array cell_index_lo, cell_index_hi;
         for ( std::size_t d = 0; d < 3; ++d )
             cell_index_lo[d] =
                 std::rint( updated_vertices.at( 0 )[d] / _mesh->cellSize() );
         for ( std::size_t d = 0; d < 3; ++d )
             cell_index_hi[d] =
                 std::rint( updated_vertices.at( 1 )[d] / _mesh->cellSize() );
-        for ( std::size_t d = 0; d < 3; ++d )
-            _mesh->globalGrid().setGlobalOffset( d, cell_index_lo[d] );
         std::array num_cell;
         for ( std::size_t d = 0; d < 3; ++d )
             num_cell[d] = cell_index_hi[d] - cell_index_lo[d];
-        _mesh->globalGrid().setOwnedNumCell( num_cell );
+        _mesh->globalGrid().setNumCellAndOffset( num_cell, cell_index_lo );
         _liball->setVertices( updated_vertices );
         pm->updateMesh( _mesh );
     }

From 57a52427d6666a50db107e1a173d1abddf82effa Mon Sep 17 00:00:00 2001
From: Christoph Junghans
Date: Thu, 5 Aug 2021 13:36:14 -0500
Subject: [PATCH 37/64] Update CI.yml

---
 .github/workflows/CI.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 60493cd..163336d 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -33,8 +33,8 @@ jobs:
     - name: Checkout Cabana
       uses: actions/checkout@v2.2.0
       with:
-        repository: aetx/Cabana
-        ref: all_lb
+        repository: ECP-copa/Cabana
+        ref: master
        path: Cabana
     - name: Build Cabana
       working-directory: Cabana

From 078227bf205bc6d26dfba284519f560f2fa52933 Mon Sep 17 00:00:00 2001
From: Stephan Schulz
Date: Fri, 13 Aug 2021 15:58:52 +0200
Subject: [PATCH 38/64] make ALL optional

---
 CMakeLists.txt        | 17 ++++++++++++++++-
 src/CMakeLists.txt    | 14 ++++++++++++--
 src/ExaMPM_Solver.hpp | 15 ++++++++++++++-
 3 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 925f3ca..97bf6fc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,7 +15,22 @@ if( NOT Cabana_ENABLE_CAJITA )
   message( FATAL_ERROR "Cabana must be compiled with Cajita" )
 endif()
 find_package(Silo REQUIRED)
-find_package(ALL 0.9.1 REQUIRED)
+
+# optional dependencies (taken from Cabana)
+macro(ExaMPM_add_dependency)
+  cmake_parse_arguments(EXAMPM_DEPENDENCY "" "PACKAGE;VERSION" "" ${ARGN})
+  find_package(
${EXAMPM_DEPENDENCY_PACKAGE} ${EXAMPM_DEPENDENCY_VERSION} QUIET ) + string(TOUPPER "${EXAMPM_DEPENDENCY_PACKAGE}" EXAMPM_DEPENDENCY_OPTION ) + option( + ExaMPM_REQUIRE_${EXAMPM_DEPENDENCY_OPTION} + "Require ExaMPM to build with ${EXAMPM_DEPENDENCY_PACKAGE} support" ${EXAMPM_DEPENDENCY_PACKAGE}_FOUND) + if(ExaMPM_REQUIRE_${EXAMPM_DEPENDENCY_OPTION}) + find_package( ${EXAMPM_DEPENDENCY_PACKAGE} ${EXAMPM_DEPENDENCY_VERSION} REQUIRED ) + endif() + set(ExaMPM_ENABLE_${EXAMPM_DEPENDENCY_OPTION} ${${EXAMPM_DEPENDENCY_PACKAGE}_FOUND}) +endmacro() + +ExaMPM_add_dependency( PACKAGE "ALL" VERSION 0.9.2 ) # find Clang Format find_package( CLANG_FORMAT 10 ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 14e9e0d..46f5c82 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,9 +8,16 @@ set(HEADERS ExaMPM_Solver.hpp ExaMPM_TimeIntegrator.hpp ExaMPM_Types.hpp + ExaMPM_VTKDomainWriter.hpp ExaMPM_VelocityInterpolation.hpp ) +if(ExaMPM_ENABLE_ALL) + list(APPEND HEADERS + ExaMPM_LoadBalancer.hpp + ) +endif() + set(SOURCES ExaMPM_Mesh.cpp ) @@ -20,8 +27,11 @@ add_library(exampm ${SOURCES}) target_link_libraries(exampm Cabana::cabanacore Cabana::Cajita - Silo::silo - ALL::ALL ) + Silo::silo ) + +if(ExaMPM_ENABLE_ALL) + target_link_libraries(exampm ALL::ALL) +endif() target_include_directories(exampm PUBLIC diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 2531a70..f932660 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -13,12 +13,15 @@ #define EXAMPM_SOLVER_HPP #include -#include #include #include #include #include +#ifdef ExaMPM_ENABLE_ALL +#include +#endif + #include #include @@ -68,7 +71,9 @@ class Solver : public SolverBase MPI_Comm_rank( comm, &_rank ); +#ifdef ExaMPM_ENABLE_ALL _lb = std::make_shared>( comm, _mesh ); +#endif } void solve( const double t_final, const int write_freq ) override @@ -78,7 +83,9 @@ class Solver : public SolverBase _pm->get( Location::Particle(), Field::Position() ), _pm->get( Location::Particle(), Field::Velocity() ), _pm->get( Location::Particle(), Field::J() ) ); +#ifdef ExaMPM_ENABLE_ALL _lb->output( 0 ); +#endif int num_step = t_final / _dt; double delta_t = t_final / num_step; @@ -91,7 +98,9 @@ class Solver : public SolverBase TimeIntegrator::step( ExecutionSpace(), *_pm, delta_t, _gravity, _bc ); +#ifdef ExaMPM_ENABLE_ALL _lb->balance( _pm ); +#endif _pm->communicateParticles( _halo_min ); @@ -102,7 +111,9 @@ class Solver : public SolverBase _pm->get( Location::Particle(), Field::Position() ), _pm->get( Location::Particle(), Field::Velocity() ), _pm->get( Location::Particle(), Field::J() ) ); +#ifdef ExaMPM_ENABLE_ALL _lb->output( t ); +#endif } time += delta_t; @@ -117,7 +128,9 @@ class Solver : public SolverBase std::shared_ptr> _mesh; std::shared_ptr> _pm; int _rank; +#ifdef ExaMPM_ENABLE_ALL std::shared_ptr> _lb; +#endif }; //---------------------------------------------------------------------------// From 34868512801460a8f0b2f462c3f4dabfaa47e8c8 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 23 Aug 2021 19:33:33 +0200 Subject: [PATCH 39/64] get loadbalancer to compile --- .github/workflows/CI.yml | 4 +- examples/dam_break.cpp | 2 +- examples/free_fall.cpp | 2 +- src/ExaMPM_LoadBalancer.hpp | 78 +++++-------------------------------- src/ExaMPM_Mesh.hpp | 28 ++++++++++--- src/ExaMPM_Solver.hpp | 56 +++++++++++++++----------- 6 files changed, 69 insertions(+), 101 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 163336d..f77f9fe 100644 --- a/.github/workflows/CI.yml +++ 
b/.github/workflows/CI.yml @@ -33,8 +33,8 @@ jobs: - name: Checkout Cabana uses: actions/checkout@v2.2.0 with: - repository: ECP-copa/Cabana - ref: master + repository: aetx/Cabana + ref: cajita_loadbalancer path: Cabana - name: Build Cabana working-directory: Cabana diff --git a/examples/dam_break.cpp b/examples/dam_break.cpp index e137445..b1fc862 100644 --- a/examples/dam_break.cpp +++ b/examples/dam_break.cpp @@ -87,7 +87,7 @@ void damBreak( const double cell_size, const int ppc, const int halo_size, int comm_size; MPI_Comm_size( MPI_COMM_WORLD, &comm_size ); std::array ranks_per_dim = { 1, comm_size, 1 }; - Cajita::ManualPartitioner partitioner( ranks_per_dim ); + std::shared_ptr partitioner = std::make_shared( ranks_per_dim ); // Material properties. double bulk_modulus = 1.0e5; diff --git a/examples/free_fall.cpp b/examples/free_fall.cpp index 4b4eb19..079c084 100644 --- a/examples/free_fall.cpp +++ b/examples/free_fall.cpp @@ -86,7 +86,7 @@ void freeFall( const double cell_size, const int ppc, const int halo_size, int comm_size; MPI_Comm_size( MPI_COMM_WORLD, &comm_size ); std::array ranks_per_dim = { 1, comm_size, 1 }; - Cajita::ManualPartitioner partitioner( ranks_per_dim ); + std::shared_ptr partitioner = std::make_shared( ranks_per_dim ); // Material properties. double bulk_modulus = 5.0e5; diff --git a/src/ExaMPM_LoadBalancer.hpp b/src/ExaMPM_LoadBalancer.hpp index efe073a..ca08fb4 100644 --- a/src/ExaMPM_LoadBalancer.hpp +++ b/src/ExaMPM_LoadBalancer.hpp @@ -16,7 +16,7 @@ #include #include -#include +#include #include @@ -35,58 +35,17 @@ class LoadBalancer , _mesh( mesh ) { MPI_Comm_rank( comm, &_rank ); - _liball = - std::make_shared>( ALL::TENSOR, 3, 0 ); - // todo(sschulz): Check if the code still crashes. - // For some reason only(!) the following line causes the code to crash - // auto global_grid = _mesh->localGrid()->globalGrid(); - std::vector block_id( 3, 0 ); - for ( std::size_t i = 0; i < 3; ++i ) - block_id.at( i ) = _mesh->globalGrid().dimBlockId( i ); - std::vector blocks_per_dim( 3, 0 ); - for ( std::size_t i = 0; i < 3; ++i ) - blocks_per_dim.at( i ) = _mesh->globalGrid().dimNumBlock( i ); - _liball->setProcGridParams( block_id, blocks_per_dim ); - std::vector min_domain_size( 3, 0 ); - for ( std::size_t i = 0; i < 3; ++i ) - min_domain_size.at( i ) = 3. 
* _mesh->cellSize(); - _liball->setMinDomainSize( min_domain_size ); - _liball->setCommunicator( _comm ); - _liball->setProcTag( _rank ); - _liball->setup(); - std::vector> lb_vertices( 2, - ALL::Point( 3 ) ); - for ( std::size_t d = 0; d < 3; ++d ) - lb_vertices.at( 0 )[d] = - _mesh->localGrid()->globalGrid().globalOffset( d ) * - _mesh->cellSize(); - for ( std::size_t d = 0; d < 3; ++d ) - lb_vertices.at( 1 )[d] = - lb_vertices.at( 0 )[d] + - _mesh->localGrid()->globalGrid().ownedNumCell( d ) * - _mesh->cellSize(); - _liball->setVertices( lb_vertices ); + _lb = std::make_shared>>( _mesh->localGrid()->globalGrid(), 3.*_mesh->cellSize() ); } // This will update the domain decomposition and also update the mesh void balance( std::shared_ptr> pm ) { - _liball->setWork( pm->numParticle() ); - _liball->balance(); - std::vector> updated_vertices = - _liball->getVertices(); - std::array cell_index_lo, cell_index_hi; - for ( std::size_t d = 0; d < 3; ++d ) - cell_index_lo[d] = - std::rint( updated_vertices.at( 0 )[d] / _mesh->cellSize() ); - for ( std::size_t d = 0; d < 3; ++d ) - cell_index_hi[d] = - std::rint( updated_vertices.at( 1 )[d] / _mesh->cellSize() ); - std::array num_cell; - for ( std::size_t d = 0; d < 3; ++d ) - num_cell[d] = cell_index_hi[d] - cell_index_lo[d]; - _mesh->globalGrid().setNumCellAndOffset( num_cell, cell_index_lo ); - _liball->setVertices( updated_vertices ); + double work = pm->numParticle(); + // todo(sschulz): How to save the partitioner, without requiring a shared ptr everywhere? + auto partitioner = + auto global_grid = _lb->createBalancedGlobalGrid( _mesh->globalMesh(), partitioner, work ) + _mesh->newGlobalGrid(global_grid); pm->updateMesh( _mesh ); } @@ -95,28 +54,9 @@ class LoadBalancer { std::string vtk_actual_domain_basename( "domain_act" ); std::string vtk_lb_domain_basename( "domain_lb" ); - std::vector> updated_vertices = - _liball->getVertices(); - // todo(sschulz): The official VTK routine seems to create mangled files - // on my system. - // _liball->printVTKoutlines( t ); - std::array vertices; - for ( std::size_t d = 0; d < 3; ++d ) - vertices[d] = - static_cast( _mesh->globalGrid().globalOffset( d ) ) * - _mesh->cellSize(); - for ( std::size_t d = 3; d < 6; ++d ) - vertices[d] = vertices[d - 3] + - static_cast( - _mesh->globalGrid().ownedNumCell( d - 3 ) ) * - _mesh->cellSize(); - VTKDomainWriter::writeDomain( _comm, t, vertices, + VTKDomainWriter::writeDomain( _comm, t, _lb->getVertices(), vtk_actual_domain_basename ); - for ( std::size_t d = 0; d < 3; ++d ) - vertices[d] = updated_vertices.at( 0 )[d]; - for ( std::size_t d = 3; d < 6; ++d ) - vertices[d] = updated_vertices.at( 1 )[d - 3]; - VTKDomainWriter::writeDomain( _comm, t, vertices, + VTKDomainWriter::writeDomain( _comm, t, _lb->getInternalVertices(), vtk_lb_domain_basename ); } diff --git a/src/ExaMPM_Mesh.hpp b/src/ExaMPM_Mesh.hpp index 09754d5..af94d8e 100644 --- a/src/ExaMPM_Mesh.hpp +++ b/src/ExaMPM_Mesh.hpp @@ -91,16 +91,23 @@ class Mesh } // Create the global mesh. - auto global_mesh = Cajita::createUniformGlobalMesh( + _global_mesh = Cajita::createUniformGlobalMesh( global_low_corner, global_high_corner, num_cell ); // Build the global grid. - auto global_grid = Cajita::createGlobalGrid( comm, global_mesh, + _global_grid = Cajita::createGlobalGrid( comm, _global_mesh, periodic, partitioner ); // Build the local grid. 
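Note that the `balance()` body in the LoadBalancer hunk above is left mid-refactor in this commit: the `auto partitioner =` line is unfinished and a semicolon is missing before `_mesh->newGlobalGrid`. The sequence this commit actually compiles and exercises is the one added to ExaMPM_Solver.hpp further below. Condensed into a hedged sketch, with local names (`lb`, `mesh`, `pm`, `partitioner`, `halo_min`) standing in for the solver members and assuming the experimental Cajita LoadBalancer API used on this branch:

    // Per-step rebalance path (names from the diff):
    double work = pm->numParticle(); // work estimate = local particle count
    auto global_grid = lb->createBalancedGlobalGrid(
        mesh->globalMesh(), *partitioner, work ); // new decomposition from ALL
    mesh->newGlobalGrid( global_grid );   // rebuild the local grid, keep halo width
    pm->updateMesh( mesh );               // recreate grid fields on the new layout
    pm->communicateParticles( halo_min ); // migrate particles to their new owners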
- int halo_width = std::max( minimum_halo_cell_width, halo_cell_width ); - _local_grid = Cajita::createLocalGrid( global_grid, halo_width ); + _halo_width = std::max( minimum_halo_cell_width, halo_cell_width ); + _local_grid = Cajita::createLocalGrid( _global_grid, _halo_width ); + } + + // Global grid updated and update local values + void newGlobalGrid( std::shared_ptr>>& global_grid ) + { + _global_grid = global_grid; + _local_grid = Cajita::createLocalGrid( _global_grid, _halo_width ); } // Get the local grid. @@ -111,9 +118,14 @@ class Mesh } // Get the mutable global grid. - Cajita::GlobalGrid>& globalGrid() + const std::shared_ptr>>& globalGrid() { - return _local_grid->globalGrid(); + return _global_grid; + } + + const std::shared_ptr>>& globalMesh() const + { + return _global_mesh; } // Get the cell size. @@ -136,6 +148,10 @@ class Mesh public: std::shared_ptr>> _local_grid; + std::shared_ptr>> _global_grid; + std::shared_ptr>> _global_mesh; + + int _halo_width; Kokkos::Array _min_domain_global_node_index; Kokkos::Array _max_domain_global_node_index; diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index f932660..1557ef7 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -17,10 +17,9 @@ #include #include #include +#include -#ifdef ExaMPM_ENABLE_ALL -#include -#endif +#include #include @@ -47,7 +46,7 @@ class Solver : public SolverBase Solver( MPI_Comm comm, const Kokkos::Array& global_bounding_box, const std::array& global_num_cell, const std::array& periodic, - const Cajita::BlockPartitioner<3>& partitioner, + const std::shared_ptr& partitioner, const int halo_cell_width, const InitFunc& create_functor, const int particles_per_cell, const double bulk_modulus, const double density, const double gamma, const double kappa, @@ -57,9 +56,11 @@ class Solver : public SolverBase , _gravity( gravity ) , _bc( bc ) , _halo_min( 3 ) + , _comm(comm) + , _partitioner( partitioner) { _mesh = std::make_shared>( - global_bounding_box, global_num_cell, periodic, partitioner, + global_bounding_box, global_num_cell, periodic, *partitioner, halo_cell_width, _halo_min, comm ); _bc.min = _mesh->minDomainGlobalNodeIndex(); @@ -71,9 +72,7 @@ class Solver : public SolverBase MPI_Comm_rank( comm, &_rank ); -#ifdef ExaMPM_ENABLE_ALL - _lb = std::make_shared>( comm, _mesh ); -#endif + _lb = std::make_shared>>( _mesh->globalGrid(),3.* _mesh->cellSize() ); } void solve( const double t_final, const int write_freq ) override @@ -83,9 +82,16 @@ class Solver : public SolverBase _pm->get( Location::Particle(), Field::Position() ), _pm->get( Location::Particle(), Field::Velocity() ), _pm->get( Location::Particle(), Field::J() ) ); -#ifdef ExaMPM_ENABLE_ALL - _lb->output( 0 ); -#endif + + std::string vtk_actual_domain_basename( "domain_act" ); + std::string vtk_lb_domain_basename( "domain_lb" ); + std::array vertices; + vertices = _lb->getVertices(); + VTKDomainWriter::writeDomain( _comm, 0, vertices, + vtk_actual_domain_basename ); + vertices = _lb->getInternalVertices(); + VTKDomainWriter::writeDomain( _comm, 0, vertices, + vtk_lb_domain_basename ); int num_step = t_final / _dt; double delta_t = t_final / num_step; @@ -96,11 +102,13 @@ class Solver : public SolverBase printf( "Step %d / %d\n", t + 1, num_step ); TimeIntegrator::step( ExecutionSpace(), *_pm, delta_t, _gravity, - _bc ); + _bc ); -#ifdef ExaMPM_ENABLE_ALL - _lb->balance( _pm ); -#endif + double work = _pm->numParticle(); + // todo(sschulz): How to save the partitioner, without requiring a shared ptr everywhere? 
+ auto global_grid = _lb->createBalancedGlobalGrid( _mesh->globalMesh(), *_partitioner, work ); + _mesh->newGlobalGrid(global_grid); + _pm->updateMesh( _mesh ); _pm->communicateParticles( _halo_min ); @@ -111,9 +119,13 @@ class Solver : public SolverBase _pm->get( Location::Particle(), Field::Position() ), _pm->get( Location::Particle(), Field::Velocity() ), _pm->get( Location::Particle(), Field::J() ) ); -#ifdef ExaMPM_ENABLE_ALL - _lb->output( t ); -#endif + + vertices = _lb->getVertices(); + VTKDomainWriter::writeDomain( _comm, 0, vertices, + vtk_actual_domain_basename ); + vertices = _lb->getInternalVertices(); + VTKDomainWriter::writeDomain( _comm, 0, vertices, + vtk_lb_domain_basename ); } time += delta_t; @@ -128,9 +140,9 @@ class Solver : public SolverBase std::shared_ptr> _mesh; std::shared_ptr> _pm; int _rank; -#ifdef ExaMPM_ENABLE_ALL - std::shared_ptr> _lb; -#endif + MPI_Comm _comm; + std::shared_ptr _partitioner; + std::shared_ptr>> _lb; }; //---------------------------------------------------------------------------// @@ -141,7 +153,7 @@ createSolver( const std::string& device, MPI_Comm comm, const Kokkos::Array& global_bounding_box, const std::array& global_num_cell, const std::array& periodic, - const Cajita::BlockPartitioner<3>& partitioner, + const std::shared_ptr& partitioner, const int halo_cell_width, const InitFunc& create_functor, const int particles_per_cell, const double bulk_modulus, const double density, const double gamma, const double kappa, From ff993d45be877e9dd11221b85ff8f570cc699a5e Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 23 Aug 2021 20:17:07 +0200 Subject: [PATCH 40/64] build ALL before cabana and require ALL in cabana --- .github/workflows/CI.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index f77f9fe..ee7a5bc 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -30,6 +30,15 @@ jobs: cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/kokkos -DKokkos_CXX_STANDARD=14 -DKokkos_ENABLE_${{ matrix.backend }}=ON cmake --build build --parallel 2 cmake --install build + - name: Checkout ALL + run: | + git clone --depth 1 --branch master https://gitlab.jsc.fz-juelich.de/SLMS/loadbalancing ALL + - name: Build ALL + working-directory: ALL + run: | + cmake -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$HOME/ALL + cmake --build build --parallel 2 + cmake --install build - name: Checkout Cabana uses: actions/checkout@v2.2.0 with: @@ -39,16 +48,7 @@ jobs: - name: Build Cabana working-directory: Cabana run: | - cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/Cabana -DCMAKE_PREFIX_PATH="$HOME/kokkos" -DCabana_REQUIRE_${{ matrix.backend }}=ON - cmake --build build --parallel 2 - cmake --install build - - name: Checkout ALL - run: | - git clone --depth 1 --branch master https://gitlab.jsc.fz-juelich.de/SLMS/loadbalancing ALL - - name: Build ALL - working-directory: ALL - run: | - cmake -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$HOME/ALL + cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/Cabana -DCMAKE_PREFIX_PATH="$HOME/kokkos" -DCabana_REQUIRE_${{ matrix.backend }}=ON -DCabana_REQUIRE_ALL=ON cmake --build build --parallel 2 cmake --install build - name: Checkout code From 75726d42783f62755abaa15737f6837cb582f389 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 23 Aug 2021 20:18:25 +0200 Subject: [PATCH 41/64] make format --- examples/dam_break.cpp | 3 ++- examples/free_fall.cpp | 3 ++- src/ExaMPM_LoadBalancer.hpp | 13 
++++++++----- src/ExaMPM_Mesh.hpp | 20 +++++++++++++------- src/ExaMPM_Solver.hpp | 22 +++++++++++++--------- 5 files changed, 38 insertions(+), 23 deletions(-) diff --git a/examples/dam_break.cpp b/examples/dam_break.cpp index b1fc862..d7ff00e 100644 --- a/examples/dam_break.cpp +++ b/examples/dam_break.cpp @@ -87,7 +87,8 @@ void damBreak( const double cell_size, const int ppc, const int halo_size, int comm_size; MPI_Comm_size( MPI_COMM_WORLD, &comm_size ); std::array ranks_per_dim = { 1, comm_size, 1 }; - std::shared_ptr partitioner = std::make_shared( ranks_per_dim ); + std::shared_ptr partitioner = + std::make_shared( ranks_per_dim ); // Material properties. double bulk_modulus = 1.0e5; diff --git a/examples/free_fall.cpp b/examples/free_fall.cpp index 079c084..4b51889 100644 --- a/examples/free_fall.cpp +++ b/examples/free_fall.cpp @@ -86,7 +86,8 @@ void freeFall( const double cell_size, const int ppc, const int halo_size, int comm_size; MPI_Comm_size( MPI_COMM_WORLD, &comm_size ); std::array ranks_per_dim = { 1, comm_size, 1 }; - std::shared_ptr partitioner = std::make_shared( ranks_per_dim ); + std::shared_ptr partitioner = + std::make_shared( ranks_per_dim ); // Material properties. double bulk_modulus = 5.0e5; diff --git a/src/ExaMPM_LoadBalancer.hpp b/src/ExaMPM_LoadBalancer.hpp index ca08fb4..ada2bb6 100644 --- a/src/ExaMPM_LoadBalancer.hpp +++ b/src/ExaMPM_LoadBalancer.hpp @@ -35,17 +35,20 @@ class LoadBalancer , _mesh( mesh ) { MPI_Comm_rank( comm, &_rank ); - _lb = std::make_shared>>( _mesh->localGrid()->globalGrid(), 3.*_mesh->cellSize() ); + _lb = std::make_shared>>( + _mesh->localGrid()->globalGrid(), 3. * _mesh->cellSize() ); } // This will update the domain decomposition and also update the mesh void balance( std::shared_ptr> pm ) { double work = pm->numParticle(); - // todo(sschulz): How to save the partitioner, without requiring a shared ptr everywhere? - auto partitioner = - auto global_grid = _lb->createBalancedGlobalGrid( _mesh->globalMesh(), partitioner, work ) - _mesh->newGlobalGrid(global_grid); + // todo(sschulz): How to save the partitioner, without requiring a + // shared ptr everywhere? + auto partitioner = auto global_grid = + _lb->createBalancedGlobalGrid( _mesh->globalMesh(), partitioner, + work ) + _mesh->newGlobalGrid( global_grid ); pm->updateMesh( _mesh ); } diff --git a/src/ExaMPM_Mesh.hpp b/src/ExaMPM_Mesh.hpp index af94d8e..bdf886a 100644 --- a/src/ExaMPM_Mesh.hpp +++ b/src/ExaMPM_Mesh.hpp @@ -95,8 +95,8 @@ class Mesh global_low_corner, global_high_corner, num_cell ); // Build the global grid. - _global_grid = Cajita::createGlobalGrid( comm, _global_mesh, - periodic, partitioner ); + _global_grid = Cajita::createGlobalGrid( comm, _global_mesh, periodic, + partitioner ); // Build the local grid. _halo_width = std::max( minimum_halo_cell_width, halo_cell_width ); @@ -104,7 +104,9 @@ class Mesh } // Global grid updated and update local values - void newGlobalGrid( std::shared_ptr>>& global_grid ) + void newGlobalGrid( + std::shared_ptr>>& + global_grid ) { _global_grid = global_grid; _local_grid = Cajita::createLocalGrid( _global_grid, _halo_width ); @@ -118,12 +120,14 @@ class Mesh } // Get the mutable global grid. 
- const std::shared_ptr>>& globalGrid() + const std::shared_ptr>>& + globalGrid() { return _global_grid; } - const std::shared_ptr>>& globalMesh() const + const std::shared_ptr>>& + globalMesh() const { return _global_mesh; } @@ -148,8 +152,10 @@ class Mesh public: std::shared_ptr>> _local_grid; - std::shared_ptr>> _global_grid; - std::shared_ptr>> _global_mesh; + std::shared_ptr>> + _global_grid; + std::shared_ptr>> + _global_mesh; int _halo_width; diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 1557ef7..d3f9400 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -56,8 +56,8 @@ class Solver : public SolverBase , _gravity( gravity ) , _bc( bc ) , _halo_min( 3 ) - , _comm(comm) - , _partitioner( partitioner) + , _comm( comm ) + , _partitioner( partitioner ) { _mesh = std::make_shared>( global_bounding_box, global_num_cell, periodic, *partitioner, @@ -72,7 +72,9 @@ class Solver : public SolverBase MPI_Comm_rank( comm, &_rank ); - _lb = std::make_shared>>( _mesh->globalGrid(),3.* _mesh->cellSize() ); + _lb = + std::make_shared>>( + _mesh->globalGrid(), 3. * _mesh->cellSize() ); } void solve( const double t_final, const int write_freq ) override @@ -102,12 +104,14 @@ class Solver : public SolverBase printf( "Step %d / %d\n", t + 1, num_step ); TimeIntegrator::step( ExecutionSpace(), *_pm, delta_t, _gravity, - _bc ); + _bc ); double work = _pm->numParticle(); - // todo(sschulz): How to save the partitioner, without requiring a shared ptr everywhere? - auto global_grid = _lb->createBalancedGlobalGrid( _mesh->globalMesh(), *_partitioner, work ); - _mesh->newGlobalGrid(global_grid); + // todo(sschulz): How to save the partitioner, without requiring a + // shared ptr everywhere? + auto global_grid = _lb->createBalancedGlobalGrid( + _mesh->globalMesh(), *_partitioner, work ); + _mesh->newGlobalGrid( global_grid ); _pm->updateMesh( _mesh ); _pm->communicateParticles( _halo_min ); @@ -122,10 +126,10 @@ class Solver : public SolverBase vertices = _lb->getVertices(); VTKDomainWriter::writeDomain( _comm, 0, vertices, - vtk_actual_domain_basename ); + vtk_actual_domain_basename ); vertices = _lb->getInternalVertices(); VTKDomainWriter::writeDomain( _comm, 0, vertices, - vtk_lb_domain_basename ); + vtk_lb_domain_basename ); } time += delta_t; From 61bee26212272e5121b134e3230f5ad0a1b9103d Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 23 Aug 2021 20:22:54 +0200 Subject: [PATCH 42/64] make sure cabana finds all --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index ee7a5bc..852a441 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -48,7 +48,7 @@ jobs: - name: Build Cabana working-directory: Cabana run: | - cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/Cabana -DCMAKE_PREFIX_PATH="$HOME/kokkos" -DCabana_REQUIRE_${{ matrix.backend }}=ON -DCabana_REQUIRE_ALL=ON + cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/Cabana -DCMAKE_PREFIX_PATH="$HOME/kokkos:$HOME/ALL" -DCabana_REQUIRE_${{ matrix.backend }}=ON -DCabana_REQUIRE_ALL=ON cmake --build build --parallel 2 cmake --install build - name: Checkout code From 8f549db863ac58a44e0a9c203c9c9931270f83aa Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 23 Aug 2021 20:43:19 +0200 Subject: [PATCH 43/64] cmake syntax mistake --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 852a441..1aa93b3 100644 --- 
a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -48,7 +48,7 @@ jobs: - name: Build Cabana working-directory: Cabana run: | - cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/Cabana -DCMAKE_PREFIX_PATH="$HOME/kokkos:$HOME/ALL" -DCabana_REQUIRE_${{ matrix.backend }}=ON -DCabana_REQUIRE_ALL=ON + cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/Cabana -DCMAKE_PREFIX_PATH="$HOME/kokkos;$HOME/ALL" -DCabana_REQUIRE_${{ matrix.backend }}=ON -DCabana_REQUIRE_ALL=ON cmake --build build --parallel 2 cmake --install build - name: Checkout code From 901be364f7c685a041ff81cd98496fe3a730830f Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 23 Aug 2021 20:47:27 +0200 Subject: [PATCH 44/64] yaml syntax mistake --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 1aa93b3..51b81e7 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -48,7 +48,7 @@ jobs: - name: Build Cabana working-directory: Cabana run: | - cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/Cabana -DCMAKE_PREFIX_PATH="$HOME/kokkos;$HOME/ALL" -DCabana_REQUIRE_${{ matrix.backend }}=ON -DCabana_REQUIRE_ALL=ON + cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/Cabana -DCMAKE_PREFIX_PATH="$HOME/kokkos;$HOME/ALL" -DCabana_REQUIRE_${{ matrix.backend }}=ON -DCabana_REQUIRE_ALL=ON cmake --build build --parallel 2 cmake --install build - name: Checkout code From ebdd9d6d896c4aed9644be51d7c697fd97fad98e Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Thu, 26 Aug 2021 10:31:34 +0200 Subject: [PATCH 45/64] make sure new mesh is actually used --- src/ExaMPM_ProblemManager.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ExaMPM_ProblemManager.hpp b/src/ExaMPM_ProblemManager.hpp index ae423dd..38ece11 100644 --- a/src/ExaMPM_ProblemManager.hpp +++ b/src/ExaMPM_ProblemManager.hpp @@ -150,6 +150,7 @@ class ProblemManager void updateMesh( const std::shared_ptr& mesh ) { + _mesh = mesh; auto node_vector_layout = Cajita::createArrayLayout( _mesh->localGrid(), 3, Cajita::Node() ); auto node_scalar_layout = From d415dcb736da13ce6925567f0f71eb340d97deb6 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Thu, 26 Aug 2021 10:32:30 +0200 Subject: [PATCH 46/64] add debug output --- src/ExaMPM_ParticleInit.hpp | 2 ++ src/ExaMPM_SiloParticleWriter.hpp | 3 +++ 2 files changed, 5 insertions(+) diff --git a/src/ExaMPM_ParticleInit.hpp b/src/ExaMPM_ParticleInit.hpp index 8b29b61..d03f927 100644 --- a/src/ExaMPM_ParticleInit.hpp +++ b/src/ExaMPM_ParticleInit.hpp @@ -67,6 +67,7 @@ void filterEmpties( const ExecutionSpace& exec_space, } ); particles.resize( local_num_create ); particles.shrinkToFit(); + printf("%s: particles.size(): %lu\n", __func__, particles.size()); } //---------------------------------------------------------------------------// @@ -202,6 +203,7 @@ void initializeParticles( const ExecSpace& exec_space, } }, local_num_create ); + printf("%s: local_num_create %d\n", __func__, local_num_create); // Filter empties. filterEmpties( exec_space, local_num_create, particle_created, particles ); diff --git a/src/ExaMPM_SiloParticleWriter.hpp b/src/ExaMPM_SiloParticleWriter.hpp index 99a332a..776e235 100644 --- a/src/ExaMPM_SiloParticleWriter.hpp +++ b/src/ExaMPM_SiloParticleWriter.hpp @@ -28,6 +28,7 @@ #include #include #include +#include namespace ExaMPM { @@ -413,6 +414,8 @@ void writeTimeStep( const GlobalGridType& global_grid, // Mirror the coordinates to the host. 
auto host_coords = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), view ); + printf("%s: coords size: %lu, extent %lu %lu %lu\n", __func__, coords.size(), coords.extent(0), coords.extent(1), coords.extent(2)); + printf("%s: host_coords size: %lu, extent %lu %lu %lu\n", __func__, host_coords.size(), host_coords.extent(0), host_coords.extent(1), host_coords.extent(2)); // Add the point mesh. std::string mesh_name = "particles"; From 3181c0f6f58eb08ee5d34f6442a51391731e5031 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Thu, 26 Aug 2021 11:01:22 +0200 Subject: [PATCH 47/64] update to new Cabana lb interface --- src/ExaMPM_Solver.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index d3f9400..081ca0a 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -73,7 +73,7 @@ class Solver : public SolverBase MPI_Comm_rank( comm, &_rank ); _lb = - std::make_shared>>( + std::make_shared>>( _comm, _mesh->globalGrid(), 3. * _mesh->cellSize() ); } From 3292ef75d0419ec6eebdc2e2db81ad3a0396da98 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Thu, 26 Aug 2021 11:01:31 +0200 Subject: [PATCH 48/64] Revert "add debug output" This reverts commit d415dcb736da13ce6925567f0f71eb340d97deb6. --- src/ExaMPM_ParticleInit.hpp | 2 -- src/ExaMPM_SiloParticleWriter.hpp | 3 --- 2 files changed, 5 deletions(-) diff --git a/src/ExaMPM_ParticleInit.hpp b/src/ExaMPM_ParticleInit.hpp index d03f927..8b29b61 100644 --- a/src/ExaMPM_ParticleInit.hpp +++ b/src/ExaMPM_ParticleInit.hpp @@ -67,7 +67,6 @@ void filterEmpties( const ExecutionSpace& exec_space, } ); particles.resize( local_num_create ); particles.shrinkToFit(); - printf("%s: particles.size(): %lu\n", __func__, particles.size()); } //---------------------------------------------------------------------------// @@ -203,7 +202,6 @@ void initializeParticles( const ExecSpace& exec_space, } }, local_num_create ); - printf("%s: local_num_create %d\n", __func__, local_num_create); // Filter empties. filterEmpties( exec_space, local_num_create, particle_created, particles ); diff --git a/src/ExaMPM_SiloParticleWriter.hpp b/src/ExaMPM_SiloParticleWriter.hpp index 776e235..99a332a 100644 --- a/src/ExaMPM_SiloParticleWriter.hpp +++ b/src/ExaMPM_SiloParticleWriter.hpp @@ -28,7 +28,6 @@ #include #include #include -#include namespace ExaMPM { @@ -414,8 +413,6 @@ void writeTimeStep( const GlobalGridType& global_grid, // Mirror the coordinates to the host. auto host_coords = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), view ); - printf("%s: coords size: %lu, extent %lu %lu %lu\n", __func__, coords.size(), coords.extent(0), coords.extent(1), coords.extent(2)); - printf("%s: host_coords size: %lu, extent %lu %lu %lu\n", __func__, host_coords.size(), host_coords.extent(0), host_coords.extent(1), host_coords.extent(2)); // Add the point mesh. std::string mesh_name = "particles"; From aca70637236dd44553309857222f0cfa63a4d478 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Thu, 26 Aug 2021 11:03:37 +0200 Subject: [PATCH 49/64] format --- src/ExaMPM_Solver.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 081ca0a..93b71ed 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -73,8 +73,8 @@ class Solver : public SolverBase MPI_Comm_rank( comm, &_rank ); _lb = - std::make_shared>>( _comm, - _mesh->globalGrid(), 3. * _mesh->cellSize() ); + std::make_shared>>( + _comm, _mesh->globalGrid(), 3. 
* _mesh->cellSize() ); } void solve( const double t_final, const int write_freq ) override From 979a39e96b6dfa79b30faa690b88a75e3f71e78e Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Thu, 26 Aug 2021 11:03:59 +0200 Subject: [PATCH 50/64] fix output time step for domain output --- src/ExaMPM_Solver.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 93b71ed..25f5f59 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -125,10 +125,10 @@ class Solver : public SolverBase _pm->get( Location::Particle(), Field::J() ) ); vertices = _lb->getVertices(); - VTKDomainWriter::writeDomain( _comm, 0, vertices, + VTKDomainWriter::writeDomain( _comm, t, vertices, vtk_actual_domain_basename ); vertices = _lb->getInternalVertices(); - VTKDomainWriter::writeDomain( _comm, 0, vertices, + VTKDomainWriter::writeDomain( _comm, t, vertices, vtk_lb_domain_basename ); } From c2e7061a79f22a46ce6c04fefcdd10f87eb9dcf7 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Thu, 26 Aug 2021 11:04:22 +0200 Subject: [PATCH 51/64] add todo regarding Silo writer and empty domains --- src/ExaMPM_SiloParticleWriter.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ExaMPM_SiloParticleWriter.hpp b/src/ExaMPM_SiloParticleWriter.hpp index 99a332a..6b55794 100644 --- a/src/ExaMPM_SiloParticleWriter.hpp +++ b/src/ExaMPM_SiloParticleWriter.hpp @@ -413,6 +413,8 @@ void writeTimeStep( const GlobalGridType& global_grid, // Mirror the coordinates to the host. auto host_coords = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), view ); + // todo(sschulz): Handle empty domains. Without kokkos debug build this + // seems to work. But it should still be handled properly. // Add the point mesh. 
std::string mesh_name = "particles"; From b91bfb6449a22230072f3b5eb701e87b150c3ae8 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Sun, 29 Aug 2021 11:27:32 +0200 Subject: [PATCH 52/64] use Cabana_ENABLE_ALL and clean up --- CMakeLists.txt | 16 -------- src/CMakeLists.txt | 14 ++----- src/ExaMPM_LoadBalancer.hpp | 73 ------------------------------------- src/ExaMPM_Solver.hpp | 15 +++++++- 4 files changed, 17 insertions(+), 101 deletions(-) delete mode 100644 src/ExaMPM_LoadBalancer.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 97bf6fc..a0a242f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,22 +16,6 @@ if( NOT Cabana_ENABLE_CAJITA ) endif() find_package(Silo REQUIRED) -# optional dependencies (taken from Cabana) -macro(ExaMPM_add_dependency) - cmake_parse_arguments(EXAMPM_DEPENDENCY "" "PACKAGE;VERSION" "" ${ARGN}) - find_package( ${EXAMPM_DEPENDENCY_PACKAGE} ${EXAMPM_DEPENDENCY_VERSION} QUIET ) - string(TOUPPER "${EXAMPM_DEPENDENCY_PACKAGE}" EXAMPM_DEPENDENCY_OPTION ) - option( - ExaMPM_REQUIRE_${EXAMPM_DEPENDENCY_OPTION} - "Require ExaMPM to build with ${EXAMPM_DEPENDENCY_PACKAGE} support" ${EXAMPM_DEPENDENCY_PACKAGE}_FOUND) - if(ExaMPM_REQUIRE_${EXAMPM_DEPENDENCY_OPTION}) - find_package( ${EXAMPM_DEPENDENCY_PACKAGE} ${EXAMPM_DEPENDENCY_VERSION} REQUIRED ) - endif() - set(ExaMPM_ENABLE_${EXAMPM_DEPENDENCY_OPTION} ${${EXAMPM_DEPENDENCY_PACKAGE}_FOUND}) -endmacro() - -ExaMPM_add_dependency( PACKAGE "ALL" VERSION 0.9.2 ) - # find Clang Format find_package( CLANG_FORMAT 10 ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 46f5c82..b264cb1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,27 +12,21 @@ set(HEADERS ExaMPM_VelocityInterpolation.hpp ) -if(ExaMPM_ENABLE_ALL) - list(APPEND HEADERS - ExaMPM_LoadBalancer.hpp - ) -endif() - set(SOURCES ExaMPM_Mesh.cpp ) add_library(exampm ${SOURCES}) +if(Cabana_ENABLE_ALL) + target_compile_definitions(exampm PUBLIC Cabana_ENABLE_ALL) +endif() + target_link_libraries(exampm Cabana::cabanacore Cabana::Cajita Silo::silo ) -if(ExaMPM_ENABLE_ALL) - target_link_libraries(exampm ALL::ALL) -endif() - target_include_directories(exampm PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/src/ExaMPM_LoadBalancer.hpp b/src/ExaMPM_LoadBalancer.hpp deleted file mode 100644 index ada2bb6..0000000 --- a/src/ExaMPM_LoadBalancer.hpp +++ /dev/null @@ -1,73 +0,0 @@ -/**************************************************************************** - * Copyright (c) 2018-2020 by the ExaMPM authors * - * All rights reserved. * - * * - * This file is part of the ExaMPM library. ExaMPM is distributed under a * - * BSD 3-clause license. For the licensing terms see the LICENSE file in * - * the top-level directory. * - * * - * SPDX-License-Identifier: BSD-3-Clause * - ****************************************************************************/ - -#ifndef EXAMPM_LOADBALANCER_HPP -#define EXAMPM_LOADBALANCER_HPP - -#include -#include -#include - -#include - -#include - -#include - -namespace ExaMPM -{ -template -class LoadBalancer -{ - public: - using mesh_type = Mesh; - // todo(sschulz): Allow for arbitrary dimension - LoadBalancer( MPI_Comm comm, std::shared_ptr& mesh ) - : _comm( comm ) - , _mesh( mesh ) - { - MPI_Comm_rank( comm, &_rank ); - _lb = std::make_shared>>( - _mesh->localGrid()->globalGrid(), 3. 
* _mesh->cellSize() ); - } - - // This will update the domain decomposition and also update the mesh - void balance( std::shared_ptr> pm ) - { - double work = pm->numParticle(); - // todo(sschulz): How to save the partitioner, without requiring a - // shared ptr everywhere? - auto partitioner = auto global_grid = - _lb->createBalancedGlobalGrid( _mesh->globalMesh(), partitioner, - work ) - _mesh->newGlobalGrid( global_grid ); - pm->updateMesh( _mesh ); - } - - // Output the actual and internal load balancing grid to VTK files - void output( const int t ) const - { - std::string vtk_actual_domain_basename( "domain_act" ); - std::string vtk_lb_domain_basename( "domain_lb" ); - VTKDomainWriter::writeDomain( _comm, t, _lb->getVertices(), - vtk_actual_domain_basename ); - VTKDomainWriter::writeDomain( _comm, t, _lb->getInternalVertices(), - vtk_lb_domain_basename ); - } - - private: - MPI_Comm _comm; - std::shared_ptr _mesh; - std::shared_ptr> _liball; - int _rank; -}; -} // end namespace ExaMPM -#endif // EXAMPM_LOADBALANCER_HPP diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 25f5f59..2f9a397 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -19,7 +19,9 @@ #include #include +#ifdef Cabana_ENABLE_ALL #include +#endif #include @@ -72,9 +74,11 @@ class Solver : public SolverBase MPI_Comm_rank( comm, &_rank ); +#ifdef Cabana_ENABLE_ALL _lb = std::make_shared>>( _comm, _mesh->globalGrid(), 3. * _mesh->cellSize() ); +#endif } void solve( const double t_final, const int write_freq ) override @@ -88,12 +92,15 @@ class Solver : public SolverBase std::string vtk_actual_domain_basename( "domain_act" ); std::string vtk_lb_domain_basename( "domain_lb" ); std::array vertices; + +#ifdef Cabana_ENABLE_ALL vertices = _lb->getVertices(); VTKDomainWriter::writeDomain( _comm, 0, vertices, vtk_actual_domain_basename ); vertices = _lb->getInternalVertices(); VTKDomainWriter::writeDomain( _comm, 0, vertices, vtk_lb_domain_basename ); +#endif int num_step = t_final / _dt; double delta_t = t_final / num_step; @@ -106,13 +113,13 @@ class Solver : public SolverBase TimeIntegrator::step( ExecutionSpace(), *_pm, delta_t, _gravity, _bc ); +#ifdef Cabana_ENABLE_ALL double work = _pm->numParticle(); - // todo(sschulz): How to save the partitioner, without requiring a - // shared ptr everywhere? 
auto global_grid = _lb->createBalancedGlobalGrid( _mesh->globalMesh(), *_partitioner, work ); _mesh->newGlobalGrid( global_grid ); _pm->updateMesh( _mesh ); +#endif _pm->communicateParticles( _halo_min ); @@ -124,12 +131,14 @@ class Solver : public SolverBase _pm->get( Location::Particle(), Field::Velocity() ), _pm->get( Location::Particle(), Field::J() ) ); +#ifdef Cabana_ENABLE_ALL vertices = _lb->getVertices(); VTKDomainWriter::writeDomain( _comm, t, vertices, vtk_actual_domain_basename ); vertices = _lb->getInternalVertices(); VTKDomainWriter::writeDomain( _comm, t, vertices, vtk_lb_domain_basename ); +#endif } time += delta_t; @@ -146,7 +155,9 @@ class Solver : public SolverBase int _rank; MPI_Comm _comm; std::shared_ptr _partitioner; +#ifdef Cabana_ENABLE_ALL std::shared_ptr>> _lb; +#endif }; //---------------------------------------------------------------------------// From dcc9337dd3461cffa5849e9a095942023ee619b3 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Mon, 30 Aug 2021 17:48:16 +0200 Subject: [PATCH 53/64] explicitly setting Cabana_ENABLE_ALL no longer required --- src/CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b264cb1..42fced2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,10 +18,6 @@ set(SOURCES add_library(exampm ${SOURCES}) -if(Cabana_ENABLE_ALL) - target_compile_definitions(exampm PUBLIC Cabana_ENABLE_ALL) -endif() - target_link_libraries(exampm Cabana::cabanacore Cabana::Cajita From 0981e1fb53a884889e13288b9b37d15e2fc343f2 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Wed, 1 Sep 2021 19:07:50 +0200 Subject: [PATCH 54/64] fix warning --- src/ExaMPM_VTKDomainWriter.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ExaMPM_VTKDomainWriter.hpp b/src/ExaMPM_VTKDomainWriter.hpp index b23c5d7..e50c7eb 100644 --- a/src/ExaMPM_VTKDomainWriter.hpp +++ b/src/ExaMPM_VTKDomainWriter.hpp @@ -41,7 +41,7 @@ void writeDomainParallelFile( MPI_Comm comm, int time_step, fprintf( file, "\t\t\n" ); fprintf( file, "\t\n" ); - for ( std::size_t i = 0; i < size; ++i ) + for ( int i = 0; i < size; ++i ) fprintf( file, "\t\n", basename.c_str(), time_step, i ); fprintf( file, "\n" ); From 17d3ebca646ced62972e40aed5c9c1d7350c53f8 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Wed, 1 Sep 2021 19:11:49 +0200 Subject: [PATCH 55/64] fix warnings --- src/ExaMPM_VTKDomainWriter.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ExaMPM_VTKDomainWriter.hpp b/src/ExaMPM_VTKDomainWriter.hpp index e50c7eb..29afd12 100644 --- a/src/ExaMPM_VTKDomainWriter.hpp +++ b/src/ExaMPM_VTKDomainWriter.hpp @@ -42,7 +42,7 @@ void writeDomainParallelFile( MPI_Comm comm, int time_step, "NumberOfComponents=\"3\"/>\n" ); fprintf( file, "\t\n" ); for ( int i = 0; i < size; ++i ) - fprintf( file, "\t\n", + fprintf( file, "\t\n", basename.c_str(), time_step, i ); fprintf( file, "\n" ); fprintf( file, "\n" ); From 60143717f1bc01920b2a62aad516b7946e910643 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Wed, 1 Sep 2021 19:47:00 +0200 Subject: [PATCH 56/64] make format --- src/ExaMPM_VTKDomainWriter.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ExaMPM_VTKDomainWriter.hpp b/src/ExaMPM_VTKDomainWriter.hpp index 29afd12..ff806d6 100644 --- a/src/ExaMPM_VTKDomainWriter.hpp +++ b/src/ExaMPM_VTKDomainWriter.hpp @@ -42,8 +42,8 @@ void writeDomainParallelFile( MPI_Comm comm, int time_step, "NumberOfComponents=\"3\"/>\n" ); fprintf( file, "\t\n" ); for ( int 
i = 0; i < size; ++i ) - fprintf( file, "\t\n", - basename.c_str(), time_step, i ); + fprintf( file, "\t\n", basename.c_str(), + time_step, i ); fprintf( file, "\n" ); fprintf( file, "\n" ); fclose( file ); From bb6526e26721effbd50764377f0378ee7b0171c2 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Thu, 2 Sep 2021 17:56:37 +0200 Subject: [PATCH 57/64] Use ExaMPM_ENABLE_LB to force lb on or off --- CMakeLists.txt | 3 +++ src/CMakeLists.txt | 4 ++++ src/ExaMPM_Solver.hpp | 12 ++++++------ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a0a242f..1e21d27 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,7 @@ cmake_minimum_required(VERSION 3.12) project(ExaMPM LANGUAGES CXX VERSION 0.1.0) include(GNUInstallDirs) +include(CMakeDependentOption) # find dependencies set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH}) @@ -16,6 +17,8 @@ if( NOT Cabana_ENABLE_CAJITA ) endif() find_package(Silo REQUIRED) +cmake_dependent_option(ExaMPM_ENABLE_LB "Enable Cabana load balancing" ON Cabana_ENABLE_ALL OFF) + # find Clang Format find_package( CLANG_FORMAT 10 ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 42fced2..70c88e1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,6 +18,10 @@ set(SOURCES add_library(exampm ${SOURCES}) +if(ExaMPM_ENABLE_LB) + target_compile_definitions(exampm PUBLIC ExaMPM_ENABLE_LB) +endif() + target_link_libraries(exampm Cabana::cabanacore Cabana::Cajita diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index 2f9a397..b30a6e9 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -19,7 +19,7 @@ #include #include -#ifdef Cabana_ENABLE_ALL +#ifdef ExaMPM_ENABLE_LB #include #endif @@ -74,7 +74,7 @@ class Solver : public SolverBase MPI_Comm_rank( comm, &_rank ); -#ifdef Cabana_ENABLE_ALL +#ifdef ExaMPM_ENABLE_LB _lb = std::make_shared>>( _comm, _mesh->globalGrid(), 3. 
* _mesh->cellSize() ); @@ -93,7 +93,7 @@ class Solver : public SolverBase std::string vtk_lb_domain_basename( "domain_lb" ); std::array vertices; -#ifdef Cabana_ENABLE_ALL +#ifdef ExaMPM_ENABLE_LB vertices = _lb->getVertices(); VTKDomainWriter::writeDomain( _comm, 0, vertices, vtk_actual_domain_basename ); @@ -113,7 +113,7 @@ class Solver : public SolverBase TimeIntegrator::step( ExecutionSpace(), *_pm, delta_t, _gravity, _bc ); -#ifdef Cabana_ENABLE_ALL +#ifdef ExaMPM_ENABLE_LB double work = _pm->numParticle(); auto global_grid = _lb->createBalancedGlobalGrid( _mesh->globalMesh(), *_partitioner, work ); @@ -131,7 +131,7 @@ class Solver : public SolverBase _pm->get( Location::Particle(), Field::Velocity() ), _pm->get( Location::Particle(), Field::J() ) ); -#ifdef Cabana_ENABLE_ALL +#ifdef ExaMPM_ENABLE_LB vertices = _lb->getVertices(); VTKDomainWriter::writeDomain( _comm, t, vertices, vtk_actual_domain_basename ); @@ -155,7 +155,7 @@ class Solver : public SolverBase int _rank; MPI_Comm _comm; std::shared_ptr _partitioner; -#ifdef Cabana_ENABLE_ALL +#ifdef ExaMPM_ENABLE_LB std::shared_ptr>> _lb; #endif }; From 034e5f5b40a7e203539715f08e2d1d4a27a42f76 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Wed, 8 Sep 2021 17:41:56 +0200 Subject: [PATCH 58/64] only balance every 10 steps --- src/ExaMPM_Solver.hpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index b30a6e9..df97e64 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -58,6 +58,7 @@ class Solver : public SolverBase , _gravity( gravity ) , _bc( bc ) , _halo_min( 3 ) + , _lb_step( 10 ) , _comm( comm ) , _partitioner( partitioner ) { @@ -114,11 +115,14 @@ class Solver : public SolverBase _bc ); #ifdef ExaMPM_ENABLE_LB - double work = _pm->numParticle(); - auto global_grid = _lb->createBalancedGlobalGrid( - _mesh->globalMesh(), *_partitioner, work ); - _mesh->newGlobalGrid( global_grid ); - _pm->updateMesh( _mesh ); + if ( _lb_step > 0 && 0 == t % _lb_step ) + { + double work = _pm->numParticle(); + auto global_grid = _lb->createBalancedGlobalGrid( + _mesh->globalMesh(), *_partitioner, work ); + _mesh->newGlobalGrid( global_grid ); + _pm->updateMesh( _mesh ); + } #endif _pm->communicateParticles( _halo_min ); @@ -152,6 +156,7 @@ class Solver : public SolverBase int _halo_min; std::shared_ptr> _mesh; std::shared_ptr> _pm; + int _lb_step; int _rank; MPI_Comm _comm; std::shared_ptr _partitioner; From fd35d8e4a6b907e2cd1ff23bfee24c661d553248 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Wed, 15 Sep 2021 15:09:34 +0200 Subject: [PATCH 59/64] add basic timing info --- src/ExaMPM_Solver.hpp | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index df97e64..d86796c 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -84,6 +84,14 @@ class Solver : public SolverBase void solve( const double t_final, const int write_freq ) override { + double output_time = 0; + double integrate_time = 0; + double lb_time = 0; + double comm_part_time = 0; + Kokkos::Timer timer, output_timer, integrate_timer, lb_timer, + comm_part_timer; + + output_timer.reset(); SiloParticleWriter::writeTimeStep( _mesh->localGrid()->globalGrid(), 0, 0.0, _pm->get( Location::Particle(), Field::Position() ), @@ -102,6 +110,7 @@ class Solver : public SolverBase VTKDomainWriter::writeDomain( _comm, 0, vertices, vtk_lb_domain_basename ); #endif + output_time += output_timer.seconds(); 
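// A minimal sketch of the timing idiom this patch applies to every phase,
// assuming only Kokkos::Timer; "do_phase" is a hypothetical stand-in for
// the measured phase, not a function in this code base:
//
//     Kokkos::Timer phase_timer;            // starts timing at construction
//     double phase_time = 0.;
//     phase_timer.reset();                  // restart right before the phase
//     do_phase();
//     phase_time += phase_timer.seconds();  // accumulate elapsed wall time
//
// Kokkos::Timer::seconds() reports wall-clock seconds since the last
// reset(), so each accumulation above is paired with its own reset().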
int num_step = t_final / _dt; double delta_t = t_final / num_step; @@ -111,9 +120,12 @@ class Solver : public SolverBase if ( 0 == _rank && 0 == t % write_freq ) printf( "Step %d / %d\n", t + 1, num_step ); + integrate_timer.reset(); TimeIntegrator::step( ExecutionSpace(), *_pm, delta_t, _gravity, _bc ); + integrate_time += integrate_timer.seconds(); + lb_timer.reset(); #ifdef ExaMPM_ENABLE_LB if ( _lb_step > 0 && 0 == t % _lb_step ) { @@ -124,11 +136,15 @@ class Solver : public SolverBase _pm->updateMesh( _mesh ); } #endif + lb_time += lb_timer.seconds(); + comm_part_timer.reset(); _pm->communicateParticles( _halo_min ); + comm_part_time += comm_part_timer.seconds(); if ( 0 == t % write_freq ) { + output_timer.reset(); SiloParticleWriter::writeTimeStep( _mesh->localGrid()->globalGrid(), t + 1, time, _pm->get( Location::Particle(), Field::Position() ), @@ -143,10 +159,37 @@ class Solver : public SolverBase VTKDomainWriter::writeDomain( _comm, t, vertices, vtk_lb_domain_basename ); #endif + output_time += output_timer.seconds(); } time += delta_t; } + double total_time = timer.seconds(); + double steps_per_sec = 1. * num_step / total_time; + int total_num_particle; + int num_particle = _pm->numParticle(); + MPI_Reduce( &num_particle, &total_num_particle, 1, MPI_INT, MPI_SUM, 0, + _comm ); + int comm_size; + MPI_Comm_size( _comm, &comm_size ); + if ( _rank == 0 ) + { + std::cout + << std::fixed << std::setprecision( 2 ) + << "\n#Procs Particles | Time T_Out T_lb T_Int T_Comm_part\n" + << comm_size << " " << total_num_particle << " | " << total_time + << " " << output_time << " " << lb_time << " " << integrate_time + << " " << comm_part_time << " | PERFORMANCE\n" + << comm_size << " " << total_num_particle << " | " + << "1.0" + << " " << output_time / total_time << " " + << lb_time / total_time << " " << integrate_time / total_time + << " " << comm_part_time / total_time << " | FRACTION\n\n" + << "#Steps/s Particlesteps/s Particlesteps/(proc*s)\n" + << std::scientific << steps_per_sec << " " + << steps_per_sec * total_num_particle << " " + << steps_per_sec * total_num_particle / comm_size << std::endl; + } } private: From 9babd7c9d270e07d23ebba6f3c3b20bd90b3b41d Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Tue, 21 Sep 2021 11:16:49 +0200 Subject: [PATCH 60/64] allow run time selection of lb frequency --- examples/dam_break.cpp | 10 +++++++--- examples/free_fall.cpp | 10 +++++++--- src/ExaMPM_ProblemManager.hpp | 3 +++ src/ExaMPM_Solver.hpp | 12 +++++++++--- 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/examples/dam_break.cpp b/examples/dam_break.cpp index d7ff00e..47c4370 100644 --- a/examples/dam_break.cpp +++ b/examples/dam_break.cpp @@ -66,7 +66,7 @@ struct ParticleInitFunc //---------------------------------------------------------------------------// void damBreak( const double cell_size, const int ppc, const int halo_size, const double delta_t, const double t_final, const int write_freq, - const std::string& device ) + const std::string& device, const int lb_freq ) { // The dam break domain is in a box on [0,1] in each dimension. 
Kokkos::Array global_box = { 0.0, 0.0, 0.0, 1.0, 1.0, 1.0 }; @@ -113,7 +113,7 @@ void damBreak( const double cell_size, const int ppc, const int halo_size, device, MPI_COMM_WORLD, global_box, global_num_cell, periodic, partitioner, halo_size, ParticleInitFunc( cell_size, ppc, density ), ppc, bulk_modulus, density, gamma, kappa, delta_t, gravity, bc ); - solver->solve( t_final, write_freq ); + solver->solve( t_final, write_freq, lb_freq ); } //---------------------------------------------------------------------------// @@ -144,8 +144,12 @@ int main( int argc, char* argv[] ) // device type std::string device( argv[7] ); + // optional lb frequency + int lb_freq = argc > 8 ? std::atoi( argv[8] ) : 10; + // run the problem. - damBreak( cell_size, ppc, halo_size, delta_t, t_final, write_freq, device ); + damBreak( cell_size, ppc, halo_size, delta_t, t_final, write_freq, device, + lb_freq ); Kokkos::finalize(); diff --git a/examples/free_fall.cpp b/examples/free_fall.cpp index 4b51889..9bb24f9 100644 --- a/examples/free_fall.cpp +++ b/examples/free_fall.cpp @@ -65,7 +65,7 @@ struct ParticleInitFunc //---------------------------------------------------------------------------// void freeFall( const double cell_size, const int ppc, const int halo_size, const double delta_t, const double t_final, const int write_freq, - const std::string& device ) + const std::string& device, const int lb_freq ) { // The dam break domain is in a box on [0,1] in each dimension. Kokkos::Array global_box = { -0.5, -0.5, -0.5, 0.5, 0.5, 0.5 }; @@ -112,7 +112,7 @@ void freeFall( const double cell_size, const int ppc, const int halo_size, device, MPI_COMM_WORLD, global_box, global_num_cell, periodic, partitioner, halo_size, ParticleInitFunc( cell_size, ppc, density ), ppc, bulk_modulus, density, gamma, kappa, delta_t, gravity, bc ); - solver->solve( t_final, write_freq ); + solver->solve( t_final, write_freq, lb_freq ); } //---------------------------------------------------------------------------// @@ -143,8 +143,12 @@ int main( int argc, char* argv[] ) // device type std::string device( argv[7] ); + // optional lb frequency + int lb_freq = argc > 8 ? std::atoi( argv[8] ) : 10; + // run the problem. 
- freeFall( cell_size, ppc, halo_size, delta_t, t_final, write_freq, device ); + freeFall( cell_size, ppc, halo_size, delta_t, t_final, write_freq, device, + lb_freq ); Kokkos::finalize(); diff --git a/src/ExaMPM_ProblemManager.hpp b/src/ExaMPM_ProblemManager.hpp index 38ece11..1fd21a1 100644 --- a/src/ExaMPM_ProblemManager.hpp +++ b/src/ExaMPM_ProblemManager.hpp @@ -293,6 +293,9 @@ class ProblemManager void scatter( Location::Cell, Field::Density ) const { + printf( "%s: _density extent %lu %lu %lu\n", __func__, + _density->view().extent( 0 ), _density->view().extent( 1 ), + _density->view().extent( 2 ) ); _cell_scalar_halo->scatter( execution_space(), Cajita::ScatterReduce::Sum(), *_density ); } diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index d86796c..d7f9aff 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -37,6 +37,8 @@ class SolverBase public: virtual ~SolverBase() = default; virtual void solve( const double t_final, const int write_freq ) = 0; + virtual void solve( const double t_final, const int write_freq, + const int lb_freq ) = 0; }; //---------------------------------------------------------------------------// @@ -58,7 +60,6 @@ class Solver : public SolverBase , _gravity( gravity ) , _bc( bc ) , _halo_min( 3 ) - , _lb_step( 10 ) , _comm( comm ) , _partitioner( partitioner ) { @@ -83,6 +84,12 @@ class Solver : public SolverBase } void solve( const double t_final, const int write_freq ) override + { + solve( t_final, write_freq, 10 ); + } + + void solve( const double t_final, const int write_freq, + const int lb_freq ) override { double output_time = 0; double integrate_time = 0; @@ -127,7 +134,7 @@ class Solver : public SolverBase lb_timer.reset(); #ifdef ExaMPM_ENABLE_LB - if ( _lb_step > 0 && 0 == t % _lb_step ) + if ( lb_freq > 0 && 0 == t % lb_freq ) { double work = _pm->numParticle(); auto global_grid = _lb->createBalancedGlobalGrid( @@ -199,7 +206,6 @@ class Solver : public SolverBase int _halo_min; std::shared_ptr> _mesh; std::shared_ptr> _pm; - int _lb_step; int _rank; MPI_Comm _comm; std::shared_ptr _partitioner; From 75bc0bf11935a4e1c05644a42b3e157f1bc4d0c5 Mon Sep 17 00:00:00 2001 From: Stephan Schulz Date: Wed, 22 Sep 2021 17:27:12 +0200 Subject: [PATCH 61/64] add continuous perf output --- src/ExaMPM_Solver.hpp | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp index d7f9aff..c5ab6c2 100644 --- a/src/ExaMPM_Solver.hpp +++ b/src/ExaMPM_Solver.hpp @@ -96,7 +96,7 @@ class Solver : public SolverBase double lb_time = 0; double comm_part_time = 0; Kokkos::Timer timer, output_timer, integrate_timer, lb_timer, - comm_part_timer; + comm_part_timer, step_timer; output_timer.reset(); SiloParticleWriter::writeTimeStep( @@ -119,6 +119,14 @@ class Solver : public SolverBase #endif output_time += output_timer.seconds(); + int total_num_particle; + int num_particle = _pm->numParticle(); + MPI_Reduce( &num_particle, &total_num_particle, 1, MPI_INT, MPI_SUM, 0, + _comm ); + int comm_size; + MPI_Comm_size( _comm, &comm_size ); + step_timer.reset(); + int num_step = t_final / _dt; double delta_t = t_final / num_step; double time = 0.0; @@ -165,20 +173,29 @@ class Solver : public SolverBase vertices = _lb->getInternalVertices(); VTKDomainWriter::writeDomain( _comm, t, vertices, vtk_lb_domain_basename ); -#endif + double quality = _lb->getQuality(); output_time += output_timer.seconds(); + output_timer.reset(); + if ( _rank == 0 ) + { + double step_time = 
step_timer.seconds();
                    std::cout
                        << std::fixed << std::setprecision( 5 ) << t << " "
                        << quality << " " << write_freq / step_time << " "
                        << write_freq / step_time * total_num_particle << " "
                        << write_freq / step_time * total_num_particle /
                               comm_size
                        << std::endl;
                }
                step_timer.reset();
                output_time += output_timer.seconds();
#endif
            }

            time += delta_t;
        }
        double total_time = timer.seconds();
        double steps_per_sec = 1. * num_step / total_time;
        if ( _rank == 0 )
        {
            std::cout

From a9d94339fa0b7b121948deec891eb1f48af46303 Mon Sep 17 00:00:00 2001
From: Stephan Schulz
Date: Thu, 7 Oct 2021 09:44:27 +0200
Subject: [PATCH 62/64] update for Quality->Imbalance rename

---
 src/ExaMPM_Solver.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp
index c5ab6c2..ac47bc5 100644
--- a/src/ExaMPM_Solver.hpp
+++ b/src/ExaMPM_Solver.hpp
@@ -173,7 +173,7 @@ class Solver : public SolverBase
                vertices = _lb->getInternalVertices();
                VTKDomainWriter::writeDomain( _comm, t, vertices,
                                              vtk_lb_domain_basename );
-               double quality = _lb->getQuality();
+               double imbalance = _lb->getImbalance();
                output_time += output_timer.seconds();
                output_timer.reset();
                if ( _rank == 0 )
                {
                    double step_time = step_timer.seconds();
                    std::cout
                        << std::fixed << std::setprecision( 5 ) << t << " "
-                       << quality << " " << write_freq / step_time << " "
+                       << imbalance << " " << write_freq / step_time << " "
                        << write_freq / step_time * total_num_particle << " "
                        << write_freq / step_time * total_num_particle /
                               comm_size

From 69bebc6d3f11d7e90c7cc50b1d06a0bdd8474f7d Mon Sep 17 00:00:00 2001
From: Stephan Schulz
Date: Thu, 21 Oct 2021 13:12:53 +0200
Subject: [PATCH 63/64] move LoadBalancer into Experimental namespace

---
 src/ExaMPM_Solver.hpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/ExaMPM_Solver.hpp b/src/ExaMPM_Solver.hpp
index ac47bc5..7bd0b02 100644
--- a/src/ExaMPM_Solver.hpp
+++ b/src/ExaMPM_Solver.hpp
@@ -77,9 +77,9 @@ class Solver : public SolverBase

        MPI_Comm_rank( comm, &_rank );

#ifdef ExaMPM_ENABLE_LB
-       _lb =
-           std::make_shared<Cajita::LoadBalancer<Cajita::UniformMesh<double>>>(
-               _comm, _mesh->globalGrid(), 3. * _mesh->cellSize() );
+       _lb = std::make_shared<
+           Cajita::Experimental::LoadBalancer<Cajita::UniformMesh<double>>>(
+           _comm, _mesh->globalGrid(), 3. * _mesh->cellSize() );
#endif
    }
@@ -227,7 +227,9 @@ class Solver : public SolverBase
    MPI_Comm _comm;
    std::shared_ptr<Cajita::ManualPartitioner> _partitioner;
#ifdef ExaMPM_ENABLE_LB
-   std::shared_ptr<Cajita::LoadBalancer<Cajita::UniformMesh<double>>> _lb;
+   std::shared_ptr<
+       Cajita::Experimental::LoadBalancer<Cajita::UniformMesh<double>>>
+       _lb;
#endif
};

From f38049439ed037ad381b23eb5da911adc825f7dc Mon Sep 17 00:00:00 2001
From: Stephan Schulz
Date: Thu, 18 Nov 2021 11:41:10 +0100
Subject: [PATCH 64/64] remove some redundancy

---
 src/ExaMPM_ProblemManager.hpp | 31 +------------------------------
 1 file changed, 1 insertion(+), 30 deletions(-)

diff --git a/src/ExaMPM_ProblemManager.hpp b/src/ExaMPM_ProblemManager.hpp
index 1fd21a1..2d7b1f1 100644
--- a/src/ExaMPM_ProblemManager.hpp
+++ b/src/ExaMPM_ProblemManager.hpp
@@ -116,36 +116,7 @@ class ProblemManager
    {
        initializeParticles( exec_space, *( _mesh->localGrid() ),
                             particles_per_cell, create_functor, _particles );
-
-       auto node_vector_layout =
-           Cajita::createArrayLayout( _mesh->localGrid(), 3, Cajita::Node() );
-       auto node_scalar_layout =
-           Cajita::createArrayLayout( _mesh->localGrid(), 1, Cajita::Node() );
-       auto cell_scalar_layout =
-           Cajita::createArrayLayout( _mesh->localGrid(), 1, Cajita::Cell() );
-
-       _momentum = Cajita::createArray<double, MemorySpace>(
-           "momentum", node_vector_layout );
-       _mass = Cajita::createArray<double, MemorySpace>( "mass",
-                                                         node_scalar_layout );
-       _force = Cajita::createArray<double, MemorySpace>( "force",
-                                                          node_vector_layout );
-       _velocity = Cajita::createArray<double, MemorySpace>(
-           "velocity", node_vector_layout );
-       _position_correction = Cajita::createArray<double, MemorySpace>(
-           "position_correction", node_vector_layout );
-       _density = Cajita::createArray<double, MemorySpace>(
-           "density", cell_scalar_layout );
-
-       _mark = Cajita::createArray<double, MemorySpace>( "mark",
-                                                         cell_scalar_layout );
-
-       _node_vector_halo = Cajita::createHalo<double, MemorySpace>(
-           *node_vector_layout, Cajita::FullHaloPattern() );
-       _node_scalar_halo = Cajita::createHalo<double, MemorySpace>(
-           *node_scalar_layout, Cajita::FullHaloPattern() );
-       _cell_scalar_halo = Cajita::createHalo<double, MemorySpace>(
-           *cell_scalar_layout, Cajita::FullHaloPattern() );
+       updateMesh( mesh );
    }

    void updateMesh( const std::shared_ptr& mesh )
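Taken together, patches 52-64 reduce the ALL integration to a single rebalancing step inside the solver loop. The following is a minimal sketch of that step, distilled from the diffs above; the function name rebalanceStep and its template parameters are illustrative only, and the sketch assumes the Mesh/ProblemManager interfaces exactly as they appear in the patches (globalMesh(), newGlobalGrid(), numParticle(), updateMesh()) together with the Cajita::Experimental::LoadBalancer::createBalancedGlobalGrid() call adopted in patch 63. It is not a standalone translation unit.

#include <memory>

// Sketch only: one load-balancing step as assembled by this series.
#ifdef ExaMPM_ENABLE_LB
template <class MeshType, class ProblemManagerType, class LoadBalancerType,
          class PartitionerType>
void rebalanceStep( const int t, const int lb_freq,
                    std::shared_ptr<MeshType>& mesh,
                    std::shared_ptr<ProblemManagerType>& pm,
                    LoadBalancerType& lb, PartitionerType& partitioner )
{
    // Rebalance only every lb_freq steps; lb_freq <= 0 disables balancing
    // (mirrors the frequency check introduced in patches 58 and 60).
    if ( lb_freq <= 0 || 0 != t % lb_freq )
        return;

    // The local particle count is the per-rank work estimate handed to ALL.
    double work = pm->numParticle();

    // ALL proposes shifted domain boundaries; Cajita builds a matching
    // global grid from them.
    auto global_grid =
        lb.createBalancedGlobalGrid( mesh->globalMesh(), partitioner, work );

    // Push the new decomposition into the mesh, then re-create the grid
    // fields and halos on it (patch 64 routes this through updateMesh()).
    mesh->newGlobalGrid( global_grid );
    pm->updateMesh( mesh );
}
#endif // ExaMPM_ENABLE_LB

In the solver this runs between TimeIntegrator::step() and communicateParticles( _halo_min ); that halo exchange is what actually migrates particles onto the resized subdomains.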