From 8f8f64a5a77f9fb84784250357450a4180e30fa9 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Sat, 6 Aug 2022 13:22:51 +0100 Subject: [PATCH 01/51] add minimal pair_mace (only the required pure virtual functions). --- src/ML-MACE/pair_mace.cpp | 51 +++++++++++++++++++++++++++++++++++++++ src/ML-MACE/pair_mace.h | 48 ++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 src/ML-MACE/pair_mace.cpp create mode 100644 src/ML-MACE/pair_mace.h diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp new file mode 100644 index 00000000000..a494d6dff28 --- /dev/null +++ b/src/ML-MACE/pair_mace.cpp @@ -0,0 +1,51 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributors + William C Witt (University of Cambridge) +------------------------------------------------------------------------- */ + +#include "pair_mace.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairMACE::PairMACE(LAMMPS *lmp) : Pair(lmp) +{ +} + +/* ---------------------------------------------------------------------- */ + +PairMACE::~PairMACE() +{ +} + +/* ---------------------------------------------------------------------- */ + +void PairMACE::compute(int eflag, int vflag) +{ +} + +/* ---------------------------------------------------------------------- */ + +void PairMACE::settings(int narg, char **arg) +{ +} + +/* ---------------------------------------------------------------------- */ + +void PairMACE::coeff(int narg, char **arg) +{ +} diff --git a/src/ML-MACE/pair_mace.h b/src/ML-MACE/pair_mace.h new file mode 100644 index 00000000000..a0759ca7621 --- /dev/null +++ b/src/ML-MACE/pair_mace.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributors + William C Witt (University of Cambridge) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(mace,PairMACE); +// clang-format on +#else + +#ifndef LMP_PAIR_MACE_H +#define LMP_PAIR_MACE_H + +#include "pair.h" + +namespace LAMMPS_NS { + +class PairMACE : public Pair { + + public: + + PairMACE(class LAMMPS *); + ~PairMACE() override; + void compute(int, int) override; + void settings(int, char **) override; + void coeff(int, char **) override; + + protected: + +}; +} // namespace LAMMPS_NS + +#endif +#endif From 5875569738767268eebed13194c80bb35e0a0ec2 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Sat, 6 Aug 2022 13:25:33 +0100 Subject: [PATCH 02/51] enable cmake build with -DPKG_ML-MACE=ON. --- cmake/CMakeLists.txt | 3 ++- cmake/Modules/Packages/ML-MACE.cmake | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 cmake/Modules/Packages/ML-MACE.cmake diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index d4dcdcff88a..37b156b4403 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -255,6 +255,7 @@ set(STANDARD_PACKAGES MISC ML-HDNNP ML-IAP + ML-MACE ML-PACE ML-QUIP ML-RANN @@ -499,7 +500,7 @@ else() endif() foreach(PKG_WITH_INCL KSPACE PYTHON ML-IAP VORONOI COLVARS ML-HDNNP MDI MOLFILE NETCDF - PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM LATTE MSCG COMPRESS ML-PACE) + PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM LATTE MSCG COMPRESS ML-PACE ML-MACE) if(PKG_${PKG_WITH_INCL}) include(Packages/${PKG_WITH_INCL}) endif() diff --git a/cmake/Modules/Packages/ML-MACE.cmake b/cmake/Modules/Packages/ML-MACE.cmake new file mode 100644 index 00000000000..c2d5253ac5b --- /dev/null +++ b/cmake/Modules/Packages/ML-MACE.cmake @@ -0,0 +1 @@ +message("Hello from ML-MACE.cmake.") From 4607e225df5456fc2686407c8dd5140816ac4c2f Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Sat, 20 Aug 2022 17:57:01 +0100 Subject: [PATCH 03/51] add basic compute functionality to pair_mace. --- cmake/Modules/Packages/ML-MACE.cmake | 9 ++ src/ML-MACE/pair_mace.cpp | 157 +++++++++++++++++++++++++++ src/ML-MACE/pair_mace.h | 7 ++ 3 files changed, 173 insertions(+) diff --git a/cmake/Modules/Packages/ML-MACE.cmake b/cmake/Modules/Packages/ML-MACE.cmake index c2d5253ac5b..984298c75fd 100644 --- a/cmake/Modules/Packages/ML-MACE.cmake +++ b/cmake/Modules/Packages/ML-MACE.cmake @@ -1 +1,10 @@ +cmake_minimum_required(VERSION 3.0 FATAL_ERROR) + message("Hello from ML-MACE.cmake.") + +find_package(Torch REQUIRED) + +#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") +#target_include_directories(lammps PRIVATE "${TORCH_INCLUDE_DIRS}") +target_link_libraries(lammps PRIVATE "${TORCH_LIBRARIES}") +set_property(TARGET lammps PROPERTY CXX_STANDARD 14) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index a494d6dff28..33b9d8232a3 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -18,34 +18,191 @@ #include "pair_mace.h" +#include "atom.h" +#include "memory.h" +#include "neigh_list.h" +#include "neighbor.h" + +#include + using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairMACE::PairMACE(LAMMPS *lmp) : Pair(lmp) { + std::cout << "Hello from MACE constructor." << std::endl; + std::cout << "Goodbye from MACE constructor." << std::endl; } /* ---------------------------------------------------------------------- */ PairMACE::~PairMACE() { + std::cout << "Hello from MACE destructor." << std::endl; + std::cout << "Goodbye from MACE destructor." << std::endl; } /* ---------------------------------------------------------------------- */ void PairMACE::compute(int eflag, int vflag) { + std::cout << "Hello from MACE compute." << std::endl; + + ev_init(eflag, vflag); + + // ----- positions ----- + int n_nodes = atom->nlocal + atom->nghost; + auto positions = torch::empty({n_nodes,3}, torch::dtype(torch::kFloat64)); + for (int ii = 0; ii < n_nodes; ii++) + for (int jj = 0; jj < 3; jj++) + positions[ii][jj] = atom->x[ii][jj]; + + // ----- cell ----- + auto cell = torch::zeros({3,3}, torch::dtype(torch::kFloat64)); + for (int ii = 0; ii < 3; ii++) { + cell[ii][ii] = 50.0; + } + + // ----- edge_index ----- + int n_edges = 0; + for (int ii = 0; ii < list->inum; ii++) + n_edges += list->numneigh[list->ilist[ii]]; + auto edge_index = torch::empty({2,n_edges}, torch::dtype(torch::kInt64)); + int k = 0; + for (int ii = 0; ii < list->inum; ii++) { + int i = list->ilist[ii]; + int *jlist = list->firstneigh[i]; + int jnum = list->numneigh[i]; + for (int jj = 0; jj < jnum; jj++) { + edge_index[0][k] = i; + edge_index[1][k] = jlist[jj]; + //edge_index[1,k] = (jlist[jj] & NEIGHMASK) + 1; + k++; + } + } + + // node_attrs involves atomic numbers + int n_node_feats = atom->ntypes; + auto node_attrs = torch::zeros({n_nodes,n_node_feats}, torch::dtype(torch::kFloat64)); + // TODO: generalize this + for (int ii = 0; ii < list->inum; ii++) { + if (atom->type[ii] == 1) { + node_attrs[ii][1] = 1.0; + } else if (atom->type[ii]==2) { + node_attrs[ii][0] = 1.0; + } + } + + // TODO: consider from_blob to avoid copy + auto batch = torch::zeros({n_nodes}, torch::dtype(torch::kInt64)); + auto energy = torch::empty({1}, torch::dtype(torch::kFloat64)); + auto forces = torch::empty({n_nodes,3}, torch::dtype(torch::kFloat64)); + auto ptr = torch::empty({2}, torch::dtype(torch::kInt64)); //? size + auto shifts = torch::zeros({n_edges,3}, torch::dtype(torch::kFloat64)); //zeros instead of empty + auto weight = torch::empty({1}, torch::dtype(torch::kFloat64)); + ptr[0] = 0; + ptr[1] = 3; + weight[0] = 1.0; + + c10::Dict input; + input.insert("batch", batch); + //std::cout << "batch" << std::endl; + //std::cout << batch << std::endl; + //input.insert("cell", cell); + //std::cout << "cell" << std::endl; + //std::cout << cell << std::endl; + input.insert("edge_index", edge_index); + //std::cout << "edge_index" << std::endl; + //std::cout << edge_index << std::endl; + input.insert("energy", energy); + //std::cout << "energy" << std::endl; + //std::cout << energy << std::endl; + input.insert("forces", forces); + //std::cout << "forces" << std::endl; + //std::cout << forces << std::endl; + input.insert("node_attrs", node_attrs); + //std::cout << "node_attrs" << std::endl; + //std::cout << node_attrs << std::endl; + input.insert("positions", positions); + //std::cout << "positions" << std::endl; + //std::cout << positions << std::endl; + input.insert("ptr", ptr); + //std::cout << "ptr" << std::endl; + //std::cout << ptr << std::endl; + input.insert("shifts", shifts); + //std::cout << "shifts" << std::endl; + //std::cout << shifts << std::endl; + input.insert("weight", weight); + //std::cout << "weight" << std::endl; + //std::cout << weight << std::endl; + + std::cout << "evaluating model" << std::endl; + auto output = model.forward({input, true}).toGenericDict(); + energy = output.at("energy").toTensor(); + auto contributions = output.at("contributions").toTensor(); + forces = output.at("forces").toTensor(); + std::cout << energy << std::endl; + std::cout << contributions << std::endl; + std::cout << forces << std::endl; + + std::cout << "Goodbye from MACE compute." << std::endl; } /* ---------------------------------------------------------------------- */ void PairMACE::settings(int narg, char **arg) { + std::cout << "Hello from MACE settings." << std::endl; + std::cout << "Goodbye from MACE settings." << std::endl; } /* ---------------------------------------------------------------------- */ void PairMACE::coeff(int narg, char **arg) { + std::cout << "Hello from MACE coeff." << std::endl; + + if (!allocated) allocate(); + + std::cout << "Loading MACE model from \"" << arg[2] << "\" ..."; + model = torch::jit::load(arg[2]); + std::cout << " finished." << std::endl; + + for (int i=1; intypes+1; i++) + for (int j=i; jntypes+1; j++) + setflag[i][j] = 1; + + std::cout << "Goodbye from MACE coeff." << std::endl; +} + +void PairMACE::init_style() +{ + std::cout << "Hello from MACE init_coef." << std::endl; + // require full neighbor list + neighbor->add_request(this, NeighConst::REQ_FULL); + std::cout << "Goodbye from MACE init_coef." << std::endl; +} + +double PairMACE::init_one(int i, int j) +{ + // TODO: address neighbor list skin distance (2A) differently + return model.attr("r_max").toDouble() - 2.0; +} + +void PairMACE::allocate() +{ + std::cout << "Hello from MACE allocate." << std::endl; + + allocated = 1; + + memory->create(setflag, atom->ntypes+1, atom->ntypes+1, "pair:setflag"); + for (int i=1; intypes+1; i++) + for (int j=i; jntypes+1; j++) + setflag[i][j] = 0; + + memory->create(cutsq, atom->ntypes+1, atom->ntypes+1, "pair:cutsq"); + std::cout << "WARNING: may need to overload init_one, which sets cutsq." << std::endl; + + std::cout << "Goodbye from MACE allocate." << std::endl; } diff --git a/src/ML-MACE/pair_mace.h b/src/ML-MACE/pair_mace.h index a0759ca7621..c57f76a5dd0 100644 --- a/src/ML-MACE/pair_mace.h +++ b/src/ML-MACE/pair_mace.h @@ -27,6 +27,8 @@ PairStyle(mace,PairMACE); #include "pair.h" +#include + namespace LAMMPS_NS { class PairMACE : public Pair { @@ -38,9 +40,14 @@ class PairMACE : public Pair { void compute(int, int) override; void settings(int, char **) override; void coeff(int, char **) override; + void init_style() override; + double init_one(int, int) override; + void allocate(); protected: + torch::jit::script::Module model; + }; } // namespace LAMMPS_NS From 9d8991e1958214973923ce21c349ded8f5155906 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Tue, 4 Oct 2022 12:04:48 +0100 Subject: [PATCH 04/51] various things. --- src/ML-MACE/pair_mace.cpp | 58 ++++++++++++++++++++++++++++++++------- src/ML-MACE/pair_mace.h | 6 ++++ 2 files changed, 54 insertions(+), 10 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 33b9d8232a3..46b9231d53c 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -23,6 +23,7 @@ #include "neigh_list.h" #include "neighbor.h" +#include #include using namespace LAMMPS_NS; @@ -82,34 +83,48 @@ void PairMACE::compute(int eflag, int vflag) } } + std::cout << "mace atomic numbers size" << mace_atomic_numbers.size() << std::endl; + std::cout << "mace atomic numbers " << mace_atomic_numbers << std::endl; + auto get_mace_type = [this](int lammps_type) { + for (int i=0; intypes; + int n_node_feats = mace_atomic_numbers.size(); auto node_attrs = torch::zeros({n_nodes,n_node_feats}, torch::dtype(torch::kFloat64)); // TODO: generalize this for (int ii = 0; ii < list->inum; ii++) { - if (atom->type[ii] == 1) { - node_attrs[ii][1] = 1.0; - } else if (atom->type[ii]==2) { - node_attrs[ii][0] = 1.0; - } + + // map lammps type to mace type + int mace_type = get_mace_type(atom->type[ii]); + std::cout << "mace_type " << mace_type << std::endl; + node_attrs[ii][mace_type-1] = 1.0; } // TODO: consider from_blob to avoid copy auto batch = torch::zeros({n_nodes}, torch::dtype(torch::kInt64)); auto energy = torch::empty({1}, torch::dtype(torch::kFloat64)); auto forces = torch::empty({n_nodes,3}, torch::dtype(torch::kFloat64)); - auto ptr = torch::empty({2}, torch::dtype(torch::kInt64)); //? size + auto ptr = torch::empty({2}, torch::dtype(torch::kInt64)); auto shifts = torch::zeros({n_edges,3}, torch::dtype(torch::kFloat64)); //zeros instead of empty + auto unit_shifts = torch::zeros({n_edges,3}, torch::dtype(torch::kFloat64)); //zeros instead of empty auto weight = torch::empty({1}, torch::dtype(torch::kFloat64)); - ptr[0] = 0; - ptr[1] = 3; + ptr[0] = 0; // always zero + ptr[1] = 3; // always n_atoms weight[0] = 1.0; c10::Dict input; input.insert("batch", batch); //std::cout << "batch" << std::endl; //std::cout << batch << std::endl; - //input.insert("cell", cell); + input.insert("cell", cell); //std::cout << "cell" << std::endl; //std::cout << cell << std::endl; input.insert("edge_index", edge_index); @@ -133,6 +148,9 @@ void PairMACE::compute(int eflag, int vflag) input.insert("shifts", shifts); //std::cout << "shifts" << std::endl; //std::cout << shifts << std::endl; + input.insert("unit_shifts", unit_shifts); + //std::cout << "unit_shifts" << std::endl; + //std::cout << unit_shifts << std::endl; input.insert("weight", weight); //std::cout << "weight" << std::endl; //std::cout << weight << std::endl; @@ -169,6 +187,26 @@ void PairMACE::coeff(int narg, char **arg) model = torch::jit::load(arg[2]); std::cout << " finished." << std::endl; + std::cout << "attributes" << std::endl; + for (const auto& pair : model.named_attributes()) { + //std::cout << pair.name << ": " << pair.value << std::endl; + std::cout << pair.name << std::endl; + } + + r_max = model.attr("r_max").toDouble(); + std::cout << " - The r_max is: " << r_max << "." << std::endl; + std::cout << " xxxx atomic numbers " << model.attr("atomic_numbers") << std::endl; + mace_atomic_numbers = model.attr("atomic_numbers").toIntVector(); + std::cout << " - The model atomic numbers are: " << mace_atomic_numbers << "." << std::endl; + //std::cout << " - The atomic numbers are: " << model.attr("atomic_numbers") << "." << std::endl; + + for (int i=3; intypes+1; i++) for (int j=i; jntypes+1; j++) setflag[i][j] = 1; diff --git a/src/ML-MACE/pair_mace.h b/src/ML-MACE/pair_mace.h index c57f76a5dd0..94de254c57e 100644 --- a/src/ML-MACE/pair_mace.h +++ b/src/ML-MACE/pair_mace.h @@ -47,6 +47,12 @@ class PairMACE : public Pair { protected: torch::jit::script::Module model; + double r_max; + std::vector mace_atomic_numbers; + std::vector lammps_atomic_numbers; + const std::array periodic_table = + {"H", "He", + "Li", "Be", "B", "C", "N", "O", "F", "Ne"}; }; } // namespace LAMMPS_NS From be70511b445ac5372fd1fce9c43e5fc7e501f41b Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Tue, 4 Oct 2022 13:14:13 +0100 Subject: [PATCH 05/51] save global energy. --- src/ML-MACE/pair_mace.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 46b9231d53c..dfc6284e225 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -164,6 +164,7 @@ void PairMACE::compute(int eflag, int vflag) std::cout << contributions << std::endl; std::cout << forces << std::endl; + eng_vdwl = output.at("energy").toTensor()[0].item(); std::cout << "Goodbye from MACE compute." << std::endl; } From b9207fa240de55ba14aa2acd671526e61f7c1eb1 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Tue, 11 Oct 2022 17:42:27 +0100 Subject: [PATCH 06/51] more progress, including reading site energies. --- src/ML-MACE/pair_mace.cpp | 52 ++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index dfc6284e225..9770a7177a6 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -52,13 +52,17 @@ void PairMACE::compute(int eflag, int vflag) ev_init(eflag, vflag); + std::cout << "nlocal: " << atom->nlocal << std::endl; + std::cout << "nghost: " << atom->nghost << std::endl; + // ----- positions ----- int n_nodes = atom->nlocal + atom->nghost; auto positions = torch::empty({n_nodes,3}, torch::dtype(torch::kFloat64)); - for (int ii = 0; ii < n_nodes; ii++) - for (int jj = 0; jj < 3; jj++) + for (int ii = 0; ii < n_nodes; ii++) { + for (int jj = 0; jj < 3; jj++) { positions[ii][jj] = atom->x[ii][jj]; - + } + } // ----- cell ----- auto cell = torch::zeros({3,3}, torch::dtype(torch::kFloat64)); for (int ii = 0; ii < 3; ii++) { @@ -83,11 +87,8 @@ void PairMACE::compute(int eflag, int vflag) } } - std::cout << "mace atomic numbers size" << mace_atomic_numbers.size() << std::endl; - std::cout << "mace atomic numbers " << mace_atomic_numbers << std::endl; auto get_mace_type = [this](int lammps_type) { for (int i=0; i input; @@ -156,13 +157,17 @@ void PairMACE::compute(int eflag, int vflag) //std::cout << weight << std::endl; std::cout << "evaluating model" << std::endl; - auto output = model.forward({input, true}).toGenericDict(); + // when should stress be printed? + auto output = model.forward({input, false, true, false, false}).toGenericDict(); + std::cout << "energy" << std::endl; energy = output.at("energy").toTensor(); - auto contributions = output.at("contributions").toTensor(); - forces = output.at("forces").toTensor(); std::cout << energy << std::endl; - std::cout << contributions << std::endl; + std::cout << "forces" << std::endl; + forces = output.at("forces").toTensor(); std::cout << forces << std::endl; + std::cout << "site energies" << std::endl; + auto site_energies = output.at("node_energy").toTensor(); + std::cout << site_energies << std::endl; eng_vdwl = output.at("energy").toTensor()[0].item(); std::cout << "Goodbye from MACE compute." << std::endl; @@ -188,19 +193,22 @@ void PairMACE::coeff(int narg, char **arg) model = torch::jit::load(arg[2]); std::cout << " finished." << std::endl; - std::cout << "attributes" << std::endl; - for (const auto& pair : model.named_attributes()) { - //std::cout << pair.name << ": " << pair.value << std::endl; - std::cout << pair.name << std::endl; - } +// std::cout << "attributes" << std::endl; +// for (const auto& pair : model.named_attributes()) { +// //std::cout << pair.name << ": " << pair.value << std::endl; +// std::cout << pair.name << std::endl; +// } - r_max = model.attr("r_max").toDouble(); + r_max = model.attr("r_max").toTensor().item(); std::cout << " - The r_max is: " << r_max << "." << std::endl; - std::cout << " xxxx atomic numbers " << model.attr("atomic_numbers") << std::endl; - mace_atomic_numbers = model.attr("atomic_numbers").toIntVector(); - std::cout << " - The model atomic numbers are: " << mace_atomic_numbers << "." << std::endl; - //std::cout << " - The atomic numbers are: " << model.attr("atomic_numbers") << "." << std::endl; + // extract atomic numbers from mace model + auto a_n = model.attr("atomic_numbers").toTensor(); + for (int i=0; i()); + } + std::cout << " - The model atomic numbers are: " << mace_atomic_numbers << "." << std::endl; + // extract atomic numbers from pair_coeff for (int i=3; i() - 2.0; } void PairMACE::allocate() From 637f1769a4d6c54c2e4a5f4241e403e603572c5b Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Tue, 11 Oct 2022 18:37:49 +0100 Subject: [PATCH 07/51] tidy. --- src/ML-MACE/pair_mace.cpp | 83 ++++++--------------------------------- 1 file changed, 13 insertions(+), 70 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 9770a7177a6..07c29ab700f 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -32,29 +32,20 @@ using namespace LAMMPS_NS; PairMACE::PairMACE(LAMMPS *lmp) : Pair(lmp) { - std::cout << "Hello from MACE constructor." << std::endl; - std::cout << "Goodbye from MACE constructor." << std::endl; } /* ---------------------------------------------------------------------- */ PairMACE::~PairMACE() { - std::cout << "Hello from MACE destructor." << std::endl; - std::cout << "Goodbye from MACE destructor." << std::endl; } /* ---------------------------------------------------------------------- */ void PairMACE::compute(int eflag, int vflag) { - std::cout << "Hello from MACE compute." << std::endl; - ev_init(eflag, vflag); - std::cout << "nlocal: " << atom->nlocal << std::endl; - std::cout << "nghost: " << atom->nghost << std::endl; - // ----- positions ----- int n_nodes = atom->nlocal + atom->nghost; auto positions = torch::empty({n_nodes,3}, torch::dtype(torch::kFloat64)); @@ -87,7 +78,7 @@ void PairMACE::compute(int eflag, int vflag) } } - auto get_mace_type = [this](int lammps_type) { + auto mace_type = [this](int lammps_type) { for (int i=0; iinum; ii++) { - // map lammps type to mace type - int mace_type = get_mace_type(atom->type[ii]); - std::cout << "mace_type " << mace_type << std::endl; - node_attrs[ii][mace_type-1] = 1.0; + node_attrs[ii][mace_type(atom->type[ii])-1] = 1.0; } // TODO: consider from_blob to avoid copy @@ -114,91 +102,53 @@ void PairMACE::compute(int eflag, int vflag) auto energy = torch::empty({1}, torch::dtype(torch::kFloat64)); auto forces = torch::empty({n_nodes,3}, torch::dtype(torch::kFloat64)); auto ptr = torch::empty({2}, torch::dtype(torch::kInt64)); - auto shifts = torch::zeros({n_edges,3}, torch::dtype(torch::kFloat64)); //zeros instead of empty - auto unit_shifts = torch::zeros({n_edges,3}, torch::dtype(torch::kFloat64)); //zeros instead of empty + auto shifts = torch::zeros({n_edges,3}, torch::dtype(torch::kFloat64)); + auto unit_shifts = torch::zeros({n_edges,3}, torch::dtype(torch::kFloat64)); auto weight = torch::empty({1}, torch::dtype(torch::kFloat64)); - ptr[0] = 0; // always zero - ptr[1] = n_nodes; // always n_atoms + ptr[0] = 0; + ptr[1] = n_nodes; weight[0] = 1.0; + // pack the input, call the model, extract the output c10::Dict input; input.insert("batch", batch); - //std::cout << "batch" << std::endl; - //std::cout << batch << std::endl; input.insert("cell", cell); - //std::cout << "cell" << std::endl; - //std::cout << cell << std::endl; input.insert("edge_index", edge_index); - //std::cout << "edge_index" << std::endl; - //std::cout << edge_index << std::endl; input.insert("energy", energy); - //std::cout << "energy" << std::endl; - //std::cout << energy << std::endl; input.insert("forces", forces); - //std::cout << "forces" << std::endl; - //std::cout << forces << std::endl; input.insert("node_attrs", node_attrs); - //std::cout << "node_attrs" << std::endl; - //std::cout << node_attrs << std::endl; input.insert("positions", positions); - //std::cout << "positions" << std::endl; - //std::cout << positions << std::endl; input.insert("ptr", ptr); - //std::cout << "ptr" << std::endl; - //std::cout << ptr << std::endl; input.insert("shifts", shifts); - //std::cout << "shifts" << std::endl; - //std::cout << shifts << std::endl; input.insert("unit_shifts", unit_shifts); - //std::cout << "unit_shifts" << std::endl; - //std::cout << unit_shifts << std::endl; input.insert("weight", weight); - //std::cout << "weight" << std::endl; - //std::cout << weight << std::endl; - - std::cout << "evaluating model" << std::endl; - // when should stress be printed? + // TODO: when should stress be printed? auto output = model.forward({input, false, true, false, false}).toGenericDict(); - std::cout << "energy" << std::endl; - energy = output.at("energy").toTensor(); - std::cout << energy << std::endl; - std::cout << "forces" << std::endl; - forces = output.at("forces").toTensor(); - std::cout << forces << std::endl; - std::cout << "site energies" << std::endl; + // TODO: remove this unused energy call + //energy = output.at("energy").toTensor(); auto site_energies = output.at("node_energy").toTensor(); - std::cout << site_energies << std::endl; + forces = output.at("forces").toTensor(); + // post-process eng_vdwl = output.at("energy").toTensor()[0].item(); - std::cout << "Goodbye from MACE compute." << std::endl; } /* ---------------------------------------------------------------------- */ void PairMACE::settings(int narg, char **arg) { - std::cout << "Hello from MACE settings." << std::endl; - std::cout << "Goodbye from MACE settings." << std::endl; } /* ---------------------------------------------------------------------- */ void PairMACE::coeff(int narg, char **arg) { - std::cout << "Hello from MACE coeff." << std::endl; - if (!allocated) allocate(); std::cout << "Loading MACE model from \"" << arg[2] << "\" ..."; model = torch::jit::load(arg[2]); std::cout << " finished." << std::endl; -// std::cout << "attributes" << std::endl; -// for (const auto& pair : model.named_attributes()) { -// //std::cout << pair.name << ": " << pair.value << std::endl; -// std::cout << pair.name << std::endl; -// } - r_max = model.attr("r_max").toTensor().item(); std::cout << " - The r_max is: " << r_max << "." << std::endl; @@ -208,6 +158,7 @@ void PairMACE::coeff(int narg, char **arg) mace_atomic_numbers.push_back(a_n[i].item()); } std::cout << " - The model atomic numbers are: " << mace_atomic_numbers << "." << std::endl; + // extract atomic numbers from pair_coeff for (int i=3; intypes+1; i++) for (int j=i; jntypes+1; j++) setflag[i][j] = 1; - - std::cout << "Goodbye from MACE coeff." << std::endl; } void PairMACE::init_style() { - std::cout << "Hello from MACE init_coef." << std::endl; // require full neighbor list neighbor->add_request(this, NeighConst::REQ_FULL); - std::cout << "Goodbye from MACE init_coef." << std::endl; } double PairMACE::init_one(int i, int j) @@ -239,8 +186,6 @@ double PairMACE::init_one(int i, int j) void PairMACE::allocate() { - std::cout << "Hello from MACE allocate." << std::endl; - allocated = 1; memory->create(setflag, atom->ntypes+1, atom->ntypes+1, "pair:setflag"); @@ -250,6 +195,4 @@ void PairMACE::allocate() memory->create(cutsq, atom->ntypes+1, atom->ntypes+1, "pair:cutsq"); std::cout << "WARNING: may need to overload init_one, which sets cutsq." << std::endl; - - std::cout << "Goodbye from MACE allocate." << std::endl; } From 51329f4304f76d1bac9c347dbad6ce687a21c442 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Tue, 11 Oct 2022 18:48:34 +0100 Subject: [PATCH 08/51] get cell info from lammps. --- src/ML-MACE/pair_mace.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 07c29ab700f..dede53fc85d 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -19,6 +19,7 @@ #include "pair_mace.h" #include "atom.h" +#include "domain.h" #include "memory.h" #include "neigh_list.h" #include "neighbor.h" @@ -56,9 +57,15 @@ void PairMACE::compute(int eflag, int vflag) } // ----- cell ----- auto cell = torch::zeros({3,3}, torch::dtype(torch::kFloat64)); - for (int ii = 0; ii < 3; ii++) { - cell[ii][ii] = 50.0; - } + cell[0][0] = domain->xprd; + cell[0][1] = 0.0; + cell[0][2] = 0.0; + cell[1][0] = domain->xy; + cell[1][1] = domain->yprd; + cell[1][2] = 0.0; + cell[2][0] = domain->xz; + cell[2][1] = domain->yz; + cell[2][2] = domain->zprd; // ----- edge_index ----- int n_edges = 0; From 19625a371d810b5d58d0fba11f405c429b6fa3a9 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Wed, 2 Nov 2022 14:33:13 +0000 Subject: [PATCH 09/51] add num_interactions and more. --- src/ML-MACE/pair_mace.cpp | 89 ++++++++++++++++++++++++++++++--------- src/ML-MACE/pair_mace.h | 1 + 2 files changed, 69 insertions(+), 21 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index dede53fc85d..e3d735d36cb 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -48,13 +48,17 @@ void PairMACE::compute(int eflag, int vflag) ev_init(eflag, vflag); // ----- positions ----- + // WARNING: list->gnum not the same as atom->nghost int n_nodes = atom->nlocal + atom->nghost; + //int n_nodes = list->inum + list->gnum; auto positions = torch::empty({n_nodes,3}, torch::dtype(torch::kFloat64)); for (int ii = 0; ii < n_nodes; ii++) { - for (int jj = 0; jj < 3; jj++) { - positions[ii][jj] = atom->x[ii][jj]; - } + int i = list->ilist[ii]; + positions[i][0] = atom->x[i][0]; + positions[i][1] = atom->x[i][1]; + positions[i][2] = atom->x[i][2]; } + // ----- cell ----- auto cell = torch::zeros({3,3}, torch::dtype(torch::kFloat64)); cell[0][0] = domain->xprd; @@ -68,20 +72,53 @@ void PairMACE::compute(int eflag, int vflag) cell[2][2] = domain->zprd; // ----- edge_index ----- + // count total number of edges int n_edges = 0; - for (int ii = 0; ii < list->inum; ii++) - n_edges += list->numneigh[list->ilist[ii]]; + for (int ii = 0; ii < n_nodes; ii++) { + //for (int ii = 0; ii < list->inum; ii++) { + int i = list->ilist[ii]; + double xtmp = atom->x[i][0]; + double ytmp = atom->x[i][1]; + double ztmp = atom->x[i][2]; + int *jlist = list->firstneigh[i]; + int jnum = list->numneigh[i]; + for (int jj = 0; jj < jnum; jj++) { + int j = jlist[jj]; + j &= NEIGHMASK; // TODO: what is this? + double delx = xtmp - atom->x[j][0]; + double dely = ytmp - atom->x[j][1]; + double delz = ztmp - atom->x[j][2]; + double rsq = delx * delx + dely * dely + delz * delz; + // TODO: which is better? + //if (rsq < cutsq[itype][jtype]) n_edges += 1; + if (rsq < r_max*r_max) n_edges += 1; + } + } + + auto edge_index = torch::empty({2,n_edges}, torch::dtype(torch::kInt64)); int k = 0; - for (int ii = 0; ii < list->inum; ii++) { + for (int ii = 0; ii < n_nodes; ii++) { + //for (int ii = 0; ii < list->inum; ii++) { int i = list->ilist[ii]; + double xtmp = atom->x[i][0]; + double ytmp = atom->x[i][1]; + double ztmp = atom->x[i][2]; int *jlist = list->firstneigh[i]; int jnum = list->numneigh[i]; for (int jj = 0; jj < jnum; jj++) { - edge_index[0][k] = i; - edge_index[1][k] = jlist[jj]; - //edge_index[1,k] = (jlist[jj] & NEIGHMASK) + 1; - k++; + int j = jlist[jj]; + j &= NEIGHMASK; // TODO: what is this? + double delx = xtmp - atom->x[j][0]; + double dely = ytmp - atom->x[j][1]; + double delz = ztmp - atom->x[j][2]; + double rsq = delx * delx + dely * dely + delz * delz; + if (rsq < r_max*r_max) { + edge_index[0][k] = i; + edge_index[1][k] = j; + //edge_index[1,k] = (jlist[jj] & NEIGHMASK) + 1; + k++; + } } } @@ -98,10 +135,10 @@ void PairMACE::compute(int eflag, int vflag) // node_attrs involves atomic numbers int n_node_feats = mace_atomic_numbers.size(); auto node_attrs = torch::zeros({n_nodes,n_node_feats}, torch::dtype(torch::kFloat64)); - // TODO: generalize this - for (int ii = 0; ii < list->inum; ii++) { + for (int ii = 0; ii < n_nodes; ii++) { + int i = list->ilist[ii]; // map lammps type to mace type - node_attrs[ii][mace_type(atom->type[ii])-1] = 1.0; + node_attrs[i][mace_type(atom->type[i])-1] = 1.0; } // TODO: consider from_blob to avoid copy @@ -131,13 +168,21 @@ void PairMACE::compute(int eflag, int vflag) input.insert("weight", weight); // TODO: when should stress be printed? auto output = model.forward({input, false, true, false, false}).toGenericDict(); - // TODO: remove this unused energy call - //energy = output.at("energy").toTensor(); - auto site_energies = output.at("node_energy").toTensor(); + auto node_energy = output.at("node_energy").toTensor(); forces = output.at("forces").toTensor(); // post-process - eng_vdwl = output.at("energy").toTensor()[0].item(); + eng_vdwl = 0.0; + for (int ii = 0; ii < n_nodes; ii++) { + int i = list->ilist[ii]; + // TODO: update forces here + if (ii < list->inum) { + eng_vdwl += node_energy[i].item(); + if (eflag_atom) + eatom[i] = node_energy[i].item(); + } + } + } /* ---------------------------------------------------------------------- */ @@ -150,6 +195,8 @@ void PairMACE::settings(int narg, char **arg) void PairMACE::coeff(int narg, char **arg) { + // TODO: remove print statements from this routine, or have a single proc print + if (!allocated) allocate(); std::cout << "Loading MACE model from \"" << arg[2] << "\" ..."; @@ -158,6 +205,8 @@ void PairMACE::coeff(int narg, char **arg) r_max = model.attr("r_max").toTensor().item(); std::cout << " - The r_max is: " << r_max << "." << std::endl; + num_interactions = model.attr("num_interactions").toTensor().item(); + std::cout << " - The model has: " << num_interactions << " layers." << std::endl; // extract atomic numbers from mace model auto a_n = model.attr("atomic_numbers").toTensor(); @@ -181,14 +230,12 @@ void PairMACE::coeff(int narg, char **arg) void PairMACE::init_style() { - // require full neighbor list - neighbor->add_request(this, NeighConst::REQ_FULL); + neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); } double PairMACE::init_one(int i, int j) { - // TODO: address neighbor list skin distance (2A) differently - return model.attr("r_max").toTensor().item() - 2.0; + return num_interactions*model.attr("r_max").toTensor().item(); } void PairMACE::allocate() diff --git a/src/ML-MACE/pair_mace.h b/src/ML-MACE/pair_mace.h index 94de254c57e..5d0ed3da0f5 100644 --- a/src/ML-MACE/pair_mace.h +++ b/src/ML-MACE/pair_mace.h @@ -48,6 +48,7 @@ class PairMACE : public Pair { torch::jit::script::Module model; double r_max; + int64_t num_interactions; std::vector mace_atomic_numbers; std::vector lammps_atomic_numbers; const std::array periodic_table = From d97f7c98776bf6a4ef0a8afdf0216487f3318585 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Thu, 3 Nov 2022 02:41:09 +0000 Subject: [PATCH 10/51] several improvements. --- src/ML-MACE/pair_mace.cpp | 49 ++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index e3d735d36cb..7d098a78b90 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -20,12 +20,15 @@ #include "atom.h" #include "domain.h" +#include "error.h" +#include "force.h" #include "memory.h" #include "neigh_list.h" #include "neighbor.h" #include #include +#include using namespace LAMMPS_NS; @@ -47,10 +50,11 @@ void PairMACE::compute(int eflag, int vflag) { ev_init(eflag, vflag); + if (atom->nlocal != list->inum) error->all(FLERR, "ERROR: nlocal != inum."); + if (atom->nghost != list->gnum) error->all(FLERR, "ERROR: nghost != gnum."); + // ----- positions ----- - // WARNING: list->gnum not the same as atom->nghost - int n_nodes = atom->nlocal + atom->nghost; - //int n_nodes = list->inum + list->gnum; + int n_nodes = list->inum + list->gnum; auto positions = torch::empty({n_nodes,3}, torch::dtype(torch::kFloat64)); for (int ii = 0; ii < n_nodes; ii++) { int i = list->ilist[ii]; @@ -75,7 +79,6 @@ void PairMACE::compute(int eflag, int vflag) // count total number of edges int n_edges = 0; for (int ii = 0; ii < n_nodes; ii++) { - //for (int ii = 0; ii < list->inum; ii++) { int i = list->ilist[ii]; double xtmp = atom->x[i][0]; double ytmp = atom->x[i][1]; @@ -89,8 +92,6 @@ void PairMACE::compute(int eflag, int vflag) double dely = ytmp - atom->x[j][1]; double delz = ztmp - atom->x[j][2]; double rsq = delx * delx + dely * dely + delz * delz; - // TODO: which is better? - //if (rsq < cutsq[itype][jtype]) n_edges += 1; if (rsq < r_max*r_max) n_edges += 1; } } @@ -99,7 +100,6 @@ void PairMACE::compute(int eflag, int vflag) auto edge_index = torch::empty({2,n_edges}, torch::dtype(torch::kInt64)); int k = 0; for (int ii = 0; ii < n_nodes; ii++) { - //for (int ii = 0; ii < list->inum; ii++) { int i = list->ilist[ii]; double xtmp = atom->x[i][0]; double ytmp = atom->x[i][1]; @@ -128,20 +128,18 @@ void PairMACE::compute(int eflag, int vflag) return i+1; } } - // TODO: should throw error - return -1000; + error->all(FLERR, "ERROR: problem converting lammps_type to mace_type."); + return -1; }; - // node_attrs involves atomic numbers + // node_attrs is one-hot encoding for atomic numbers int n_node_feats = mace_atomic_numbers.size(); auto node_attrs = torch::zeros({n_nodes,n_node_feats}, torch::dtype(torch::kFloat64)); for (int ii = 0; ii < n_nodes; ii++) { int i = list->ilist[ii]; - // map lammps type to mace type node_attrs[i][mace_type(atom->type[i])-1] = 1.0; } - // TODO: consider from_blob to avoid copy auto batch = torch::zeros({n_nodes}, torch::dtype(torch::kInt64)); auto energy = torch::empty({1}, torch::dtype(torch::kFloat64)); auto forces = torch::empty({n_nodes,3}, torch::dtype(torch::kFloat64)); @@ -166,20 +164,21 @@ void PairMACE::compute(int eflag, int vflag) input.insert("shifts", shifts); input.insert("unit_shifts", unit_shifts); input.insert("weight", weight); - // TODO: when should stress be printed? - auto output = model.forward({input, false, true, false, false}).toGenericDict(); + auto output = model.forward({input, false, true, vflag, false}).toGenericDict(); auto node_energy = output.at("node_energy").toTensor(); forces = output.at("forces").toTensor(); - // post-process + // pass the output to LAMMPS variables eng_vdwl = 0.0; for (int ii = 0; ii < n_nodes; ii++) { int i = list->ilist[ii]; - // TODO: update forces here if (ii < list->inum) { eng_vdwl += node_energy[i].item(); - if (eflag_atom) - eatom[i] = node_energy[i].item(); + atom->f[i][0] = forces[i][0].item(); + atom->f[i][1] = forces[i][1].item(); + atom->f[i][2] = forces[i][2].item(); + if (eflag_atom) + eatom[i] = node_energy[i].item(); } } @@ -230,11 +229,24 @@ void PairMACE::coeff(int narg, char **arg) void PairMACE::init_style() { + if (force->newton_pair == 0) error->all(FLERR, "ERROR: Pair style mace requires newton pair on."); + + /* + MACE requires the full neighbor list AND neighbors of ghost atoms + it appears that: + * without REQ_GHOST + list->gnum == 0 + list->ilist does not include ghost atoms, but the jlists do + * with REQ_GHOST + list->gnum == atom->nghost + list->ilist includes ghost atoms + */ neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); } double PairMACE::init_one(int i, int j) { + // to account for message passing, require cutoff of n_layers * r_max return num_interactions*model.attr("r_max").toTensor().item(); } @@ -248,5 +260,4 @@ void PairMACE::allocate() setflag[i][j] = 0; memory->create(cutsq, atom->ntypes+1, atom->ntypes+1, "pair:cutsq"); - std::cout << "WARNING: may need to overload init_one, which sets cutsq." << std::endl; } From f611691683a39d71e7dd885a65a0b7621c5dbe9a Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Fri, 18 Nov 2022 22:13:07 +0000 Subject: [PATCH 11/51] virials now working in serial. --- src/ML-MACE/pair_mace.cpp | 62 +++++++++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 13 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 7d098a78b90..2b7acc9507d 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -36,6 +36,7 @@ using namespace LAMMPS_NS; PairMACE::PairMACE(LAMMPS *lmp) : Pair(lmp) { + no_virial_fdotr_compute = 1; } /* ---------------------------------------------------------------------- */ @@ -164,24 +165,59 @@ void PairMACE::compute(int eflag, int vflag) input.insert("shifts", shifts); input.insert("unit_shifts", unit_shifts); input.insert("weight", weight); - auto output = model.forward({input, false, true, vflag, false}).toGenericDict(); - auto node_energy = output.at("node_energy").toTensor(); - forces = output.at("forces").toTensor(); + auto output = model.forward({input, false, true, true, false}).toGenericDict(); - // pass the output to LAMMPS variables - eng_vdwl = 0.0; - for (int ii = 0; ii < n_nodes; ii++) { - int i = list->ilist[ii]; - if (ii < list->inum) { + auto stress = output.at("stress").toTensor(); + + // mace energy + // -> sum of site energies of local atoms + if (eflag_global) { + auto node_energy = output.at("node_energy").toTensor(); + eng_vdwl = 0.0; + for (int ii = 0; ii < list->inum; ii++) { + int i = list->ilist[ii]; eng_vdwl += node_energy[i].item(); - atom->f[i][0] = forces[i][0].item(); - atom->f[i][1] = forces[i][1].item(); - atom->f[i][2] = forces[i][2].item(); - if (eflag_atom) - eatom[i] = node_energy[i].item(); } } + // mace forces + // -> derivatives of total mace energy + forces = output.at("forces").toTensor(); + for (int ii = 0; ii < list->inum; ii++) { + int i = list->ilist[ii]; + atom->f[i][0] = forces[i][0].item(); + atom->f[i][1] = forces[i][1].item(); + atom->f[i][2] = forces[i][2].item(); + } + + // mace site energies + // -> local atoms only + if (eflag_atom) { + auto node_energy = output.at("node_energy").toTensor(); + for (int ii = 0; ii < list->inum; ii++) { + int i = list->ilist[ii]; + eatom[i] = node_energy[i].item(); + } + } + + // mace virials (local atoms only) + // -> derivatives of sum of site energies of local atoms + if (vflag_global) { + auto vir = output.at("virials").toTensor(); + virial[0] = vir[0][0][0].item(); + virial[1] = vir[0][1][1].item(); + virial[2] = vir[0][2][2].item(); + virial[3] = 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); + virial[4] = 0.5*(vir[0][2][0].item() + vir[0][0][2].item()); + virial[5] = 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); + } + + // mace site virials + // -> not available + if (vflag_atom) { + error->all(FLERR, "ERROR: pair_mace does not support vflag_atom."); + } + } /* ---------------------------------------------------------------------- */ From 17148576c776d5c53e7cef19219e804878756f10 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Sun, 20 Nov 2022 23:46:10 +0000 Subject: [PATCH 12/51] update virials for MPI. --- src/ML-MACE/pair_mace.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 2b7acc9507d..9a85df4612f 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -123,6 +123,8 @@ void PairMACE::compute(int eflag, int vflag) } } + // + auto mace_type = [this](int lammps_type) { for (int i=0; itype[i])-1] = 1.0; } + // ----- mask for ghost ----- + auto mask = torch::zeros(n_nodes, torch::dtype(torch::kBool)); + for (int ii = 0; ii < list->inum; ii++) { + int i = list->ilist[ii]; + mask[i] = true; + } + auto batch = torch::zeros({n_nodes}, torch::dtype(torch::kInt64)); auto energy = torch::empty({1}, torch::dtype(torch::kFloat64)); auto forces = torch::empty({n_nodes,3}, torch::dtype(torch::kFloat64)); @@ -165,7 +174,7 @@ void PairMACE::compute(int eflag, int vflag) input.insert("shifts", shifts); input.insert("unit_shifts", unit_shifts); input.insert("weight", weight); - auto output = model.forward({input, false, true, true, false}).toGenericDict(); + auto output = model.forward({input, mask, true, true, false}).toGenericDict(); auto stress = output.at("stress").toTensor(); From f5d7251ce82129e2aca9ff6e9650ed64640e2265 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Sun, 20 Nov 2022 23:46:24 +0000 Subject: [PATCH 13/51] grow periodic table. --- src/ML-MACE/pair_mace.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/ML-MACE/pair_mace.h b/src/ML-MACE/pair_mace.h index 5d0ed3da0f5..c54f569fcc3 100644 --- a/src/ML-MACE/pair_mace.h +++ b/src/ML-MACE/pair_mace.h @@ -52,8 +52,15 @@ class PairMACE : public Pair { std::vector mace_atomic_numbers; std::vector lammps_atomic_numbers; const std::array periodic_table = - {"H", "He", - "Li", "Be", "B", "C", "N", "O", "F", "Ne"}; + { "H", "He", + "Li", "Be", "B", "C", "N", "O", "F", "Ne", + "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar", + "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br", "Kr", + "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I", "Xe", + "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", + "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", + "Fr", "Ra", "Ac", "Th", "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr", + "Rf", "Db", "Sg", "Bh", "Hs", "Mt", "Ds", "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og"}; }; } // namespace LAMMPS_NS From 67e96baf011e4b82471218fe7d48d2ec9ac5f89e Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Fri, 25 Nov 2022 11:11:31 +0000 Subject: [PATCH 14/51] update periodic_table. --- src/ML-MACE/pair_mace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ML-MACE/pair_mace.h b/src/ML-MACE/pair_mace.h index c54f569fcc3..1ce687cc274 100644 --- a/src/ML-MACE/pair_mace.h +++ b/src/ML-MACE/pair_mace.h @@ -51,7 +51,7 @@ class PairMACE : public Pair { int64_t num_interactions; std::vector mace_atomic_numbers; std::vector lammps_atomic_numbers; - const std::array periodic_table = + const std::array periodic_table = { "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar", From 3bd488c03a452aaf664ee43ca61565a313e2256f Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Fri, 9 Dec 2022 13:03:52 +0000 Subject: [PATCH 15/51] add ghostneigh=1 to pair_mace construction. --- src/ML-MACE/pair_mace.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 9a85df4612f..3dae6514ee7 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -37,6 +37,7 @@ using namespace LAMMPS_NS; PairMACE::PairMACE(LAMMPS *lmp) : Pair(lmp) { no_virial_fdotr_compute = 1; + ghostneigh = 1; } /* ---------------------------------------------------------------------- */ From d212e47364f04e29aa465096fe9ee368d9bd60ff Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Sun, 11 Dec 2022 21:51:58 +0000 Subject: [PATCH 16/51] remove todo comments. --- src/ML-MACE/pair_mace.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 3dae6514ee7..e4bd09f3624 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -89,7 +89,7 @@ void PairMACE::compute(int eflag, int vflag) int jnum = list->numneigh[i]; for (int jj = 0; jj < jnum; jj++) { int j = jlist[jj]; - j &= NEIGHMASK; // TODO: what is this? + j &= NEIGHMASK; double delx = xtmp - atom->x[j][0]; double dely = ytmp - atom->x[j][1]; double delz = ztmp - atom->x[j][2]; @@ -110,7 +110,7 @@ void PairMACE::compute(int eflag, int vflag) int jnum = list->numneigh[i]; for (int jj = 0; jj < jnum; jj++) { int j = jlist[jj]; - j &= NEIGHMASK; // TODO: what is this? + j &= NEIGHMASK; double delx = xtmp - atom->x[j][0]; double dely = ytmp - atom->x[j][1]; double delz = ztmp - atom->x[j][2]; @@ -118,7 +118,6 @@ void PairMACE::compute(int eflag, int vflag) if (rsq < r_max*r_max) { edge_index[0][k] = i; edge_index[1][k] = j; - //edge_index[1,k] = (jlist[jj] & NEIGHMASK) + 1; k++; } } From 577b043a3104756636d39244e1885c349fc8e325 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Sun, 11 Dec 2022 23:48:09 +0000 Subject: [PATCH 17/51] Revert "add ghostneigh=1 to pair_mace construction." We don't set cutghost explicitly, so unnecessary. This reverts commit 3bd488c03a452aaf664ee43ca61565a313e2256f. --- src/ML-MACE/pair_mace.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index e4bd09f3624..3d3d1cec661 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -37,7 +37,6 @@ using namespace LAMMPS_NS; PairMACE::PairMACE(LAMMPS *lmp) : Pair(lmp) { no_virial_fdotr_compute = 1; - ghostneigh = 1; } /* ---------------------------------------------------------------------- */ From 42c459ff4442d66b0b7b4d1faac3bbe93ab9cf5e Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Tue, 20 Dec 2022 22:25:13 +0000 Subject: [PATCH 18/51] add no_domain_decomposition option and other tweaks. --- src/ML-MACE/pair_mace.cpp | 152 ++++++++++++++++++++++++++++---------- src/ML-MACE/pair_mace.h | 3 + 2 files changed, 114 insertions(+), 41 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 3d3d1cec661..3d813c09d65 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -52,12 +52,24 @@ void PairMACE::compute(int eflag, int vflag) ev_init(eflag, vflag); if (atom->nlocal != list->inum) error->all(FLERR, "ERROR: nlocal != inum."); - if (atom->nghost != list->gnum) error->all(FLERR, "ERROR: nghost != gnum."); + if (domain_decomposition) { + if (atom->nghost != list->gnum) error->all(FLERR, "ERROR: nghost != gnum."); + } // ----- positions ----- - int n_nodes = list->inum + list->gnum; - auto positions = torch::empty({n_nodes,3}, torch::dtype(torch::kFloat64)); - for (int ii = 0; ii < n_nodes; ii++) { + int n_nodes; + if (domain_decomposition) { + n_nodes = atom->nlocal + atom->nghost; + } else { + // normally, ghost atoms are included in the graph as independent + // nodes, as required when the local domain does not have PBC. + // however, in no_domain_decomposition mode, ghost atoms are known to + // be shifted versions of local atoms. + n_nodes = atom->nlocal; + } + auto positions = torch::empty({n_nodes,3}, torch_float_dtype); + #pragma omp parallel for + for (int ii=0; iiilist[ii]; positions[i][0] = atom->x[i][0]; positions[i][1] = atom->x[i][1]; @@ -65,65 +77,95 @@ void PairMACE::compute(int eflag, int vflag) } // ----- cell ----- - auto cell = torch::zeros({3,3}, torch::dtype(torch::kFloat64)); - cell[0][0] = domain->xprd; + auto cell = torch::zeros({3,3}, torch_float_dtype); + cell[0][0] = domain->h[0]; cell[0][1] = 0.0; cell[0][2] = 0.0; - cell[1][0] = domain->xy; - cell[1][1] = domain->yprd; + cell[1][0] = domain->h[5]; + cell[1][1] = domain->h[1]; cell[1][2] = 0.0; - cell[2][0] = domain->xz; - cell[2][1] = domain->yz; - cell[2][2] = domain->zprd; + cell[2][0] = domain->h[4]; + cell[2][1] = domain->h[3]; + cell[2][2] = domain->h[2]; - // ----- edge_index ----- + // ----- edge_index and unit_shifts ----- // count total number of edges int n_edges = 0; - for (int ii = 0; ii < n_nodes; ii++) { + std::vector n_edges_vec(n_nodes, 0); + #pragma omp parallel for reduction(+:n_edges) + for (int ii=0; iiilist[ii]; double xtmp = atom->x[i][0]; double ytmp = atom->x[i][1]; double ztmp = atom->x[i][2]; int *jlist = list->firstneigh[i]; int jnum = list->numneigh[i]; - for (int jj = 0; jj < jnum; jj++) { + for (int jj=0; jjx[j][0]; double dely = ytmp - atom->x[j][1]; double delz = ztmp - atom->x[j][2]; double rsq = delx * delx + dely * dely + delz * delz; - if (rsq < r_max*r_max) n_edges += 1; + if (rsq < r_max_squared) { + n_edges += 1; + n_edges_vec[ii] += 1; + } } } - - + // make first_edge vector to help with parallelizing following loop + std::vector first_edge(n_nodes); + first_edge[0] = 0; + for (int ii=0; iiilist[ii]; double xtmp = atom->x[i][0]; double ytmp = atom->x[i][1]; double ztmp = atom->x[i][2]; int *jlist = list->firstneigh[i]; int jnum = list->numneigh[i]; - for (int jj = 0; jj < jnum; jj++) { + int k = first_edge[ii]; + for (int jj=0; jjx[j][0]; double dely = ytmp - atom->x[j][1]; double delz = ztmp - atom->x[j][2]; double rsq = delx * delx + dely * dely + delz * delz; - if (rsq < r_max*r_max) { + if (rsq < r_max_squared) { edge_index[0][k] = i; - edge_index[1][k] = j; + if (domain_decomposition) { + edge_index[1][k] = j; + } else { + int j_local = atom->map(atom->tag[j]); + edge_index[1][k] = j_local; + double shiftx = atom->x[j][0] - atom->x[j_local][0]; + double shifty = atom->x[j][1] - atom->x[j_local][1]; + double shiftz = atom->x[j][2] - atom->x[j_local][2]; + double shiftxs = std::round(domain->h_inv[0]*shiftx + domain->h_inv[5]*shifty + domain->h_inv[4]*shiftz); + double shiftys = std::round(domain->h_inv[1]*shifty + domain->h_inv[3]*shiftz); + double shiftzs = std::round(domain->h_inv[2]*shiftz); + unit_shifts[k][0] = shiftxs; + unit_shifts[k][1] = shiftys; + unit_shifts[k][2] = shiftzs; + shifts[k][0] = domain->h[0]*shiftxs + domain->h[5]*shiftys + domain->h[4]*shiftzs; + shifts[k][1] = domain->h[1]*shiftys + domain->h[3]*shiftzs; + shifts[k][2] = domain->h[2]*shiftzs; + } k++; } } } - // - + // ----- node_attrs ----- + // node_attrs is one-hot encoding for atomic numbers auto mace_type = [this](int lammps_type) { for (int i=0; iall(FLERR, "ERROR: problem converting lammps_type to mace_type."); return -1; }; - - // node_attrs is one-hot encoding for atomic numbers int n_node_feats = mace_atomic_numbers.size(); - auto node_attrs = torch::zeros({n_nodes,n_node_feats}, torch::dtype(torch::kFloat64)); - for (int ii = 0; ii < n_nodes; ii++) { + auto node_attrs = torch::zeros({n_nodes,n_node_feats}, torch_float_dtype); + #pragma omp parallel for + for (int ii=0; iiilist[ii]; node_attrs[i][mace_type(atom->type[i])-1] = 1.0; } // ----- mask for ghost ----- auto mask = torch::zeros(n_nodes, torch::dtype(torch::kBool)); - for (int ii = 0; ii < list->inum; ii++) { + #pragma omp parallel for + for (int ii=0; iinlocal; ++ii) { int i = list->ilist[ii]; mask[i] = true; } auto batch = torch::zeros({n_nodes}, torch::dtype(torch::kInt64)); - auto energy = torch::empty({1}, torch::dtype(torch::kFloat64)); - auto forces = torch::empty({n_nodes,3}, torch::dtype(torch::kFloat64)); + auto energy = torch::empty({1}, torch_float_dtype); + auto forces = torch::empty({n_nodes,3}, torch_float_dtype); auto ptr = torch::empty({2}, torch::dtype(torch::kInt64)); - auto shifts = torch::zeros({n_edges,3}, torch::dtype(torch::kFloat64)); - auto unit_shifts = torch::zeros({n_edges,3}, torch::dtype(torch::kFloat64)); - auto weight = torch::empty({1}, torch::dtype(torch::kFloat64)); + auto weight = torch::empty({1}, torch_float_dtype); ptr[0] = 0; ptr[1] = n_nodes; weight[0] = 1.0; @@ -175,14 +215,13 @@ void PairMACE::compute(int eflag, int vflag) input.insert("weight", weight); auto output = model.forward({input, mask, true, true, false}).toGenericDict(); - auto stress = output.at("stress").toTensor(); - // mace energy // -> sum of site energies of local atoms if (eflag_global) { auto node_energy = output.at("node_energy").toTensor(); eng_vdwl = 0.0; - for (int ii = 0; ii < list->inum; ii++) { + #pragma omp parallel for reduction(+:eng_vdwl) + for (int ii=0; iiinum; ++ii) { int i = list->ilist[ii]; eng_vdwl += node_energy[i].item(); } @@ -191,7 +230,8 @@ void PairMACE::compute(int eflag, int vflag) // mace forces // -> derivatives of total mace energy forces = output.at("forces").toTensor(); - for (int ii = 0; ii < list->inum; ii++) { + #pragma omp parallel for + for (int ii=0; iiinum; ++ii) { int i = list->ilist[ii]; atom->f[i][0] = forces[i][0].item(); atom->f[i][1] = forces[i][1].item(); @@ -202,7 +242,8 @@ void PairMACE::compute(int eflag, int vflag) // -> local atoms only if (eflag_atom) { auto node_energy = output.at("node_energy").toTensor(); - for (int ii = 0; ii < list->inum; ii++) { + #pragma omp parallel for + for (int ii=0; iiinum; ++ii) { int i = list->ilist[ii]; eatom[i] = node_energy[i].item(); } @@ -232,6 +273,15 @@ void PairMACE::compute(int eflag, int vflag) void PairMACE::settings(int narg, char **arg) { + if (narg == 1) { + if (strcmp(arg[0], "no_domain_decomposition") == 0) { + domain_decomposition = false; + } else { + error->all(FLERR, "Invalid option for pair_style mace."); + } + } else if (narg > 1) { + error->all(FLERR, "Too many pair_style arguments for pair_style mace."); + } } /* ---------------------------------------------------------------------- */ @@ -246,7 +296,22 @@ void PairMACE::coeff(int narg, char **arg) model = torch::jit::load(arg[2]); std::cout << " finished." << std::endl; + // extract default dtype from mace model + for (auto p: model.named_attributes()) { + // this is a somewhat random choice of variable to check. could it be improved? + if (p.name == "model.node_embedding.linear.weight") { + if (p.value.toTensor().dtype() == caffe2::TypeMeta::Make()) { + torch_float_dtype = torch::kFloat32; + } else if (p.value.toTensor().dtype() == caffe2::TypeMeta::Make()) { + torch_float_dtype = torch::kFloat64; + } + } + } + std::cout << " - The torch_float_dtype is: " << torch_float_dtype << std::endl; + + // extract r_max from mace model r_max = model.attr("r_max").toTensor().item(); + r_max_squared = r_max*r_max; std::cout << " - The r_max is: " << r_max << "." << std::endl; num_interactions = model.attr("num_interactions").toTensor().item(); std::cout << " - The model has: " << num_interactions << " layers." << std::endl; @@ -285,13 +350,18 @@ void PairMACE::init_style() list->gnum == atom->nghost list->ilist includes ghost atoms */ - neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); + if (domain_decomposition) { + neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); + } else { + neighbor->add_request(this, NeighConst::REQ_FULL); + } } double PairMACE::init_one(int i, int j) { // to account for message passing, require cutoff of n_layers * r_max - return num_interactions*model.attr("r_max").toTensor().item(); +// return num_interactions*model.attr("r_max").toTensor().item(); + return (1+num_interactions)*model.attr("r_max").toTensor().item(); } void PairMACE::allocate() diff --git a/src/ML-MACE/pair_mace.h b/src/ML-MACE/pair_mace.h index 1ce687cc274..91aafc663b5 100644 --- a/src/ML-MACE/pair_mace.h +++ b/src/ML-MACE/pair_mace.h @@ -46,8 +46,11 @@ class PairMACE : public Pair { protected: + bool domain_decomposition = true; torch::jit::script::Module model; + torch::ScalarType torch_float_dtype; double r_max; + double r_max_squared; int64_t num_interactions; std::vector mace_atomic_numbers; std::vector lammps_atomic_numbers; From d60bd8ab259d1efa65b453455fd0d758a2677bc9 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Thu, 12 Jan 2023 09:05:55 +0000 Subject: [PATCH 19/51] restore approach for determining effective interaction. --- src/ML-MACE/pair_mace.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 3d813c09d65..4346a2336d7 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -360,8 +360,7 @@ void PairMACE::init_style() double PairMACE::init_one(int i, int j) { // to account for message passing, require cutoff of n_layers * r_max -// return num_interactions*model.attr("r_max").toTensor().item(); - return (1+num_interactions)*model.attr("r_max").toTensor().item(); + return num_interactions*model.attr("r_max").toTensor().item(); } void PairMACE::allocate() From 6fe7bd8800a92ee0cc56d3dbb1dc43f90fb1134f Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Thu, 23 Feb 2023 21:15:33 +0000 Subject: [PATCH 20/51] gpu work. --- cmake/Modules/Packages/ML-MACE.cmake | 3 +- src/ML-MACE/pair_mace.cpp | 143 ++++++++++++++++++++++++--- src/ML-MACE/pair_mace.h | 3 + 3 files changed, 135 insertions(+), 14 deletions(-) diff --git a/cmake/Modules/Packages/ML-MACE.cmake b/cmake/Modules/Packages/ML-MACE.cmake index 984298c75fd..c22a971c225 100644 --- a/cmake/Modules/Packages/ML-MACE.cmake +++ b/cmake/Modules/Packages/ML-MACE.cmake @@ -1,10 +1,9 @@ cmake_minimum_required(VERSION 3.0 FATAL_ERROR) -message("Hello from ML-MACE.cmake.") - find_package(Torch REQUIRED) #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") #target_include_directories(lammps PRIVATE "${TORCH_INCLUDE_DIRS}") target_link_libraries(lammps PRIVATE "${TORCH_LIBRARIES}") set_property(TARGET lammps PROPERTY CXX_STANDARD 14) +add_compile_definitions(MACE_DEBUG) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 4346a2336d7..d37abc5facd 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -30,6 +30,10 @@ #include #include +#ifdef MACE_DEBUG +#include +#endif + using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ @@ -51,6 +55,16 @@ void PairMACE::compute(int eflag, int vflag) { ev_init(eflag, vflag); +std::cout << " " << std::endl; + + #ifdef MACE_DEBUG + std::chrono::time_point t0, t1; + #endif + + #ifdef MACE_DEBUG + t0 = std::chrono::high_resolution_clock::now(); + #endif + if (atom->nlocal != list->inum) error->all(FLERR, "ERROR: nlocal != inum."); if (domain_decomposition) { if (atom->nghost != list->gnum) error->all(FLERR, "ERROR: nghost != gnum."); @@ -88,6 +102,15 @@ void PairMACE::compute(int eflag, int vflag) cell[2][1] = domain->h[3]; cell[2][2] = domain->h[2]; + #ifdef MACE_DEBUG + t1 = std::chrono::high_resolution_clock::now(); + std::cout << "positions+cell: " << std::chrono::duration_cast(t1-t0).count() << std::endl; + #endif + + #ifdef MACE_DEBUG + t0 = std::chrono::high_resolution_clock::now(); + #endif + // ----- edge_index and unit_shifts ----- // count total number of edges int n_edges = 0; @@ -119,6 +142,13 @@ void PairMACE::compute(int eflag, int vflag) for (int ii=0; ii(t1-t0).count() << std::endl; + #endif + #ifdef MACE_DEBUG + t0 = std::chrono::high_resolution_clock::now(); + #endif // fill edge_index and unit_shifts tensors auto edge_index = torch::empty({2,n_edges}, torch::dtype(torch::kInt64)); auto unit_shifts = torch::zeros({n_edges,3}, torch_float_dtype); @@ -163,6 +193,14 @@ void PairMACE::compute(int eflag, int vflag) } } } + #ifdef MACE_DEBUG + t1 = std::chrono::high_resolution_clock::now(); + std::cout << "edge fill: " << std::chrono::duration_cast(t1-t0).count() << std::endl; + #endif + + #ifdef MACE_DEBUG + t0 = std::chrono::high_resolution_clock::now(); + #endif // ----- node_attrs ----- // node_attrs is one-hot encoding for atomic numbers @@ -183,6 +221,15 @@ void PairMACE::compute(int eflag, int vflag) node_attrs[i][mace_type(atom->type[i])-1] = 1.0; } + #ifdef MACE_DEBUG + t1 = std::chrono::high_resolution_clock::now(); + std::cout << "node attrs: " << std::chrono::duration_cast(t1-t0).count() << std::endl; + #endif + + #ifdef MACE_DEBUG + t0 = std::chrono::high_resolution_clock::now(); + #endif + // ----- mask for ghost ----- auto mask = torch::zeros(n_nodes, torch::dtype(torch::kBool)); #pragma omp parallel for @@ -200,7 +247,35 @@ void PairMACE::compute(int eflag, int vflag) ptr[1] = n_nodes; weight[0] = 1.0; + #ifdef MACE_DEBUG + t1 = std::chrono::high_resolution_clock::now(); + std::cout << "other setup: " << std::chrono::duration_cast(t1-t0).count() << std::endl; + #endif + + // transfer data to device + #ifdef MACE_DEBUG + t0 = std::chrono::high_resolution_clock::now(); + #endif + batch = batch.to(device); + cell = cell.to(device); + edge_index = edge_index.to(device); + energy = energy.to(device); + forces = forces.to(device); + node_attrs = node_attrs.to(device); + positions = positions.to(device); + ptr = ptr.to(device); + shifts = shifts.to(device); + unit_shifts = unit_shifts.to(device); + weight = weight.to(device); + #ifdef MACE_DEBUG + t1 = std::chrono::high_resolution_clock::now(); + std::cout << "transfer: " << std::chrono::duration_cast(t1-t0).count() << std::endl; + #endif + // pack the input, call the model, extract the output + #ifdef MACE_DEBUG + t0 = std::chrono::high_resolution_clock::now(); + #endif c10::Dict input; input.insert("batch", batch); input.insert("cell", cell); @@ -213,12 +288,38 @@ void PairMACE::compute(int eflag, int vflag) input.insert("shifts", shifts); input.insert("unit_shifts", unit_shifts); input.insert("weight", weight); - auto output = model.forward({input, mask, true, true, false}).toGenericDict(); + #ifdef MACE_DEBUG + t1 = std::chrono::high_resolution_clock::now(); + std::cout << "pack: " << std::chrono::duration_cast(t1-t0).count() << std::endl; + #endif + + #ifdef MACE_DEBUG + t0 = std::chrono::high_resolution_clock::now(); + #endif + auto output = model.forward({input, mask.to(device), true, true, false}).toGenericDict(); + #ifdef MACE_DEBUG + t1 = std::chrono::high_resolution_clock::now(); + std::cout << "model: " << std::chrono::duration_cast(t1-t0).count() << std::endl; + #endif + + #ifdef MACE_DEBUG + t0 = std::chrono::high_resolution_clock::now(); + #endif + auto node_energy = output.at("node_energy").toTensor().cpu(); + forces = output.at("forces").toTensor().cpu(); + auto vir = output.at("virials").toTensor().cpu(); + #ifdef MACE_DEBUG + t1 = std::chrono::high_resolution_clock::now(); + std::cout << "transfer back: " << std::chrono::duration_cast(t1-t0).count() << std::endl; + #endif + + #ifdef MACE_DEBUG + t0 = std::chrono::high_resolution_clock::now(); + #endif // mace energy // -> sum of site energies of local atoms if (eflag_global) { - auto node_energy = output.at("node_energy").toTensor(); eng_vdwl = 0.0; #pragma omp parallel for reduction(+:eng_vdwl) for (int ii=0; iiinum; ++ii) { @@ -229,7 +330,6 @@ void PairMACE::compute(int eflag, int vflag) // mace forces // -> derivatives of total mace energy - forces = output.at("forces").toTensor(); #pragma omp parallel for for (int ii=0; iiinum; ++ii) { int i = list->ilist[ii]; @@ -241,7 +341,6 @@ void PairMACE::compute(int eflag, int vflag) // mace site energies // -> local atoms only if (eflag_atom) { - auto node_energy = output.at("node_energy").toTensor(); #pragma omp parallel for for (int ii=0; iiinum; ++ii) { int i = list->ilist[ii]; @@ -252,7 +351,6 @@ void PairMACE::compute(int eflag, int vflag) // mace virials (local atoms only) // -> derivatives of sum of site energies of local atoms if (vflag_global) { - auto vir = output.at("virials").toTensor(); virial[0] = vir[0][0][0].item(); virial[1] = vir[0][1][1].item(); virial[2] = vir[0][2][2].item(); @@ -267,20 +365,32 @@ void PairMACE::compute(int eflag, int vflag) error->all(FLERR, "ERROR: pair_mace does not support vflag_atom."); } + #ifdef MACE_DEBUG + t1 = std::chrono::high_resolution_clock::now(); + std::cout << "unpack: " << std::chrono::duration_cast(t1-t0).count() << std::endl; + #endif + } /* ---------------------------------------------------------------------- */ void PairMACE::settings(int narg, char **arg) { - if (narg == 1) { - if (strcmp(arg[0], "no_domain_decomposition") == 0) { + if (narg > 2) { + error->all(FLERR, "Too many pair_style arguments for pair_style mace."); + } + + if (narg >= 1) { + if (strcmp(arg[0], "gpu") == 0) { + device_type = "gpu"; + } + } + + if (narg >= 2) { + if (strcmp(arg[1], "no_domain_decomposition") == 0) { domain_decomposition = false; - } else { - error->all(FLERR, "Invalid option for pair_style mace."); + // TODO: add check against MPI rank } - } else if (narg > 1) { - error->all(FLERR, "Too many pair_style arguments for pair_style mace."); } } @@ -292,8 +402,17 @@ void PairMACE::coeff(int narg, char **arg) if (!allocated) allocate(); + if (device_type == "cpu") { + device = c10::Device(torch::kCPU); + } else { + int rank; + MPI_Comm_rank(world, &rank); + std::cout << "MPI rank: " << rank << std::endl; + device = c10::Device(torch::kCUDA,rank); + } + std::cout << "Loading MACE model from \"" << arg[2] << "\" ..."; - model = torch::jit::load(arg[2]); + model = torch::jit::load(arg[2], device); std::cout << " finished." << std::endl; // extract default dtype from mace model diff --git a/src/ML-MACE/pair_mace.h b/src/ML-MACE/pair_mace.h index 91aafc663b5..9616d18286e 100644 --- a/src/ML-MACE/pair_mace.h +++ b/src/ML-MACE/pair_mace.h @@ -27,6 +27,7 @@ PairStyle(mace,PairMACE); #include "pair.h" +#include #include namespace LAMMPS_NS { @@ -46,7 +47,9 @@ class PairMACE : public Pair { protected: + std::string device_type = "cpu"; bool domain_decomposition = true; + torch::Device device = torch::kCPU; torch::jit::script::Module model; torch::ScalarType torch_float_dtype; double r_max; From 0246fad8327658b388069d7ab5af9d90fb418990 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Thu, 23 Feb 2023 22:58:10 +0000 Subject: [PATCH 21/51] first steps. --- cmake/Modules/Packages/ML-MACE.cmake | 3 +- src/ML-MACE/pair_mace_kokkos.cpp | 384 +++++++++++++++++++++++++++ src/ML-MACE/pair_mace_kokkos.h | 78 ++++++ 3 files changed, 463 insertions(+), 2 deletions(-) create mode 100644 src/ML-MACE/pair_mace_kokkos.cpp create mode 100644 src/ML-MACE/pair_mace_kokkos.h diff --git a/cmake/Modules/Packages/ML-MACE.cmake b/cmake/Modules/Packages/ML-MACE.cmake index 984298c75fd..4646648e370 100644 --- a/cmake/Modules/Packages/ML-MACE.cmake +++ b/cmake/Modules/Packages/ML-MACE.cmake @@ -3,8 +3,7 @@ cmake_minimum_required(VERSION 3.0 FATAL_ERROR) message("Hello from ML-MACE.cmake.") find_package(Torch REQUIRED) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") -#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") -#target_include_directories(lammps PRIVATE "${TORCH_INCLUDE_DIRS}") target_link_libraries(lammps PRIVATE "${TORCH_LIBRARIES}") set_property(TARGET lammps PROPERTY CXX_STANDARD 14) diff --git a/src/ML-MACE/pair_mace_kokkos.cpp b/src/ML-MACE/pair_mace_kokkos.cpp new file mode 100644 index 00000000000..b52a48eed78 --- /dev/null +++ b/src/ML-MACE/pair_mace_kokkos.cpp @@ -0,0 +1,384 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributors + William C Witt (University of Cambridge) +------------------------------------------------------------------------- */ + +#include "pair_mace_kokkos.h" + +#include "atom.h" +#include "domain.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "neigh_list.h" +#include "neighbor.h" + +#include +#include +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +PairMACEKokkos::PairMACEKokkos(LAMMPS *lmp) : PairMACE(lmp) +{ + no_virial_fdotr_compute = 1; +} + +/* ---------------------------------------------------------------------- */ + +template +PairMACEKokkos::~PairMACEKokkos() +{ +} + +/* ---------------------------------------------------------------------- */ + +template +void PairMACEKokkos::compute(int eflag, int vflag) +{ + ev_init(eflag, vflag); + + if (atom->nlocal != list->inum) error->all(FLERR, "ERROR: nlocal != inum."); + if (domain_decomposition) { + if (atom->nghost != list->gnum) error->all(FLERR, "ERROR: nghost != gnum."); + } + + // ----- positions ----- + int n_nodes; + if (domain_decomposition) { + n_nodes = atom->nlocal + atom->nghost; + } else { + // normally, ghost atoms are included in the graph as independent + // nodes, as required when the local domain does not have PBC. + // however, in no_domain_decomposition mode, ghost atoms are known to + // be shifted versions of local atoms. + n_nodes = atom->nlocal; + } + auto positions = torch::empty({n_nodes,3}, torch_float_dtype); + #pragma omp parallel for + for (int ii=0; iiilist[ii]; + positions[i][0] = atom->x[i][0]; + positions[i][1] = atom->x[i][1]; + positions[i][2] = atom->x[i][2]; + } + + // ----- cell ----- + auto cell = torch::zeros({3,3}, torch_float_dtype); + cell[0][0] = domain->h[0]; + cell[0][1] = 0.0; + cell[0][2] = 0.0; + cell[1][0] = domain->h[5]; + cell[1][1] = domain->h[1]; + cell[1][2] = 0.0; + cell[2][0] = domain->h[4]; + cell[2][1] = domain->h[3]; + cell[2][2] = domain->h[2]; + + // ----- edge_index and unit_shifts ----- + // count total number of edges + int n_edges = 0; + std::vector n_edges_vec(n_nodes, 0); + #pragma omp parallel for reduction(+:n_edges) + for (int ii=0; iiilist[ii]; + double xtmp = atom->x[i][0]; + double ytmp = atom->x[i][1]; + double ztmp = atom->x[i][2]; + int *jlist = list->firstneigh[i]; + int jnum = list->numneigh[i]; + for (int jj=0; jjx[j][0]; + double dely = ytmp - atom->x[j][1]; + double delz = ztmp - atom->x[j][2]; + double rsq = delx * delx + dely * dely + delz * delz; + if (rsq < r_max_squared) { + n_edges += 1; + n_edges_vec[ii] += 1; + } + } + } + // make first_edge vector to help with parallelizing following loop + std::vector first_edge(n_nodes); + first_edge[0] = 0; + for (int ii=0; iiilist[ii]; + double xtmp = atom->x[i][0]; + double ytmp = atom->x[i][1]; + double ztmp = atom->x[i][2]; + int *jlist = list->firstneigh[i]; + int jnum = list->numneigh[i]; + int k = first_edge[ii]; + for (int jj=0; jjx[j][0]; + double dely = ytmp - atom->x[j][1]; + double delz = ztmp - atom->x[j][2]; + double rsq = delx * delx + dely * dely + delz * delz; + if (rsq < r_max_squared) { + edge_index[0][k] = i; + if (domain_decomposition) { + edge_index[1][k] = j; + } else { + int j_local = atom->map(atom->tag[j]); + edge_index[1][k] = j_local; + double shiftx = atom->x[j][0] - atom->x[j_local][0]; + double shifty = atom->x[j][1] - atom->x[j_local][1]; + double shiftz = atom->x[j][2] - atom->x[j_local][2]; + double shiftxs = std::round(domain->h_inv[0]*shiftx + domain->h_inv[5]*shifty + domain->h_inv[4]*shiftz); + double shiftys = std::round(domain->h_inv[1]*shifty + domain->h_inv[3]*shiftz); + double shiftzs = std::round(domain->h_inv[2]*shiftz); + unit_shifts[k][0] = shiftxs; + unit_shifts[k][1] = shiftys; + unit_shifts[k][2] = shiftzs; + shifts[k][0] = domain->h[0]*shiftxs + domain->h[5]*shiftys + domain->h[4]*shiftzs; + shifts[k][1] = domain->h[1]*shiftys + domain->h[3]*shiftzs; + shifts[k][2] = domain->h[2]*shiftzs; + } + k++; + } + } + } + + // ----- node_attrs ----- + // node_attrs is one-hot encoding for atomic numbers + auto mace_type = [this](int lammps_type) { + for (int i=0; iall(FLERR, "ERROR: problem converting lammps_type to mace_type."); + return -1; + }; + int n_node_feats = mace_atomic_numbers.size(); + auto node_attrs = torch::zeros({n_nodes,n_node_feats}, torch_float_dtype); + #pragma omp parallel for + for (int ii=0; iiilist[ii]; + node_attrs[i][mace_type(atom->type[i])-1] = 1.0; + } + + // ----- mask for ghost ----- + auto mask = torch::zeros(n_nodes, torch::dtype(torch::kBool)); + #pragma omp parallel for + for (int ii=0; iinlocal; ++ii) { + int i = list->ilist[ii]; + mask[i] = true; + } + + auto batch = torch::zeros({n_nodes}, torch::dtype(torch::kInt64)); + auto energy = torch::empty({1}, torch_float_dtype); + auto forces = torch::empty({n_nodes,3}, torch_float_dtype); + auto ptr = torch::empty({2}, torch::dtype(torch::kInt64)); + auto weight = torch::empty({1}, torch_float_dtype); + ptr[0] = 0; + ptr[1] = n_nodes; + weight[0] = 1.0; + + // pack the input, call the model, extract the output + c10::Dict input; + input.insert("batch", batch); + input.insert("cell", cell); + input.insert("edge_index", edge_index); + input.insert("energy", energy); + input.insert("forces", forces); + input.insert("node_attrs", node_attrs); + input.insert("positions", positions); + input.insert("ptr", ptr); + input.insert("shifts", shifts); + input.insert("unit_shifts", unit_shifts); + input.insert("weight", weight); + auto output = model.forward({input, mask, true, true, false}).toGenericDict(); + + // mace energy + // -> sum of site energies of local atoms + if (eflag_global) { + auto node_energy = output.at("node_energy").toTensor(); + eng_vdwl = 0.0; + #pragma omp parallel for reduction(+:eng_vdwl) + for (int ii=0; iiinum; ++ii) { + int i = list->ilist[ii]; + eng_vdwl += node_energy[i].item(); + } + } + + // mace forces + // -> derivatives of total mace energy + forces = output.at("forces").toTensor(); + #pragma omp parallel for + for (int ii=0; iiinum; ++ii) { + int i = list->ilist[ii]; + atom->f[i][0] = forces[i][0].item(); + atom->f[i][1] = forces[i][1].item(); + atom->f[i][2] = forces[i][2].item(); + } + + // mace site energies + // -> local atoms only + if (eflag_atom) { + auto node_energy = output.at("node_energy").toTensor(); + #pragma omp parallel for + for (int ii=0; iiinum; ++ii) { + int i = list->ilist[ii]; + eatom[i] = node_energy[i].item(); + } + } + + // mace virials (local atoms only) + // -> derivatives of sum of site energies of local atoms + if (vflag_global) { + auto vir = output.at("virials").toTensor(); + virial[0] = vir[0][0][0].item(); + virial[1] = vir[0][1][1].item(); + virial[2] = vir[0][2][2].item(); + virial[3] = 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); + virial[4] = 0.5*(vir[0][2][0].item() + vir[0][0][2].item()); + virial[5] = 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); + } + + // mace site virials + // -> not available + if (vflag_atom) { + error->all(FLERR, "ERROR: pair_mace does not support vflag_atom."); + } + +} + +/* ---------------------------------------------------------------------- */ + +template +void PairMACEKokkos::settings(int narg, char **arg) +{ + if (narg == 1) { + if (strcmp(arg[0], "no_domain_decomposition") == 0) { + domain_decomposition = false; + } else { + error->all(FLERR, "Invalid option for pair_style mace."); + } + } else if (narg > 1) { + error->all(FLERR, "Too many pair_style arguments for pair_style mace."); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairMACEKokkos::coeff(int narg, char **arg) +{ + // TODO: remove print statements from this routine, or have a single proc print + + if (!allocated) allocate(); + + std::cout << "Loading MACEKokkos model from \"" << arg[2] << "\" ..."; + model = torch::jit::load(arg[2]); + std::cout << " finished." << std::endl; + + // extract default dtype from mace model + for (auto p: model.named_attributes()) { + // this is a somewhat random choice of variable to check. could it be improved? + if (p.name == "model.node_embedding.linear.weight") { + if (p.value.toTensor().dtype() == caffe2::TypeMeta::Make()) { + torch_float_dtype = torch::kFloat32; + } else if (p.value.toTensor().dtype() == caffe2::TypeMeta::Make()) { + torch_float_dtype = torch::kFloat64; + } + } + } + std::cout << " - The torch_float_dtype is: " << torch_float_dtype << std::endl; + + // extract r_max from mace model + r_max = model.attr("r_max").toTensor().item(); + r_max_squared = r_max*r_max; + std::cout << " - The r_max is: " << r_max << "." << std::endl; + num_interactions = model.attr("num_interactions").toTensor().item(); + std::cout << " - The model has: " << num_interactions << " layers." << std::endl; + + // extract atomic numbers from mace model + auto a_n = model.attr("atomic_numbers").toTensor(); + for (int i=0; i()); + } + std::cout << " - The model atomic numbers are: " << mace_atomic_numbers << "." << std::endl; + + // extract atomic numbers from pair_coeff + for (int i=3; intypes+1; i++) + for (int j=i; jntypes+1; j++) + setflag[i][j] = 1; +} + +template +void PairMACEKokkos::init_style() +{ + if (force->newton_pair == 0) error->all(FLERR, "ERROR: Pair style mace requires newton pair on."); + + /* + MACE requires the full neighbor list AND neighbors of ghost atoms + it appears that: + * without REQ_GHOST + list->gnum == 0 + list->ilist does not include ghost atoms, but the jlists do + * with REQ_GHOST + list->gnum == atom->nghost + list->ilist includes ghost atoms + */ + if (domain_decomposition) { + neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); + } else { + neighbor->add_request(this, NeighConst::REQ_FULL); + } +} + +template +double PairMACEKokkos::init_one(int i, int j) +{ + // to account for message passing, require cutoff of n_layers * r_max + return num_interactions*model.attr("r_max").toTensor().item(); +} + +template +void PairMACEKokkos::allocate() +{ + allocated = 1; + + memory->create(setflag, atom->ntypes+1, atom->ntypes+1, "pair:setflag"); + for (int i=1; intypes+1; i++) + for (int j=i; jntypes+1; j++) + setflag[i][j] = 0; + + memory->create(cutsq, atom->ntypes+1, atom->ntypes+1, "pair:cutsq"); +} diff --git a/src/ML-MACE/pair_mace_kokkos.h b/src/ML-MACE/pair_mace_kokkos.h new file mode 100644 index 00000000000..1e269c2ce0f --- /dev/null +++ b/src/ML-MACE/pair_mace_kokkos.h @@ -0,0 +1,78 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributors + William C Witt (University of Cambridge) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(mace/kk,PairMACEKokkos); +PairStyle(mace/kk/device,PairLJCutKokkos); +PairStyle(mace/kk/host,PairLJCutKokkos); +// clang-format on +#else + +#ifndef LMP_PAIR_MACE_KOKKOS_H +#define LMP_PAIR_MACE_KOKKOS_H + +#include "pair_kokkos.h" +#include "pair_mace.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template +class PairMACEKokkos : public PairMACE { + + public: + + //enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + PairMACEKokkos(class LAMMPS *); + ~PairMACEKokkos() override; + void compute(int, int) override; + void settings(int, char **) override; + void coeff(int, char **) override; + void init_style() override; + double init_one(int, int) override; + void allocate(); + + protected: + + bool domain_decomposition = true; + torch::jit::script::Module model; + torch::ScalarType torch_float_dtype; + double r_max; + double r_max_squared; + int64_t num_interactions; + std::vector mace_atomic_numbers; + std::vector lammps_atomic_numbers; + const std::array periodic_table = + { "H", "He", + "Li", "Be", "B", "C", "N", "O", "F", "Ne", + "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar", + "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br", "Kr", + "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I", "Xe", + "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", + "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", + "Fr", "Ra", "Ac", "Th", "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr", + "Rf", "Db", "Sg", "Bh", "Hs", "Mt", "Ds", "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og"}; + +}; +} // namespace LAMMPS_NS + +#endif +#endif From 4ee087496324a8ce320459dc0bdf58b8e5e74ab0 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Mon, 27 Feb 2023 20:51:24 +0000 Subject: [PATCH 22/51] fix virial ordering as diagnosed by @imagdau. --- src/ML-MACE/pair_mace.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 4346a2336d7..7fe0f1451b6 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -256,9 +256,9 @@ void PairMACE::compute(int eflag, int vflag) virial[0] = vir[0][0][0].item(); virial[1] = vir[0][1][1].item(); virial[2] = vir[0][2][2].item(); - virial[3] = 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); + virial[3] = 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); virial[4] = 0.5*(vir[0][2][0].item() + vir[0][0][2].item()); - virial[5] = 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); + virial[5] = 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); } // mace site virials From 5290e1126b3f98203dd2eafc21ac6654a60f16de Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Tue, 28 Feb 2023 18:42:10 +0000 Subject: [PATCH 23/51] wip. --- src/ML-MACE/pair_mace_kokkos.cpp | 611 +++++++++++++++++++------------ src/ML-MACE/pair_mace_kokkos.h | 4 +- 2 files changed, 370 insertions(+), 245 deletions(-) diff --git a/src/ML-MACE/pair_mace_kokkos.cpp b/src/ML-MACE/pair_mace_kokkos.cpp index b52a48eed78..55ef528330b 100644 --- a/src/ML-MACE/pair_mace_kokkos.cpp +++ b/src/ML-MACE/pair_mace_kokkos.cpp @@ -18,13 +18,17 @@ #include "pair_mace_kokkos.h" -#include "atom.h" -#include "domain.h" +// TODO: do i need all these? +#include "atom_kokkos.h" +#include "atom_masks.h" #include "error.h" #include "force.h" -#include "memory.h" +#include "kokkos.h" +#include "memory_kokkos.h" #include "neigh_list.h" +#include "neigh_request.h" #include "neighbor.h" +#include "update.h" #include #include @@ -38,6 +42,13 @@ template PairMACEKokkos::PairMACEKokkos(LAMMPS *lmp) : PairMACE(lmp) { no_virial_fdotr_compute = 1; + + // TODO: from lj_cut ... possibly delete + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + //datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; } /* ---------------------------------------------------------------------- */ @@ -45,6 +56,15 @@ PairMACEKokkos::PairMACEKokkos(LAMMPS *lmp) : PairMACE(lmp) template PairMACEKokkos::~PairMACEKokkos() { + // TODO: from lj_cut, possibly delete + if (copymode) return; + + // TODO: from lj_cut, possibly delete + //if (allocated) { + // memoryKK->destroy_kokkos(k_eatom,eatom); + // memoryKK->destroy_kokkos(k_vatom,vatom); + // memoryKK->destroy_kokkos(k_cutsq,cutsq); + //} } /* ---------------------------------------------------------------------- */ @@ -52,223 +72,287 @@ PairMACEKokkos::~PairMACEKokkos() template void PairMACEKokkos::compute(int eflag, int vflag) { - ev_init(eflag, vflag); - - if (atom->nlocal != list->inum) error->all(FLERR, "ERROR: nlocal != inum."); - if (domain_decomposition) { - if (atom->nghost != list->gnum) error->all(FLERR, "ERROR: nghost != gnum."); - } - - // ----- positions ----- - int n_nodes; - if (domain_decomposition) { - n_nodes = atom->nlocal + atom->nghost; - } else { - // normally, ghost atoms are included in the graph as independent - // nodes, as required when the local domain does not have PBC. - // however, in no_domain_decomposition mode, ghost atoms are known to - // be shifted versions of local atoms. - n_nodes = atom->nlocal; - } - auto positions = torch::empty({n_nodes,3}, torch_float_dtype); - #pragma omp parallel for - for (int ii=0; iiilist[ii]; - positions[i][0] = atom->x[i][0]; - positions[i][1] = atom->x[i][1]; - positions[i][2] = atom->x[i][2]; - } - - // ----- cell ----- - auto cell = torch::zeros({3,3}, torch_float_dtype); - cell[0][0] = domain->h[0]; - cell[0][1] = 0.0; - cell[0][2] = 0.0; - cell[1][0] = domain->h[5]; - cell[1][1] = domain->h[1]; - cell[1][2] = 0.0; - cell[2][0] = domain->h[4]; - cell[2][1] = domain->h[3]; - cell[2][2] = domain->h[2]; - - // ----- edge_index and unit_shifts ----- - // count total number of edges - int n_edges = 0; - std::vector n_edges_vec(n_nodes, 0); - #pragma omp parallel for reduction(+:n_edges) - for (int ii=0; iiilist[ii]; - double xtmp = atom->x[i][0]; - double ytmp = atom->x[i][1]; - double ztmp = atom->x[i][2]; - int *jlist = list->firstneigh[i]; - int jnum = list->numneigh[i]; - for (int jj=0; jjx[j][0]; - double dely = ytmp - atom->x[j][1]; - double delz = ztmp - atom->x[j][2]; - double rsq = delx * delx + dely * dely + delz * delz; - if (rsq < r_max_squared) { - n_edges += 1; - n_edges_vec[ii] += 1; - } - } - } - // make first_edge vector to help with parallelizing following loop - std::vector first_edge(n_nodes); - first_edge[0] = 0; - for (int ii=0; iiilist[ii]; - double xtmp = atom->x[i][0]; - double ytmp = atom->x[i][1]; - double ztmp = atom->x[i][2]; - int *jlist = list->firstneigh[i]; - int jnum = list->numneigh[i]; - int k = first_edge[ii]; - for (int jj=0; jjx[j][0]; - double dely = ytmp - atom->x[j][1]; - double delz = ztmp - atom->x[j][2]; - double rsq = delx * delx + dely * dely + delz * delz; - if (rsq < r_max_squared) { - edge_index[0][k] = i; - if (domain_decomposition) { - edge_index[1][k] = j; - } else { - int j_local = atom->map(atom->tag[j]); - edge_index[1][k] = j_local; - double shiftx = atom->x[j][0] - atom->x[j_local][0]; - double shifty = atom->x[j][1] - atom->x[j_local][1]; - double shiftz = atom->x[j][2] - atom->x[j_local][2]; - double shiftxs = std::round(domain->h_inv[0]*shiftx + domain->h_inv[5]*shifty + domain->h_inv[4]*shiftz); - double shiftys = std::round(domain->h_inv[1]*shifty + domain->h_inv[3]*shiftz); - double shiftzs = std::round(domain->h_inv[2]*shiftz); - unit_shifts[k][0] = shiftxs; - unit_shifts[k][1] = shiftys; - unit_shifts[k][2] = shiftzs; - shifts[k][0] = domain->h[0]*shiftxs + domain->h[5]*shiftys + domain->h[4]*shiftzs; - shifts[k][1] = domain->h[1]*shiftys + domain->h[3]*shiftzs; - shifts[k][2] = domain->h[2]*shiftzs; - } - k++; - } - } - } + ev_init(eflag,vflag,0); - // ----- node_attrs ----- - // node_attrs is one-hot encoding for atomic numbers - auto mace_type = [this](int lammps_type) { - for (int i=0; iall(FLERR, "ERROR: problem converting lammps_type to mace_type."); - return -1; - }; - int n_node_feats = mace_atomic_numbers.size(); - auto node_attrs = torch::zeros({n_nodes,n_node_feats}, torch_float_dtype); - #pragma omp parallel for - for (int ii=0; iiilist[ii]; - node_attrs[i][mace_type(atom->type[i])-1] = 1.0; + if (eflag_atom || vflag_atom) { + error->all(FLERR, "ERROR: kokkos eflag_atom not implemented."); } - // ----- mask for ghost ----- - auto mask = torch::zeros(n_nodes, torch::dtype(torch::kBool)); - #pragma omp parallel for - for (int ii=0; iinlocal; ++ii) { - int i = list->ilist[ii]; - mask[i] = true; - } - - auto batch = torch::zeros({n_nodes}, torch::dtype(torch::kInt64)); - auto energy = torch::empty({1}, torch_float_dtype); - auto forces = torch::empty({n_nodes,3}, torch_float_dtype); - auto ptr = torch::empty({2}, torch::dtype(torch::kInt64)); - auto weight = torch::empty({1}, torch_float_dtype); - ptr[0] = 0; - ptr[1] = n_nodes; - weight[0] = 1.0; - - // pack the input, call the model, extract the output - c10::Dict input; - input.insert("batch", batch); - input.insert("cell", cell); - input.insert("edge_index", edge_index); - input.insert("energy", energy); - input.insert("forces", forces); - input.insert("node_attrs", node_attrs); - input.insert("positions", positions); - input.insert("ptr", ptr); - input.insert("shifts", shifts); - input.insert("unit_shifts", unit_shifts); - input.insert("weight", weight); - auto output = model.forward({input, mask, true, true, false}).toGenericDict(); - - // mace energy - // -> sum of site energies of local atoms - if (eflag_global) { - auto node_energy = output.at("node_energy").toTensor(); - eng_vdwl = 0.0; - #pragma omp parallel for reduction(+:eng_vdwl) - for (int ii=0; iiinum; ++ii) { - int i = list->ilist[ii]; - eng_vdwl += node_energy[i].item(); - } - } - - // mace forces - // -> derivatives of total mace energy - forces = output.at("forces").toTensor(); - #pragma omp parallel for - for (int ii=0; iiinum; ++ii) { - int i = list->ilist[ii]; - atom->f[i][0] = forces[i][0].item(); - atom->f[i][1] = forces[i][1].item(); - atom->f[i][2] = forces[i][2].item(); - } - - // mace site energies - // -> local atoms only - if (eflag_atom) { - auto node_energy = output.at("node_energy").toTensor(); - #pragma omp parallel for - for (int ii=0; iiinum; ++ii) { - int i = list->ilist[ii]; - eatom[i] = node_energy[i].item(); - } - } - - // mace virials (local atoms only) - // -> derivatives of sum of site energies of local atoms - if (vflag_global) { - auto vir = output.at("virials").toTensor(); - virial[0] = vir[0][0][0].item(); - virial[1] = vir[0][1][1].item(); - virial[2] = vir[0][2][2].item(); - virial[3] = 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); - virial[4] = 0.5*(vir[0][2][0].item() + vir[0][0][2].item()); - virial[5] = 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); - } - - // mace site virials - // -> not available - if (vflag_atom) { - error->all(FLERR, "ERROR: pair_mace does not support vflag_atom."); - } +// if (eflag_atom) { +// memoryKK->destroy_kokkos(k_eatom,eatom); +// memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); +// d_eatom = k_eatom.view(); +// } +// if (vflag_atom) { +// memoryKK->destroy_kokkos(k_vatom,vatom); +// memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); +// d_vatom = k_vatom.view(); +// } + + atomKK->sync(execution_space,datamask_read); + //k_cutsq.template sync(); + //k_params.template sync(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view(); + c_x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + type = atomKK->k_type.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + //newton_pair = force->newton_pair; + //special_lj[0] = force->special_lj[0]; + //special_lj[1] = force->special_lj[1]; + //special_lj[2] = force->special_lj[2]; + //special_lj[3] = force->special_lj[3]; + + // loop over neighbors of my atoms + + //copymode = 1; + + //EV_FLOAT ev = pair_compute,void >(this,(NeighListKokkos*)list); + + //if (eflag_global) eng_vdwl += ev.evdwl; + //if (vflag_global) { + // virial[0] += ev.v[0]; + // virial[1] += ev.v[1]; + // virial[2] += ev.v[2]; + // virial[3] += ev.v[3]; + // virial[4] += ev.v[4]; + // virial[5] += ev.v[5]; + //} + + //if (eflag_atom) { + // k_eatom.template modify(); + // k_eatom.template sync(); + //} + + //if (vflag_atom) { + // k_vatom.template modify(); + // k_vatom.template sync(); + //} + + copymode = 0; + + +// ev_init(eflag, vflag); +// +// if (atom->nlocal != list->inum) error->all(FLERR, "ERROR: nlocal != inum."); +// if (domain_decomposition) { +// if (atom->nghost != list->gnum) error->all(FLERR, "ERROR: nghost != gnum."); +// } +// +// // ----- positions ----- +// int n_nodes; +// if (domain_decomposition) { +// n_nodes = atom->nlocal + atom->nghost; +// } else { +// // normally, ghost atoms are included in the graph as independent +// // nodes, as required when the local domain does not have PBC. +// // however, in no_domain_decomposition mode, ghost atoms are known to +// // be shifted versions of local atoms. +// n_nodes = atom->nlocal; +// } +// auto positions = torch::empty({n_nodes,3}, torch_float_dtype); +// #pragma omp parallel for +// for (int ii=0; iiilist[ii]; +// positions[i][0] = atom->x[i][0]; +// positions[i][1] = atom->x[i][1]; +// positions[i][2] = atom->x[i][2]; +// } +// +// // ----- cell ----- +// auto cell = torch::zeros({3,3}, torch_float_dtype); +// cell[0][0] = domain->h[0]; +// cell[0][1] = 0.0; +// cell[0][2] = 0.0; +// cell[1][0] = domain->h[5]; +// cell[1][1] = domain->h[1]; +// cell[1][2] = 0.0; +// cell[2][0] = domain->h[4]; +// cell[2][1] = domain->h[3]; +// cell[2][2] = domain->h[2]; +// +// // ----- edge_index and unit_shifts ----- +// // count total number of edges +// int n_edges = 0; +// std::vector n_edges_vec(n_nodes, 0); +// #pragma omp parallel for reduction(+:n_edges) +// for (int ii=0; iiilist[ii]; +// double xtmp = atom->x[i][0]; +// double ytmp = atom->x[i][1]; +// double ztmp = atom->x[i][2]; +// int *jlist = list->firstneigh[i]; +// int jnum = list->numneigh[i]; +// for (int jj=0; jjx[j][0]; +// double dely = ytmp - atom->x[j][1]; +// double delz = ztmp - atom->x[j][2]; +// double rsq = delx * delx + dely * dely + delz * delz; +// if (rsq < r_max_squared) { +// n_edges += 1; +// n_edges_vec[ii] += 1; +// } +// } +// } +// // make first_edge vector to help with parallelizing following loop +// std::vector first_edge(n_nodes); +// first_edge[0] = 0; +// for (int ii=0; iiilist[ii]; +// double xtmp = atom->x[i][0]; +// double ytmp = atom->x[i][1]; +// double ztmp = atom->x[i][2]; +// int *jlist = list->firstneigh[i]; +// int jnum = list->numneigh[i]; +// int k = first_edge[ii]; +// for (int jj=0; jjx[j][0]; +// double dely = ytmp - atom->x[j][1]; +// double delz = ztmp - atom->x[j][2]; +// double rsq = delx * delx + dely * dely + delz * delz; +// if (rsq < r_max_squared) { +// edge_index[0][k] = i; +// if (domain_decomposition) { +// edge_index[1][k] = j; +// } else { +// int j_local = atom->map(atom->tag[j]); +// edge_index[1][k] = j_local; +// double shiftx = atom->x[j][0] - atom->x[j_local][0]; +// double shifty = atom->x[j][1] - atom->x[j_local][1]; +// double shiftz = atom->x[j][2] - atom->x[j_local][2]; +// double shiftxs = std::round(domain->h_inv[0]*shiftx + domain->h_inv[5]*shifty + domain->h_inv[4]*shiftz); +// double shiftys = std::round(domain->h_inv[1]*shifty + domain->h_inv[3]*shiftz); +// double shiftzs = std::round(domain->h_inv[2]*shiftz); +// unit_shifts[k][0] = shiftxs; +// unit_shifts[k][1] = shiftys; +// unit_shifts[k][2] = shiftzs; +// shifts[k][0] = domain->h[0]*shiftxs + domain->h[5]*shiftys + domain->h[4]*shiftzs; +// shifts[k][1] = domain->h[1]*shiftys + domain->h[3]*shiftzs; +// shifts[k][2] = domain->h[2]*shiftzs; +// } +// k++; +// } +// } +// } +// +// // ----- node_attrs ----- +// // node_attrs is one-hot encoding for atomic numbers +// auto mace_type = [this](int lammps_type) { +// for (int i=0; iall(FLERR, "ERROR: problem converting lammps_type to mace_type."); +// return -1; +// }; +// int n_node_feats = mace_atomic_numbers.size(); +// auto node_attrs = torch::zeros({n_nodes,n_node_feats}, torch_float_dtype); +// #pragma omp parallel for +// for (int ii=0; iiilist[ii]; +// node_attrs[i][mace_type(atom->type[i])-1] = 1.0; +// } +// +// // ----- mask for ghost ----- +// auto mask = torch::zeros(n_nodes, torch::dtype(torch::kBool)); +// #pragma omp parallel for +// for (int ii=0; iinlocal; ++ii) { +// int i = list->ilist[ii]; +// mask[i] = true; +// } +// +// auto batch = torch::zeros({n_nodes}, torch::dtype(torch::kInt64)); +// auto energy = torch::empty({1}, torch_float_dtype); +// auto forces = torch::empty({n_nodes,3}, torch_float_dtype); +// auto ptr = torch::empty({2}, torch::dtype(torch::kInt64)); +// auto weight = torch::empty({1}, torch_float_dtype); +// ptr[0] = 0; +// ptr[1] = n_nodes; +// weight[0] = 1.0; +// +// // pack the input, call the model, extract the output +// c10::Dict input; +// input.insert("batch", batch); +// input.insert("cell", cell); +// input.insert("edge_index", edge_index); +// input.insert("energy", energy); +// input.insert("forces", forces); +// input.insert("node_attrs", node_attrs); +// input.insert("positions", positions); +// input.insert("ptr", ptr); +// input.insert("shifts", shifts); +// input.insert("unit_shifts", unit_shifts); +// input.insert("weight", weight); +// auto output = model.forward({input, mask, true, true, false}).toGenericDict(); +// +// // mace energy +// // -> sum of site energies of local atoms +// if (eflag_global) { +// auto node_energy = output.at("node_energy").toTensor(); +// eng_vdwl = 0.0; +// #pragma omp parallel for reduction(+:eng_vdwl) +// for (int ii=0; iiinum; ++ii) { +// int i = list->ilist[ii]; +// eng_vdwl += node_energy[i].item(); +// } +// } +// +// // mace forces +// // -> derivatives of total mace energy +// forces = output.at("forces").toTensor(); +// #pragma omp parallel for +// for (int ii=0; iiinum; ++ii) { +// int i = list->ilist[ii]; +// atom->f[i][0] = forces[i][0].item(); +// atom->f[i][1] = forces[i][1].item(); +// atom->f[i][2] = forces[i][2].item(); +// } +// +// // mace site energies +// // -> local atoms only +// if (eflag_atom) { +// auto node_energy = output.at("node_energy").toTensor(); +// #pragma omp parallel for +// for (int ii=0; iiinum; ++ii) { +// int i = list->ilist[ii]; +// eatom[i] = node_energy[i].item(); +// } +// } +// +// // mace virials (local atoms only) +// // -> derivatives of sum of site energies of local atoms +// if (vflag_global) { +// auto vir = output.at("virials").toTensor(); +// virial[0] = vir[0][0][0].item(); +// virial[1] = vir[0][1][1].item(); +// virial[2] = vir[0][2][2].item(); +// virial[3] = 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); +// virial[4] = 0.5*(vir[0][2][0].item() + vir[0][0][2].item()); +// virial[5] = 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); +// } +// +// // mace site virials +// // -> not available +// if (vflag_atom) { +// error->all(FLERR, "ERROR: pair_mace does not support vflag_atom."); +// } } @@ -344,41 +428,82 @@ void PairMACEKokkos::coeff(int narg, char **arg) template void PairMACEKokkos::init_style() { - if (force->newton_pair == 0) error->all(FLERR, "ERROR: Pair style mace requires newton pair on."); - - /* - MACE requires the full neighbor list AND neighbors of ghost atoms - it appears that: - * without REQ_GHOST - list->gnum == 0 - list->ilist does not include ghost atoms, but the jlists do - * with REQ_GHOST - list->gnum == atom->nghost - list->ilist includes ghost atoms - */ - if (domain_decomposition) { - neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); - } else { - neighbor->add_request(this, NeighConst::REQ_FULL); - } +// if (force->newton_pair == 0) error->all(FLERR, "ERROR: Pair style mace requires newton pair on."); +// +// /* +// MACE requires the full neighbor list AND neighbors of ghost atoms +// it appears that: +// * without REQ_GHOST +// list->gnum == 0 +// list->ilist does not include ghost atoms, but the jlists do +// * with REQ_GHOST +// list->gnum == atom->nghost +// list->ilist includes ghost atoms +// */ +// if (domain_decomposition) { +// neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); +// } else { +// neighbor->add_request(this, NeighConst::REQ_FULL); +// } + + PairMACE::init_style(); + + // TODO: from lj_cut, possibly delete + // adjust neighbor list request for KOKKOS + + //neighflag = lmp->kokkos->neighflag; + //auto request = neighbor->find_request(this); + //request->set_kokkos_host(std::is_same::value && + // !std::is_same::value); + //request->set_kokkos_device(std::is_same::value); + //if (neighflag == FULL) request->enable_full(); } template double PairMACEKokkos::init_one(int i, int j) { - // to account for message passing, require cutoff of n_layers * r_max - return num_interactions*model.attr("r_max").toTensor().item(); +// // to account for message passing, require cutoff of n_layers * r_max +// return num_interactions*model.attr("r_max").toTensor().item(); + + double cutone = PairMACE::init_one(i,j); + +// k_params.h_view(i,j).lj1 = lj1[i][j]; +// k_params.h_view(i,j).lj2 = lj2[i][j]; +// k_params.h_view(i,j).lj3 = lj3[i][j]; +// k_params.h_view(i,j).lj4 = lj4[i][j]; +// k_params.h_view(i,j).offset = offset[i][j]; +// k_params.h_view(i,j).cutsq = cutone*cutone; +// k_params.h_view(j,i) = k_params.h_view(i,j); +// if (i(); +// k_params.template modify(); + + return cutone; } template void PairMACEKokkos::allocate() { - allocated = 1; - - memory->create(setflag, atom->ntypes+1, atom->ntypes+1, "pair:setflag"); - for (int i=1; intypes+1; i++) - for (int j=i; jntypes+1; j++) - setflag[i][j] = 0; + PairMACE::allocate(); + + // TODO: from lj_cut, possibly delete + //int n = atom->ntypes; + //memory->destroy(cutsq); + //memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + //d_cutsq = k_cutsq.template view(); + ////k_params = Kokkos::DualView("PairLJCut::params",n+1,n+1); + //params = k_params.template view(); +} - memory->create(cutsq, atom->ntypes+1, atom->ntypes+1, "pair:cutsq"); +namespace LAMMPS_NS { +template class PairMACEKokkos; +#ifdef LMP_KOKKOS_GPU +template class PairMACEKokkos; +#endif } + diff --git a/src/ML-MACE/pair_mace_kokkos.h b/src/ML-MACE/pair_mace_kokkos.h index 1e269c2ce0f..0bf3b813941 100644 --- a/src/ML-MACE/pair_mace_kokkos.h +++ b/src/ML-MACE/pair_mace_kokkos.h @@ -19,8 +19,8 @@ #ifdef PAIR_CLASS // clang-format off PairStyle(mace/kk,PairMACEKokkos); -PairStyle(mace/kk/device,PairLJCutKokkos); -PairStyle(mace/kk/host,PairLJCutKokkos); +PairStyle(mace/kk/device,PairMACEKokkos); +PairStyle(mace/kk/host,PairMACEKokkos); // clang-format on #else From 4239331858f42b113a41413e6ce2aba7b62a4dd4 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Fri, 10 Mar 2023 15:52:14 +0000 Subject: [PATCH 24/51] wip. --- src/KOKKOS/pair_mace_kokkos.cpp | 597 +++++++++++++++++++++ src/{ML-MACE => KOKKOS}/pair_mace_kokkos.h | 22 +- src/ML-MACE/pair_mace_kokkos.cpp | 509 ------------------ 3 files changed, 617 insertions(+), 511 deletions(-) create mode 100644 src/KOKKOS/pair_mace_kokkos.cpp rename src/{ML-MACE => KOKKOS}/pair_mace_kokkos.h (85%) delete mode 100644 src/ML-MACE/pair_mace_kokkos.cpp diff --git a/src/KOKKOS/pair_mace_kokkos.cpp b/src/KOKKOS/pair_mace_kokkos.cpp new file mode 100644 index 00000000000..64a80ae9338 --- /dev/null +++ b/src/KOKKOS/pair_mace_kokkos.cpp @@ -0,0 +1,597 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributors + William C Witt (University of Cambridge) +------------------------------------------------------------------------- */ + +#include "pair_mace_kokkos.h" + +// TODO: do i need all these? +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "memory_kokkos.h" +#include "neigh_list.h" +#include "neighbor_kokkos.h" +#include "neigh_request.h" +#include "neighbor.h" +#include "update.h" + +#include +#include +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +PairMACEKokkos::PairMACEKokkos(LAMMPS *lmp) : PairMACE(lmp) +{ +std::cout << "hello from kokkos mace constructor" << std::endl; + no_virial_fdotr_compute = 1; + + //kokkosable = 1; + //atomKK = (AtomKokkos *) atom; + //execution_space = ExecutionSpaceFromDevice::space; + //datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + //datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + // pair_pace_kokkos has these instead + //datamask_read = EMPTY_MASK; + //datamask_modify = EMPTY_MASK; + + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; + + host_flag = (execution_space == Host); +} + +/* ---------------------------------------------------------------------- */ + +template +PairMACEKokkos::~PairMACEKokkos() +{ + if (copymode) return; + + // from lj_cut_kokkos + //if (allocated) { + // memoryKK->destroy_kokkos(k_eatom,eatom); + // memoryKK->destroy_kokkos(k_vatom,vatom); + // memoryKK->destroy_kokkos(k_cutsq,cutsq); + //} + + // from pair_pace_kokkos + //memoryKK->destroy_kokkos(k_eatom,eatom); + //memoryKK->destroy_kokkos(k_vatom,vatom); +} + +/* ---------------------------------------------------------------------- */ + +template +void PairMACEKokkos::compute(int eflag, int vflag) +{ + ev_init(eflag,vflag,0); + + if (atom->nlocal != list->inum) error->all(FLERR, "ERROR: nlocal != inum."); + if (domain_decomposition) { + if (atom->nghost != list->gnum) error->all(FLERR, "ERROR: nghost != gnum."); + } + + if (eflag_atom || vflag_atom) { + error->all(FLERR, "ERROR: kokkos eflag_atom not implemented."); + } + +// if (eflag_atom) { +// memoryKK->destroy_kokkos(k_eatom,eatom); +// memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); +// d_eatom = k_eatom.view(); +// } +// if (vflag_atom) { +// memoryKK->destroy_kokkos(k_vatom,vatom); +// memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); +// d_vatom = k_vatom.view(); +// } + + auto r_max_squared = this->r_max_squared; + auto h0 = domain->h[0]; + auto h1 = domain->h[1]; + auto h2 = domain->h[2]; + auto h3 = domain->h[3]; + auto h4 = domain->h[4]; + auto h5 = domain->h[5]; + auto hinv0 = domain->h_inv[0]; + auto hinv1 = domain->h_inv[1]; + auto hinv2 = domain->h_inv[2]; + auto hinv3 = domain->h_inv[3]; + auto hinv4 = domain->h_inv[4]; + auto hinv5 = domain->h_inv[5]; + + auto k_lammps_atomic_numbers = Kokkos::View("k_lammps_atomic_numbers",lammps_atomic_numbers.size()); + auto k_lammps_atomic_numbers_mirror = Kokkos::create_mirror_view(k_lammps_atomic_numbers); + for (int i=0; i("k_mace_atomic_numbers",mace_atomic_numbers.size()); + auto k_mace_atomic_numbers_mirror = Kokkos::create_mirror_view(k_mace_atomic_numbers); + for (int i=0; i("k_positions", n_nodes); +// Kokkos::vector lammps_atomic_numbers = this->lammps_atomic_numbers; +// Kokkos::vector mace_atomic_numbers = this->mace_atomic_numbers; + + // atom map + auto map_style = atom->map_style; + auto k_map_array = atomKK->k_map_array; + auto k_map_hash = atomKK->k_map_hash; + k_map_array.template sync(); + + atomKK->sync(execution_space,X_MASK|F_MASK|TYPE_MASK|TAG_MASK); + + auto x = atomKK->k_x.view(); +// c_x = atomKK->k_x.view(); + auto f = atomKK->k_f.view(); + auto tag = atomKK->k_tag.view(); + auto type = atomKK->k_type.view(); +// int nlocal = atom->nlocal; +// int nall = atom->nlocal + atom->nghost; + + NeighListKokkos* k_list = static_cast*>(list); + auto d_numneigh = k_list->d_numneigh; + auto d_neighbors = k_list->d_neighbors; + auto d_ilist = k_list->d_ilist; + //inum = list->inum; + + // ----- positions ----- + int n_nodes; + if (domain_decomposition) { + n_nodes = atom->nlocal + atom->nghost; + } else { + // normally, ghost atoms are included in the graph as independent + // nodes, as required when the local domain does not have PBC. + // however, in no_domain_decomposition mode, ghost atoms are known to + // be shifted versions of local atoms. + n_nodes = atom->nlocal; + } + auto k_positions = Kokkos::View("k_positions", n_nodes); + Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA (const int i) { + k_positions(i,0) = x(i,0); + k_positions(i,1) = x(i,1); + k_positions(i,2) = x(i,2); + }); + auto positions = torch::from_blob( + k_positions.data(), {n_nodes,3}, torch_float_dtype); + + // ----- cell ----- + // TODO: how to use kokkos here? + auto cell = torch::zeros({3,3}, torch_float_dtype); + cell[0][0] = h0; + cell[0][1] = 0.0; + cell[0][2] = 0.0; + cell[1][0] = h5; + cell[1][1] = h1; + cell[1][2] = 0.0; + cell[2][0] = h4; + cell[2][1] = h3; + cell[2][2] = h2; + + // ----- edge_index and unit_shifts ----- + // count total number of edges + auto k_n_edges_vec = Kokkos::View("k_n_edges_vec", n_nodes); + Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA (const int ii) { + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int jnum = d_numneigh[i]; + for (int jj=0; jj("k_first_edge", n_nodes); // initialized to zero + Kokkos::parallel_for(n_nodes-1, KOKKOS_LAMBDA(const int ii) { + k_first_edge[ii+1] = k_first_edge(ii) + k_n_edges_vec(ii); + }); + // fill edge_index and unit_shifts tensors + Kokkos::View k_edge_index("k_edge_index", 2, n_edges); + Kokkos::View k_unit_shifts("k_unit_shifts", n_edges); + Kokkos::View k_shifts("k_shifts", n_edges); + if (domain_decomposition) { + +// if (domain_decomposition) { +// edge_index[1][k] = j; + + } else { + + Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA(const int ii) { + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int jnum = d_numneigh[i]; + int k = k_first_edge(ii); + for (int jj=0; jjmap(tag(j)); + int j_local = AtomKokkos::map_kokkos(tag(j),map_style,k_map_array,k_map_hash); + k_edge_index(1,k) = j_local; + double shiftx = x(j,0) - x(j_local,0); + double shifty = x(j,1) - x(j_local,1); + double shiftz = x(j,2) - x(j_local,2); + double shiftxs = std::round(hinv0*shiftx + hinv5*shifty + hinv4*shiftz); + double shiftys = std::round(hinv1*shifty + hinv3*shiftz); + double shiftzs = std::round(hinv2*shiftz); + k_unit_shifts(k,0) = shiftxs; + k_unit_shifts(k,1) = shiftys; + k_unit_shifts(k,2) = shiftzs; + k_shifts(k,0) = h0*shiftxs + h5*shiftys + h4*shiftzs; + k_shifts(k,1) = h1*shiftys + h3*shiftzs; + k_shifts(k,2) = h2*shiftzs; + k++; + } + } + }); + } + auto edge_index = torch::from_blob(k_edge_index.data(), {2,n_edges}, torch::dtype(torch::kInt64)); + auto unit_shifts = torch::from_blob(k_unit_shifts.data(), {n_edges,3}, torch_float_dtype); + auto shifts = torch::from_blob(k_shifts.data(), {n_edges,3}, torch_float_dtype); + + // ----- node_attrs ----- + // node_attrs is one-hot encoding for atomic numbers + int n_node_feats = mace_atomic_numbers.size(); + Kokkos::View k_node_attrs("k_node_attrs", n_nodes, n_node_feats); + Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA(const int ii) { + const int i = d_ilist[ii]; + const int lammps_type = type[i]; + int t = -1; + for (int j=0; jall(FLERR, "ERROR: problem converting lammps_type to mace_type."); + k_node_attrs(i,t-1) = 1.0; + }); + auto node_attrs = torch::from_blob(k_node_attrs.data(), {n_nodes,n_node_feats}, torch_float_dtype); + +// // ----- mask for ghost ----- +// Kokkos::View k_mask("k_mask", n_nodes); +// Kokkos::parallel_for(atom->nlocal, KOKKOS_LAMBDA(const int ii) { +// const int i = d_ilist[ii]; +// k_mask(i) = true; +// }); +// auto mask = torch::from_blob(k_mask.data(), n_nodes, torch::dtype(torch::kBool)); +// +// auto batch = torch::zeros({n_nodes}, torch::dtype(torch::kInt64)); +// auto energy = torch::empty({1}, torch_float_dtype); +// auto forces = torch::empty({n_nodes,3}, torch_float_dtype); +// auto ptr = torch::empty({2}, torch::dtype(torch::kInt64)); +// auto weight = torch::empty({1}, torch_float_dtype); +// ptr[0] = 0; +// ptr[1] = n_nodes; +// weight[0] = 1.0; +// +//std::cout << "packing the input" << std::endl; +// +// // pack the input, call the model, extract the output +// c10::Dict input; +// input.insert("batch", batch); +// input.insert("cell", cell); +// input.insert("edge_index", edge_index); +// input.insert("energy", energy); +// input.insert("forces", forces); +// input.insert("node_attrs", node_attrs); +// input.insert("positions", positions); +// input.insert("ptr", ptr); +// input.insert("shifts", shifts); +// input.insert("unit_shifts", unit_shifts); +// input.insert("weight", weight); +////std::cout << "batch" << batch << std::endl; +////std::cout << "cell" << cell << std::endl; +////std::cout << "edge_index" << edge_index << std::endl; +////// energy +////// forces +////std::cout << "node_attrs" << node_attrs << std::endl; +////std::cout << "positions" << positions << std::endl; +////std::cout << "ptr" << ptr << std::endl; +////std::cout << "shifts" << shifts << std::endl; +////std::cout << "unit_shifts" << unit_shifts << std::endl; +////std::cout << "weight" << weight << std::endl; +////std::cout << "mask" << mask << std::endl; +// auto output = model.forward({input, mask, true, true, false}).toGenericDict(); +// +////std::cout << "energy: " << output.at("energy").toTensor() << std::endl; +////std::cout << "node_energy: " << output.at("node_energy").toTensor() << std::endl; +// +// // mace energy +// // -> sum of site energies of local atoms +// if (eflag_global) { +// auto node_energy = output.at("node_energy").toTensor(); +// eng_vdwl = 0.0; +// Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int ii, double &eng_vdwl) { +// const int i = d_ilist[ii]; +// eng_vdwl += node_energy[i].item(); +// }, eng_vdwl); +// } +// +// // mace forces +// // -> derivatives of total mace energy +// forces = output.at("forces").toTensor(); +// Kokkos::View> +// k_f(forces.data_ptr(), n_nodes); +// Kokkos::parallel_for(atom->nlocal, KOKKOS_LAMBDA(const int ii) { +// const int i = d_ilist[ii]; +// f(i,0) = k_f(i,0); +// f(i,1) = k_f(i,1); +// f(i,2) = k_f(i,2); +// }); +// +//// // mace site energies +//// // -> local atoms only +//// if (eflag_atom) { +//// auto node_energy = output.at("node_energy").toTensor(); +//// #pragma omp parallel for +//// for (int ii=0; iiinum; ++ii) { +//// int i = list->ilist[ii]; +//// eatom[i] = node_energy[i].item(); +//// } +//// } +//// +// // mace virials (local atoms only) +// // -> derivatives of sum of site energies of local atoms +// if (vflag_global) { +// auto vir = output.at("virials").toTensor(); +// virial[0] = vir[0][0][0].item(); +// virial[1] = vir[0][1][1].item(); +// virial[2] = vir[0][2][2].item(); +// virial[3] = 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); +// virial[4] = 0.5*(vir[0][2][0].item() + vir[0][0][2].item()); +// virial[5] = 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); +// } +//// +//// // mace site virials +//// // -> not available +//// if (vflag_atom) { +//// error->all(FLERR, "ERROR: pair_mace does not support vflag_atom."); +//// } +// +// +// //copymode = 1; +// +// //EV_FLOAT ev = pair_compute,void >(this,(NeighListKokkos*)list); +// +// //if (eflag_global) eng_vdwl += ev.evdwl; +// //if (vflag_global) { +// // virial[0] += ev.v[0]; +// // virial[1] += ev.v[1]; +// // virial[2] += ev.v[2]; +// // virial[3] += ev.v[3]; +// // virial[4] += ev.v[4]; +// // virial[5] += ev.v[5]; +// //} +// +// //if (eflag_atom) { +// // k_eatom.template modify(); +// // k_eatom.template sync(); +// //} +// +// //if (vflag_atom) { +// // k_vatom.template modify(); +// // k_vatom.template sync(); +// //} +// +// copymode = 0; + +} + +/* ---------------------------------------------------------------------- */ + +template +void PairMACEKokkos::settings(int narg, char **arg) +{ + if (narg == 1) { + if (strcmp(arg[0], "no_domain_decomposition") == 0) { + domain_decomposition = false; + } else { + error->all(FLERR, "Invalid option for pair_style mace."); + } + } else if (narg > 1) { + error->all(FLERR, "Too many pair_style arguments for pair_style mace."); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairMACEKokkos::coeff(int narg, char **arg) +{ +std::cout << "hello from kokkos coeff" << std::endl; + if (!allocated) allocate(); +std::cout << "allocated: " << allocated << std::endl; + PairMACE::coeff(narg,arg); +std::cout << "allocated: " << allocated << std::endl; +} + +template +void PairMACEKokkos::init_style() +{ +std::cout << "hello from kokkos init_style" << std::endl; +// if (force->newton_pair == 0) error->all(FLERR, "ERROR: Pair style mace requires newton pair on."); +// +// /* +// MACE requires the full neighbor list AND neighbors of ghost atoms +// it appears that: +// * without REQ_GHOST +// list->gnum == 0 +// list->ilist does not include ghost atoms, but the jlists do +// * with REQ_GHOST +// list->gnum == atom->nghost +// list->ilist includes ghost atoms +// */ +// if (domain_decomposition) { +// neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); +// } else { +// neighbor->add_request(this, NeighConst::REQ_FULL); +// } + + if (host_flag) { + PairMACE::init_style(); + return; + } +std::cout << "after mace init" << std::endl; + + + // neighbor list request for KOKKOS + + neighflag = lmp->kokkos->neighflag; + + auto request = neighbor->add_request(this, NeighConst::REQ_FULL); + request->set_kokkos_host(std::is_same::value && + !std::is_same::value); + request->set_kokkos_device(std::is_same::value); +// if (neighflag == FULL) +// error->all(FLERR,"Must use half neighbor list style with pair pace/kk"); + + + + + + + + + + + +// // TODO: from lj_cut, possibly delete +// // adjust neighbor list request for KOKKOS +// auto request = neighbor->add_request(this, NeighConst::REQ_FULL); +// request->set_kokkos_host(std::is_same::value && +// !std::is_same::value); +// request->set_kokkos_device(std::is_same::value); +// +// neighflag = lmp->kokkos->neighflag; +// auto request = neighbor->find_request(this); +//std::cout << "before request" << std::endl; +// request->set_kokkos_host(std::is_same::value && +// !std::is_same::value); +//std::cout << "before request" << std::endl; +// request->set_kokkos_device(std::is_same::value); +//std::cout << "after request" << std::endl; +// if (neighflag == FULL) +// std::cout << "REQUESTING FULL LIST." << std::endl; +// if (neighflag == FULL) request->enable_full(); + + //auto request = neighbor->add_request(this, NeighConst::REQ_FULL); + //request->set_kokkos_host(std::is_same::value && + // !std::is_same::value); + //request->set_kokkos_device(std::is_same::value); + //if (neighflag == FULL) + // error->all(FLERR,"Must use half neighbor list style with pair pace/kk"); + +std::cout << "goodbye from init_style" << std::endl; +} + +template +double PairMACEKokkos::init_one(int i, int j) +{ +std::cout << "hello from kokkos init_one" << std::endl; + double cutone = PairMACE::init_one(i,j); + + //k_scale.h_view(i,j) = k_scale.h_view(j,i) = scale[i][j]; + //k_scale.template modify(); + + k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone; + k_cutsq.template modify(); + + return cutone; + + //// to account for message passing, require cutoff of n_layers * r_max + //return num_interactions*model.attr("r_max").toTensor().item(); + +// k_params.h_view(i,j).lj1 = lj1[i][j]; +// k_params.h_view(i,j).lj2 = lj2[i][j]; +// k_params.h_view(i,j).lj3 = lj3[i][j]; +// k_params.h_view(i,j).lj4 = lj4[i][j]; +// k_params.h_view(i,j).offset = offset[i][j]; +// k_params.h_view(i,j).cutsq = cutone*cutone; +// k_params.h_view(j,i) = k_params.h_view(i,j); +// if (i(); +// k_params.template modify(); + +// return cutone; +} + +template +void PairMACEKokkos::allocate() +{ +std::cout << "hello from kokkos allocate" << std::endl; + PairMACE::allocate(); + + int n = atom->ntypes + 1; + //MemKK::realloc_kokkos(d_map, "pace:map", n); + + MemKK::realloc_kokkos(k_cutsq, "mace:cutsq", n, n); + d_cutsq = k_cutsq.template view(); + + // TODO: from lj_cut, possibly delete + //int n = atom->ntypes; + //memory->destroy(cutsq); + //memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + //d_cutsq = k_cutsq.template view(); + //k_params = Kokkos::DualView("PairLJCut::params",n+1,n+1); + //params = k_params.template view(); +} + +namespace LAMMPS_NS { +template class PairMACEKokkos; +#ifdef LMP_KOKKOS_GPU +template class PairMACEKokkos; +#endif +} + diff --git a/src/ML-MACE/pair_mace_kokkos.h b/src/KOKKOS/pair_mace_kokkos.h similarity index 85% rename from src/ML-MACE/pair_mace_kokkos.h rename to src/KOKKOS/pair_mace_kokkos.h index 0bf3b813941..cbb78cc6e6c 100644 --- a/src/ML-MACE/pair_mace_kokkos.h +++ b/src/KOKKOS/pair_mace_kokkos.h @@ -27,8 +27,9 @@ PairStyle(mace/kk/host,PairMACEKokkos); #ifndef LMP_PAIR_MACE_KOKKOS_H #define LMP_PAIR_MACE_KOKKOS_H -#include "pair_kokkos.h" #include "pair_mace.h" +#include "kokkos_type.h" +#include "pair_kokkos.h" #include "neigh_list_kokkos.h" namespace LAMMPS_NS { @@ -38,7 +39,7 @@ class PairMACEKokkos : public PairMACE { public: - //enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; typedef DeviceType device_type; typedef ArrayTypes AT; PairMACEKokkos(class LAMMPS *); @@ -71,6 +72,23 @@ class PairMACEKokkos : public PairMACE { "Fr", "Ra", "Ac", "Th", "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt", "Ds", "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og"}; + + // kokkos stuff + int host_flag; + int neighflag; + //typename AT::t_x_array_randomread x; + typename AT::t_x_array c_x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + + + //Kokkos::View k_positions; + + typedef Kokkos::DualView tdual_fparams; + tdual_fparams k_cutsq; + typedef Kokkos::View t_fparams; + t_fparams d_cutsq; + }; } // namespace LAMMPS_NS diff --git a/src/ML-MACE/pair_mace_kokkos.cpp b/src/ML-MACE/pair_mace_kokkos.cpp deleted file mode 100644 index 55ef528330b..00000000000 --- a/src/ML-MACE/pair_mace_kokkos.cpp +++ /dev/null @@ -1,509 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributors - William C Witt (University of Cambridge) -------------------------------------------------------------------------- */ - -#include "pair_mace_kokkos.h" - -// TODO: do i need all these? -#include "atom_kokkos.h" -#include "atom_masks.h" -#include "error.h" -#include "force.h" -#include "kokkos.h" -#include "memory_kokkos.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "neighbor.h" -#include "update.h" - -#include -#include -#include - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -template -PairMACEKokkos::PairMACEKokkos(LAMMPS *lmp) : PairMACE(lmp) -{ - no_virial_fdotr_compute = 1; - - // TODO: from lj_cut ... possibly delete - kokkosable = 1; - atomKK = (AtomKokkos *) atom; - execution_space = ExecutionSpaceFromDevice::space; - datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; - //datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; -} - -/* ---------------------------------------------------------------------- */ - -template -PairMACEKokkos::~PairMACEKokkos() -{ - // TODO: from lj_cut, possibly delete - if (copymode) return; - - // TODO: from lj_cut, possibly delete - //if (allocated) { - // memoryKK->destroy_kokkos(k_eatom,eatom); - // memoryKK->destroy_kokkos(k_vatom,vatom); - // memoryKK->destroy_kokkos(k_cutsq,cutsq); - //} -} - -/* ---------------------------------------------------------------------- */ - -template -void PairMACEKokkos::compute(int eflag, int vflag) -{ - ev_init(eflag,vflag,0); - - if (eflag_atom || vflag_atom) { - error->all(FLERR, "ERROR: kokkos eflag_atom not implemented."); - } - -// if (eflag_atom) { -// memoryKK->destroy_kokkos(k_eatom,eatom); -// memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); -// d_eatom = k_eatom.view(); -// } -// if (vflag_atom) { -// memoryKK->destroy_kokkos(k_vatom,vatom); -// memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); -// d_vatom = k_vatom.view(); -// } - - atomKK->sync(execution_space,datamask_read); - //k_cutsq.template sync(); - //k_params.template sync(); - if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); - else atomKK->modified(execution_space,F_MASK); - - x = atomKK->k_x.view(); - c_x = atomKK->k_x.view(); - f = atomKK->k_f.view(); - type = atomKK->k_type.view(); - nlocal = atom->nlocal; - nall = atom->nlocal + atom->nghost; - //newton_pair = force->newton_pair; - //special_lj[0] = force->special_lj[0]; - //special_lj[1] = force->special_lj[1]; - //special_lj[2] = force->special_lj[2]; - //special_lj[3] = force->special_lj[3]; - - // loop over neighbors of my atoms - - //copymode = 1; - - //EV_FLOAT ev = pair_compute,void >(this,(NeighListKokkos*)list); - - //if (eflag_global) eng_vdwl += ev.evdwl; - //if (vflag_global) { - // virial[0] += ev.v[0]; - // virial[1] += ev.v[1]; - // virial[2] += ev.v[2]; - // virial[3] += ev.v[3]; - // virial[4] += ev.v[4]; - // virial[5] += ev.v[5]; - //} - - //if (eflag_atom) { - // k_eatom.template modify(); - // k_eatom.template sync(); - //} - - //if (vflag_atom) { - // k_vatom.template modify(); - // k_vatom.template sync(); - //} - - copymode = 0; - - -// ev_init(eflag, vflag); -// -// if (atom->nlocal != list->inum) error->all(FLERR, "ERROR: nlocal != inum."); -// if (domain_decomposition) { -// if (atom->nghost != list->gnum) error->all(FLERR, "ERROR: nghost != gnum."); -// } -// -// // ----- positions ----- -// int n_nodes; -// if (domain_decomposition) { -// n_nodes = atom->nlocal + atom->nghost; -// } else { -// // normally, ghost atoms are included in the graph as independent -// // nodes, as required when the local domain does not have PBC. -// // however, in no_domain_decomposition mode, ghost atoms are known to -// // be shifted versions of local atoms. -// n_nodes = atom->nlocal; -// } -// auto positions = torch::empty({n_nodes,3}, torch_float_dtype); -// #pragma omp parallel for -// for (int ii=0; iiilist[ii]; -// positions[i][0] = atom->x[i][0]; -// positions[i][1] = atom->x[i][1]; -// positions[i][2] = atom->x[i][2]; -// } -// -// // ----- cell ----- -// auto cell = torch::zeros({3,3}, torch_float_dtype); -// cell[0][0] = domain->h[0]; -// cell[0][1] = 0.0; -// cell[0][2] = 0.0; -// cell[1][0] = domain->h[5]; -// cell[1][1] = domain->h[1]; -// cell[1][2] = 0.0; -// cell[2][0] = domain->h[4]; -// cell[2][1] = domain->h[3]; -// cell[2][2] = domain->h[2]; -// -// // ----- edge_index and unit_shifts ----- -// // count total number of edges -// int n_edges = 0; -// std::vector n_edges_vec(n_nodes, 0); -// #pragma omp parallel for reduction(+:n_edges) -// for (int ii=0; iiilist[ii]; -// double xtmp = atom->x[i][0]; -// double ytmp = atom->x[i][1]; -// double ztmp = atom->x[i][2]; -// int *jlist = list->firstneigh[i]; -// int jnum = list->numneigh[i]; -// for (int jj=0; jjx[j][0]; -// double dely = ytmp - atom->x[j][1]; -// double delz = ztmp - atom->x[j][2]; -// double rsq = delx * delx + dely * dely + delz * delz; -// if (rsq < r_max_squared) { -// n_edges += 1; -// n_edges_vec[ii] += 1; -// } -// } -// } -// // make first_edge vector to help with parallelizing following loop -// std::vector first_edge(n_nodes); -// first_edge[0] = 0; -// for (int ii=0; iiilist[ii]; -// double xtmp = atom->x[i][0]; -// double ytmp = atom->x[i][1]; -// double ztmp = atom->x[i][2]; -// int *jlist = list->firstneigh[i]; -// int jnum = list->numneigh[i]; -// int k = first_edge[ii]; -// for (int jj=0; jjx[j][0]; -// double dely = ytmp - atom->x[j][1]; -// double delz = ztmp - atom->x[j][2]; -// double rsq = delx * delx + dely * dely + delz * delz; -// if (rsq < r_max_squared) { -// edge_index[0][k] = i; -// if (domain_decomposition) { -// edge_index[1][k] = j; -// } else { -// int j_local = atom->map(atom->tag[j]); -// edge_index[1][k] = j_local; -// double shiftx = atom->x[j][0] - atom->x[j_local][0]; -// double shifty = atom->x[j][1] - atom->x[j_local][1]; -// double shiftz = atom->x[j][2] - atom->x[j_local][2]; -// double shiftxs = std::round(domain->h_inv[0]*shiftx + domain->h_inv[5]*shifty + domain->h_inv[4]*shiftz); -// double shiftys = std::round(domain->h_inv[1]*shifty + domain->h_inv[3]*shiftz); -// double shiftzs = std::round(domain->h_inv[2]*shiftz); -// unit_shifts[k][0] = shiftxs; -// unit_shifts[k][1] = shiftys; -// unit_shifts[k][2] = shiftzs; -// shifts[k][0] = domain->h[0]*shiftxs + domain->h[5]*shiftys + domain->h[4]*shiftzs; -// shifts[k][1] = domain->h[1]*shiftys + domain->h[3]*shiftzs; -// shifts[k][2] = domain->h[2]*shiftzs; -// } -// k++; -// } -// } -// } -// -// // ----- node_attrs ----- -// // node_attrs is one-hot encoding for atomic numbers -// auto mace_type = [this](int lammps_type) { -// for (int i=0; iall(FLERR, "ERROR: problem converting lammps_type to mace_type."); -// return -1; -// }; -// int n_node_feats = mace_atomic_numbers.size(); -// auto node_attrs = torch::zeros({n_nodes,n_node_feats}, torch_float_dtype); -// #pragma omp parallel for -// for (int ii=0; iiilist[ii]; -// node_attrs[i][mace_type(atom->type[i])-1] = 1.0; -// } -// -// // ----- mask for ghost ----- -// auto mask = torch::zeros(n_nodes, torch::dtype(torch::kBool)); -// #pragma omp parallel for -// for (int ii=0; iinlocal; ++ii) { -// int i = list->ilist[ii]; -// mask[i] = true; -// } -// -// auto batch = torch::zeros({n_nodes}, torch::dtype(torch::kInt64)); -// auto energy = torch::empty({1}, torch_float_dtype); -// auto forces = torch::empty({n_nodes,3}, torch_float_dtype); -// auto ptr = torch::empty({2}, torch::dtype(torch::kInt64)); -// auto weight = torch::empty({1}, torch_float_dtype); -// ptr[0] = 0; -// ptr[1] = n_nodes; -// weight[0] = 1.0; -// -// // pack the input, call the model, extract the output -// c10::Dict input; -// input.insert("batch", batch); -// input.insert("cell", cell); -// input.insert("edge_index", edge_index); -// input.insert("energy", energy); -// input.insert("forces", forces); -// input.insert("node_attrs", node_attrs); -// input.insert("positions", positions); -// input.insert("ptr", ptr); -// input.insert("shifts", shifts); -// input.insert("unit_shifts", unit_shifts); -// input.insert("weight", weight); -// auto output = model.forward({input, mask, true, true, false}).toGenericDict(); -// -// // mace energy -// // -> sum of site energies of local atoms -// if (eflag_global) { -// auto node_energy = output.at("node_energy").toTensor(); -// eng_vdwl = 0.0; -// #pragma omp parallel for reduction(+:eng_vdwl) -// for (int ii=0; iiinum; ++ii) { -// int i = list->ilist[ii]; -// eng_vdwl += node_energy[i].item(); -// } -// } -// -// // mace forces -// // -> derivatives of total mace energy -// forces = output.at("forces").toTensor(); -// #pragma omp parallel for -// for (int ii=0; iiinum; ++ii) { -// int i = list->ilist[ii]; -// atom->f[i][0] = forces[i][0].item(); -// atom->f[i][1] = forces[i][1].item(); -// atom->f[i][2] = forces[i][2].item(); -// } -// -// // mace site energies -// // -> local atoms only -// if (eflag_atom) { -// auto node_energy = output.at("node_energy").toTensor(); -// #pragma omp parallel for -// for (int ii=0; iiinum; ++ii) { -// int i = list->ilist[ii]; -// eatom[i] = node_energy[i].item(); -// } -// } -// -// // mace virials (local atoms only) -// // -> derivatives of sum of site energies of local atoms -// if (vflag_global) { -// auto vir = output.at("virials").toTensor(); -// virial[0] = vir[0][0][0].item(); -// virial[1] = vir[0][1][1].item(); -// virial[2] = vir[0][2][2].item(); -// virial[3] = 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); -// virial[4] = 0.5*(vir[0][2][0].item() + vir[0][0][2].item()); -// virial[5] = 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); -// } -// -// // mace site virials -// // -> not available -// if (vflag_atom) { -// error->all(FLERR, "ERROR: pair_mace does not support vflag_atom."); -// } - -} - -/* ---------------------------------------------------------------------- */ - -template -void PairMACEKokkos::settings(int narg, char **arg) -{ - if (narg == 1) { - if (strcmp(arg[0], "no_domain_decomposition") == 0) { - domain_decomposition = false; - } else { - error->all(FLERR, "Invalid option for pair_style mace."); - } - } else if (narg > 1) { - error->all(FLERR, "Too many pair_style arguments for pair_style mace."); - } -} - -/* ---------------------------------------------------------------------- */ - -template -void PairMACEKokkos::coeff(int narg, char **arg) -{ - // TODO: remove print statements from this routine, or have a single proc print - - if (!allocated) allocate(); - - std::cout << "Loading MACEKokkos model from \"" << arg[2] << "\" ..."; - model = torch::jit::load(arg[2]); - std::cout << " finished." << std::endl; - - // extract default dtype from mace model - for (auto p: model.named_attributes()) { - // this is a somewhat random choice of variable to check. could it be improved? - if (p.name == "model.node_embedding.linear.weight") { - if (p.value.toTensor().dtype() == caffe2::TypeMeta::Make()) { - torch_float_dtype = torch::kFloat32; - } else if (p.value.toTensor().dtype() == caffe2::TypeMeta::Make()) { - torch_float_dtype = torch::kFloat64; - } - } - } - std::cout << " - The torch_float_dtype is: " << torch_float_dtype << std::endl; - - // extract r_max from mace model - r_max = model.attr("r_max").toTensor().item(); - r_max_squared = r_max*r_max; - std::cout << " - The r_max is: " << r_max << "." << std::endl; - num_interactions = model.attr("num_interactions").toTensor().item(); - std::cout << " - The model has: " << num_interactions << " layers." << std::endl; - - // extract atomic numbers from mace model - auto a_n = model.attr("atomic_numbers").toTensor(); - for (int i=0; i()); - } - std::cout << " - The model atomic numbers are: " << mace_atomic_numbers << "." << std::endl; - - // extract atomic numbers from pair_coeff - for (int i=3; intypes+1; i++) - for (int j=i; jntypes+1; j++) - setflag[i][j] = 1; -} - -template -void PairMACEKokkos::init_style() -{ -// if (force->newton_pair == 0) error->all(FLERR, "ERROR: Pair style mace requires newton pair on."); -// -// /* -// MACE requires the full neighbor list AND neighbors of ghost atoms -// it appears that: -// * without REQ_GHOST -// list->gnum == 0 -// list->ilist does not include ghost atoms, but the jlists do -// * with REQ_GHOST -// list->gnum == atom->nghost -// list->ilist includes ghost atoms -// */ -// if (domain_decomposition) { -// neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); -// } else { -// neighbor->add_request(this, NeighConst::REQ_FULL); -// } - - PairMACE::init_style(); - - // TODO: from lj_cut, possibly delete - // adjust neighbor list request for KOKKOS - - //neighflag = lmp->kokkos->neighflag; - //auto request = neighbor->find_request(this); - //request->set_kokkos_host(std::is_same::value && - // !std::is_same::value); - //request->set_kokkos_device(std::is_same::value); - //if (neighflag == FULL) request->enable_full(); -} - -template -double PairMACEKokkos::init_one(int i, int j) -{ -// // to account for message passing, require cutoff of n_layers * r_max -// return num_interactions*model.attr("r_max").toTensor().item(); - - double cutone = PairMACE::init_one(i,j); - -// k_params.h_view(i,j).lj1 = lj1[i][j]; -// k_params.h_view(i,j).lj2 = lj2[i][j]; -// k_params.h_view(i,j).lj3 = lj3[i][j]; -// k_params.h_view(i,j).lj4 = lj4[i][j]; -// k_params.h_view(i,j).offset = offset[i][j]; -// k_params.h_view(i,j).cutsq = cutone*cutone; -// k_params.h_view(j,i) = k_params.h_view(i,j); -// if (i(); -// k_params.template modify(); - - return cutone; -} - -template -void PairMACEKokkos::allocate() -{ - PairMACE::allocate(); - - // TODO: from lj_cut, possibly delete - //int n = atom->ntypes; - //memory->destroy(cutsq); - //memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); - //d_cutsq = k_cutsq.template view(); - ////k_params = Kokkos::DualView("PairLJCut::params",n+1,n+1); - //params = k_params.template view(); -} - -namespace LAMMPS_NS { -template class PairMACEKokkos; -#ifdef LMP_KOKKOS_GPU -template class PairMACEKokkos; -#endif -} - From 1fafa1ffc487e09862390fdba1dc20fd3611b19c Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Fri, 10 Mar 2023 22:47:35 +0000 Subject: [PATCH 25/51] wip. --- src/KOKKOS/pair_mace_kokkos.cpp | 149 +++++++++++++++++--------------- src/KOKKOS/pair_mace_kokkos.h | 26 +----- 2 files changed, 84 insertions(+), 91 deletions(-) diff --git a/src/KOKKOS/pair_mace_kokkos.cpp b/src/KOKKOS/pair_mace_kokkos.cpp index 64a80ae9338..179ec3849f3 100644 --- a/src/KOKKOS/pair_mace_kokkos.cpp +++ b/src/KOKKOS/pair_mace_kokkos.cpp @@ -89,6 +89,13 @@ void PairMACEKokkos::compute(int eflag, int vflag) { ev_init(eflag,vflag,0); + atomKK->sync(execution_space,X_MASK|F_MASK|TYPE_MASK|TAG_MASK); + + NeighListKokkos* k_list = static_cast*>(list); + auto d_numneigh = k_list->d_numneigh; + auto d_neighbors = k_list->d_neighbors; + auto d_ilist = k_list->d_ilist; + if (atom->nlocal != list->inum) error->all(FLERR, "ERROR: nlocal != inum."); if (domain_decomposition) { if (atom->nghost != list->gnum) error->all(FLERR, "ERROR: nghost != gnum."); @@ -109,6 +116,7 @@ void PairMACEKokkos::compute(int eflag, int vflag) // d_vatom = k_vatom.view(); // } + int nlocal = atom->nlocal; auto r_max_squared = this->r_max_squared; auto h0 = domain->h[0]; auto h1 = domain->h[1]; @@ -123,29 +131,24 @@ void PairMACEKokkos::compute(int eflag, int vflag) auto hinv4 = domain->h_inv[4]; auto hinv5 = domain->h_inv[5]; - auto k_lammps_atomic_numbers = Kokkos::View("k_lammps_atomic_numbers",lammps_atomic_numbers.size()); + auto k_lammps_atomic_numbers = Kokkos::View("k_lammps_atomic_numbers",lammps_atomic_numbers.size()); auto k_lammps_atomic_numbers_mirror = Kokkos::create_mirror_view(k_lammps_atomic_numbers); for (int i=0; i("k_mace_atomic_numbers",mace_atomic_numbers.size()); + auto k_mace_atomic_numbers = Kokkos::View("k_mace_atomic_numbers",mace_atomic_numbers.size()); auto k_mace_atomic_numbers_mirror = Kokkos::create_mirror_view(k_mace_atomic_numbers); for (int i=0; i("k_positions", n_nodes); -// Kokkos::vector lammps_atomic_numbers = this->lammps_atomic_numbers; -// Kokkos::vector mace_atomic_numbers = this->mace_atomic_numbers; - // atom map auto map_style = atom->map_style; auto k_map_array = atomKK->k_map_array; auto k_map_hash = atomKK->k_map_hash; k_map_array.template sync(); - atomKK->sync(execution_space,X_MASK|F_MASK|TYPE_MASK|TAG_MASK); auto x = atomKK->k_x.view(); // c_x = atomKK->k_x.view(); @@ -155,11 +158,6 @@ void PairMACEKokkos::compute(int eflag, int vflag) // int nlocal = atom->nlocal; // int nall = atom->nlocal + atom->nghost; - NeighListKokkos* k_list = static_cast*>(list); - auto d_numneigh = k_list->d_numneigh; - auto d_neighbors = k_list->d_neighbors; - auto d_ilist = k_list->d_ilist; - //inum = list->inum; // ----- positions ----- int n_nodes; @@ -179,11 +177,13 @@ void PairMACEKokkos::compute(int eflag, int vflag) k_positions(i,2) = x(i,2); }); auto positions = torch::from_blob( - k_positions.data(), {n_nodes,3}, torch_float_dtype); + k_positions.data(), + {n_nodes,3}, + torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); // ----- cell ----- // TODO: how to use kokkos here? - auto cell = torch::zeros({3,3}, torch_float_dtype); + auto cell = torch::zeros({3,3}, torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); cell[0][0] = h0; cell[0][1] = 0.0; cell[0][2] = 0.0; @@ -198,11 +198,11 @@ void PairMACEKokkos::compute(int eflag, int vflag) // count total number of edges auto k_n_edges_vec = Kokkos::View("k_n_edges_vec", n_nodes); Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA (const int ii) { - const int i = d_ilist[ii]; + const int i = d_ilist(ii); const X_FLOAT xtmp = x(i,0); const X_FLOAT ytmp = x(i,1); const X_FLOAT ztmp = x(i,2); - const int jnum = d_numneigh[i]; + const int jnum = d_numneigh(i); for (int jj=0; jj::compute(int eflag, int vflag) // make first_edge vector to help with parallelizing following loop auto k_first_edge = Kokkos::View("k_first_edge", n_nodes); // initialized to zero Kokkos::parallel_for(n_nodes-1, KOKKOS_LAMBDA(const int ii) { - k_first_edge[ii+1] = k_first_edge(ii) + k_n_edges_vec(ii); + k_first_edge(ii+1) = k_first_edge(ii) + k_n_edges_vec(ii); }); // fill edge_index and unit_shifts tensors - Kokkos::View k_edge_index("k_edge_index", 2, n_edges); - Kokkos::View k_unit_shifts("k_unit_shifts", n_edges); - Kokkos::View k_shifts("k_shifts", n_edges); + auto k_edge_index = Kokkos::View("k_edge_index", 2, n_edges); + auto k_unit_shifts = Kokkos::View("k_unit_shifts", n_edges); + auto k_shifts = Kokkos::View("k_shifts", n_edges); if (domain_decomposition) { // if (domain_decomposition) { @@ -236,11 +236,11 @@ void PairMACEKokkos::compute(int eflag, int vflag) } else { Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA(const int ii) { - const int i = d_ilist[ii]; + const int i = d_ilist(ii); const X_FLOAT xtmp = x(i,0); const X_FLOAT ytmp = x(i,1); const X_FLOAT ztmp = x(i,2); - const int jnum = d_numneigh[i]; + const int jnum = d_numneigh(i); int k = k_first_edge(ii); for (int jj=0; jj::compute(int eflag, int vflag) const X_FLOAT dely = ytmp - x(j,1); const X_FLOAT delz = ztmp - x(j,2); const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; - if (rsq < r_max_squared) { k_edge_index(0,k) = i; //int j_local = atom->map(tag(j)); @@ -272,17 +271,26 @@ void PairMACEKokkos::compute(int eflag, int vflag) } }); } - auto edge_index = torch::from_blob(k_edge_index.data(), {2,n_edges}, torch::dtype(torch::kInt64)); - auto unit_shifts = torch::from_blob(k_unit_shifts.data(), {n_edges,3}, torch_float_dtype); - auto shifts = torch::from_blob(k_shifts.data(), {n_edges,3}, torch_float_dtype); + auto edge_index = torch::from_blob( + k_edge_index.data(), + {2,n_edges}, + torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA)); + auto unit_shifts = torch::from_blob( + k_unit_shifts.data(), + {n_edges,3}, + torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); + auto shifts = torch::from_blob( + k_shifts.data(), + {n_edges,3}, + torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); // ----- node_attrs ----- // node_attrs is one-hot encoding for atomic numbers - int n_node_feats = mace_atomic_numbers.size(); - Kokkos::View k_node_attrs("k_node_attrs", n_nodes, n_node_feats); + int n_node_feats = mace_atomic_numbers_size; + auto k_node_attrs = Kokkos::View("k_node_attrs", n_nodes, n_node_feats); Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA(const int ii) { - const int i = d_ilist[ii]; - const int lammps_type = type[i]; + const int i = d_ilist(ii); + const int lammps_type = type(i); int t = -1; for (int j=0; j::compute(int eflag, int vflag) //if (t==-1) error->all(FLERR, "ERROR: problem converting lammps_type to mace_type."); k_node_attrs(i,t-1) = 1.0; }); - auto node_attrs = torch::from_blob(k_node_attrs.data(), {n_nodes,n_node_feats}, torch_float_dtype); - -// // ----- mask for ghost ----- -// Kokkos::View k_mask("k_mask", n_nodes); -// Kokkos::parallel_for(atom->nlocal, KOKKOS_LAMBDA(const int ii) { -// const int i = d_ilist[ii]; -// k_mask(i) = true; -// }); -// auto mask = torch::from_blob(k_mask.data(), n_nodes, torch::dtype(torch::kBool)); -// -// auto batch = torch::zeros({n_nodes}, torch::dtype(torch::kInt64)); -// auto energy = torch::empty({1}, torch_float_dtype); -// auto forces = torch::empty({n_nodes,3}, torch_float_dtype); -// auto ptr = torch::empty({2}, torch::dtype(torch::kInt64)); -// auto weight = torch::empty({1}, torch_float_dtype); -// ptr[0] = 0; -// ptr[1] = n_nodes; -// weight[0] = 1.0; -// -//std::cout << "packing the input" << std::endl; -// -// // pack the input, call the model, extract the output -// c10::Dict input; -// input.insert("batch", batch); -// input.insert("cell", cell); -// input.insert("edge_index", edge_index); -// input.insert("energy", energy); -// input.insert("forces", forces); -// input.insert("node_attrs", node_attrs); -// input.insert("positions", positions); -// input.insert("ptr", ptr); -// input.insert("shifts", shifts); -// input.insert("unit_shifts", unit_shifts); -// input.insert("weight", weight); + auto node_attrs = torch::from_blob( + k_node_attrs.data(), + {n_nodes, n_node_feats}, + torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); + + // ----- mask for ghost ----- + Kokkos::View k_mask("k_mask", n_nodes); + Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(const int ii) { + const int i = d_ilist(ii); + k_mask(i) = true; + }); + auto mask = torch::from_blob( + k_mask.data(), + n_nodes, + torch::TensorOptions().dtype(torch::kBool).device(torch::kCUDA)); + + // TODO: add device? + auto batch = torch::zeros({n_nodes}, torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA)); + auto energy = torch::empty({1}, torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); + auto forces = torch::empty({n_nodes,3}, torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); + auto ptr = torch::empty({2}, torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA)); + auto weight = torch::empty({1}, torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); + ptr[0] = 0; + ptr[1] = n_nodes; + weight[0] = 1.0; + + // pack the input, call the model, extract the output + c10::Dict input; + input.insert("batch", batch); + input.insert("cell", cell); + input.insert("edge_index", edge_index); + input.insert("energy", energy); + input.insert("forces", forces); + input.insert("node_attrs", node_attrs); + input.insert("positions", positions); + input.insert("ptr", ptr); + input.insert("shifts", shifts); + input.insert("unit_shifts", unit_shifts); + input.insert("weight", weight); ////std::cout << "batch" << batch << std::endl; ////std::cout << "cell" << cell << std::endl; ////std::cout << "edge_index" << edge_index << std::endl; @@ -338,8 +351,8 @@ void PairMACEKokkos::compute(int eflag, int vflag) ////std::cout << "unit_shifts" << unit_shifts << std::endl; ////std::cout << "weight" << weight << std::endl; ////std::cout << "mask" << mask << std::endl; -// auto output = model.forward({input, mask, true, true, false}).toGenericDict(); -// + auto output = model.forward({input, mask, true, true, false}).toGenericDict(); + ////std::cout << "energy: " << output.at("energy").toTensor() << std::endl; ////std::cout << "node_energy: " << output.at("node_energy").toTensor() << std::endl; // diff --git a/src/KOKKOS/pair_mace_kokkos.h b/src/KOKKOS/pair_mace_kokkos.h index cbb78cc6e6c..ab658b70f49 100644 --- a/src/KOKKOS/pair_mace_kokkos.h +++ b/src/KOKKOS/pair_mace_kokkos.h @@ -53,33 +53,13 @@ class PairMACEKokkos : public PairMACE { protected: - bool domain_decomposition = true; - torch::jit::script::Module model; - torch::ScalarType torch_float_dtype; - double r_max; - double r_max_squared; - int64_t num_interactions; - std::vector mace_atomic_numbers; - std::vector lammps_atomic_numbers; - const std::array periodic_table = - { "H", "He", - "Li", "Be", "B", "C", "N", "O", "F", "Ne", - "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar", - "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br", "Kr", - "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I", "Xe", - "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", - "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", - "Fr", "Ra", "Ac", "Th", "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr", - "Rf", "Db", "Sg", "Bh", "Hs", "Mt", "Ds", "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og"}; - - // kokkos stuff int host_flag; int neighflag; //typename AT::t_x_array_randomread x; - typename AT::t_x_array c_x; - typename AT::t_f_array f; - typename AT::t_int_1d_randomread type; + //typename AT::t_x_array c_x; + //typename AT::t_f_array f; + //typename AT::t_int_1d_randomread type; //Kokkos::View k_positions; From d0f1c8aee27d1008e98359e574c894098c3d5a81 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Mon, 13 Mar 2023 16:53:09 +0000 Subject: [PATCH 26/51] wip. --- src/KOKKOS/pair_mace_kokkos.cpp | 150 +++++++++++++++++++------------- 1 file changed, 90 insertions(+), 60 deletions(-) diff --git a/src/KOKKOS/pair_mace_kokkos.cpp b/src/KOKKOS/pair_mace_kokkos.cpp index 179ec3849f3..ed65fde8a93 100644 --- a/src/KOKKOS/pair_mace_kokkos.cpp +++ b/src/KOKKOS/pair_mace_kokkos.cpp @@ -96,6 +96,9 @@ void PairMACEKokkos::compute(int eflag, int vflag) auto d_neighbors = k_list->d_neighbors; auto d_ilist = k_list->d_ilist; +std::cout << "nlocal inum " << atom->nlocal << " " << list->inum << std::endl; +std::cout << "ghost gnum " << atom->nghost << " " << list->gnum << std::endl; + if (atom->nlocal != list->inum) error->all(FLERR, "ERROR: nlocal != inum."); if (domain_decomposition) { if (atom->nghost != list->gnum) error->all(FLERR, "ERROR: nghost != gnum."); @@ -155,8 +158,6 @@ void PairMACEKokkos::compute(int eflag, int vflag) auto f = atomKK->k_f.view(); auto tag = atomKK->k_tag.view(); auto type = atomKK->k_type.view(); -// int nlocal = atom->nlocal; -// int nall = atom->nlocal + atom->nghost; // ----- positions ----- @@ -199,17 +200,17 @@ void PairMACEKokkos::compute(int eflag, int vflag) auto k_n_edges_vec = Kokkos::View("k_n_edges_vec", n_nodes); Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA (const int ii) { const int i = d_ilist(ii); - const X_FLOAT xtmp = x(i,0); - const X_FLOAT ytmp = x(i,1); - const X_FLOAT ztmp = x(i,2); + const double xtmp = x(i,0); + const double ytmp = x(i,1); + const double ztmp = x(i,2); const int jnum = d_numneigh(i); for (int jj=0; jj::compute(int eflag, int vflag) auto k_edge_index = Kokkos::View("k_edge_index", 2, n_edges); auto k_unit_shifts = Kokkos::View("k_unit_shifts", n_edges); auto k_shifts = Kokkos::View("k_shifts", n_edges); + if (domain_decomposition) { -// if (domain_decomposition) { -// edge_index[1][k] = j; + Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA(const int ii) { + const int i = d_ilist(ii); + const double xtmp = x(i,0); + const double ytmp = x(i,1); + const double ztmp = x(i,2); + const int jnum = d_numneigh(i); + int k = k_first_edge(ii); + for (int jj=0; jjmap(tag(j)); @@ -339,45 +360,43 @@ void PairMACEKokkos::compute(int eflag, int vflag) input.insert("shifts", shifts); input.insert("unit_shifts", unit_shifts); input.insert("weight", weight); -////std::cout << "batch" << batch << std::endl; -////std::cout << "cell" << cell << std::endl; -////std::cout << "edge_index" << edge_index << std::endl; -////// energy -////// forces -////std::cout << "node_attrs" << node_attrs << std::endl; -////std::cout << "positions" << positions << std::endl; -////std::cout << "ptr" << ptr << std::endl; -////std::cout << "shifts" << shifts << std::endl; -////std::cout << "unit_shifts" << unit_shifts << std::endl; -////std::cout << "weight" << weight << std::endl; -////std::cout << "mask" << mask << std::endl; +std::cout << "batch" << batch.to("cpu") << std::endl; +std::cout << "cell" << cell.to("cpu") << std::endl; +std::cout << "edge_index" << edge_index.to("cpu") << std::endl; +// energy +// forces +std::cout << "node_attrs" << node_attrs.to("cpu") << std::endl; +std::cout << "positions" << positions.to("cpu") << std::endl; +std::cout << "ptr" << ptr.to("cpu") << std::endl; +std::cout << "shifts" << shifts.to("cpu") << std::endl; +std::cout << "unit_shifts" << unit_shifts.to("cpu") << std::endl; +std::cout << "weight" << weight.to("cpu") << std::endl; +std::cout << "mask" << mask.to("cpu") << std::endl; auto output = model.forward({input, mask, true, true, false}).toGenericDict(); -////std::cout << "energy: " << output.at("energy").toTensor() << std::endl; -////std::cout << "node_energy: " << output.at("node_energy").toTensor() << std::endl; -// -// // mace energy -// // -> sum of site energies of local atoms -// if (eflag_global) { -// auto node_energy = output.at("node_energy").toTensor(); -// eng_vdwl = 0.0; -// Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int ii, double &eng_vdwl) { -// const int i = d_ilist[ii]; -// eng_vdwl += node_energy[i].item(); -// }, eng_vdwl); -// } // -// // mace forces -// // -> derivatives of total mace energy -// forces = output.at("forces").toTensor(); -// Kokkos::View> -// k_f(forces.data_ptr(), n_nodes); -// Kokkos::parallel_for(atom->nlocal, KOKKOS_LAMBDA(const int ii) { -// const int i = d_ilist[ii]; -// f(i,0) = k_f(i,0); -// f(i,1) = k_f(i,1); -// f(i,2) = k_f(i,2); -// }); + // mace energy + // -> sum of site energies of local atoms + if (eflag_global) { + auto node_energy = output.at("node_energy").toTensor(); + auto k_node_energy = Kokkos::View>(node_energy.data_ptr(),n_nodes); + eng_vdwl = 0.0; + Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int ii, double &eng_vdwl) { + const int i = d_ilist(ii); + eng_vdwl += k_node_energy(i); + }, eng_vdwl); + } + + // mace forces + // -> derivatives of total mace energy + forces = output.at("forces").toTensor(); + auto k_forces = Kokkos::View>(forces.data_ptr(),n_nodes); + Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(const int ii) { + const int i = d_ilist(ii); + f(i,0) = k_forces(i,0); + f(i,1) = k_forces(i,1); + f(i,2) = k_forces(i,2); + }); // //// // mace site energies //// // -> local atoms only @@ -435,6 +454,9 @@ void PairMACEKokkos::compute(int eflag, int vflag) // // copymode = 0; +std::cout << "energy: " << output.at("energy").toTensor().to("cpu") << std::endl; +std::cout << "node_energy: " << output.at("node_energy").toTensor().to("cpu") << std::endl; + } /* ---------------------------------------------------------------------- */ @@ -469,8 +491,8 @@ template void PairMACEKokkos::init_style() { std::cout << "hello from kokkos init_style" << std::endl; -// if (force->newton_pair == 0) error->all(FLERR, "ERROR: Pair style mace requires newton pair on."); -// + if (force->newton_pair == 0) error->all(FLERR, "ERROR: Pair style mace requires newton pair on."); + // /* // MACE requires the full neighbor list AND neighbors of ghost atoms // it appears that: @@ -493,15 +515,23 @@ std::cout << "hello from kokkos init_style" << std::endl; } std::cout << "after mace init" << std::endl; - // neighbor list request for KOKKOS neighflag = lmp->kokkos->neighflag; auto request = neighbor->add_request(this, NeighConst::REQ_FULL); - request->set_kokkos_host(std::is_same::value && - !std::is_same::value); - request->set_kokkos_device(std::is_same::value); + //if (domain_decomposition) { + //auto request = neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); + request->set_kokkos_host(std::is_same::value && + !std::is_same::value); + request->set_kokkos_device(std::is_same::value); + if (neighflag == FULL) request->enable_full(); + //} else { + // auto request = neighbor->add_request(this, NeighConst::REQ_FULL); + // request->set_kokkos_host(std::is_same::value && + // !std::is_same::value); + // request->set_kokkos_device(std::is_same::value); + //} // if (neighflag == FULL) // error->all(FLERR,"Must use half neighbor list style with pair pace/kk"); From 1c59b0a6b7d655b550bdcb0c7432d8ce9c4aff59 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Mon, 13 Mar 2023 17:20:20 +0000 Subject: [PATCH 27/51] wip. --- src/KOKKOS/pair_mace_kokkos.cpp | 44 ++++++++++++++------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/src/KOKKOS/pair_mace_kokkos.cpp b/src/KOKKOS/pair_mace_kokkos.cpp index ed65fde8a93..4c2af6f54c4 100644 --- a/src/KOKKOS/pair_mace_kokkos.cpp +++ b/src/KOKKOS/pair_mace_kokkos.cpp @@ -134,12 +134,12 @@ std::cout << "ghost gnum " << atom->nghost << " " << list->gnum << std::endl; auto hinv4 = domain->h_inv[4]; auto hinv5 = domain->h_inv[5]; - auto k_lammps_atomic_numbers = Kokkos::View("k_lammps_atomic_numbers",lammps_atomic_numbers.size()); + auto k_lammps_atomic_numbers = Kokkos::View("k_lammps_atomic_numbers",lammps_atomic_numbers.size()); auto k_lammps_atomic_numbers_mirror = Kokkos::create_mirror_view(k_lammps_atomic_numbers); for (int i=0; i("k_mace_atomic_numbers",mace_atomic_numbers.size()); + auto k_mace_atomic_numbers = Kokkos::View("k_mace_atomic_numbers",mace_atomic_numbers.size()); auto k_mace_atomic_numbers_mirror = Kokkos::create_mirror_view(k_mace_atomic_numbers); for (int i=0; inghost << " " << list->gnum << std::endl; auto positions = torch::from_blob( k_positions.data(), {n_nodes,3}, - torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); + torch::TensorOptions().dtype(torch_float_dtype).device(device)); // ----- cell ----- // TODO: how to use kokkos here? - auto cell = torch::zeros({3,3}, torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); + auto cell = torch::zeros({3,3}, torch::TensorOptions().dtype(torch_float_dtype).device(device)); cell[0][0] = h0; cell[0][1] = 0.0; cell[0][2] = 0.0; @@ -197,7 +197,7 @@ std::cout << "ghost gnum " << atom->nghost << " " << list->gnum << std::endl; // ----- edge_index and unit_shifts ----- // count total number of edges - auto k_n_edges_vec = Kokkos::View("k_n_edges_vec", n_nodes); + auto k_n_edges_vec = Kokkos::View("k_n_edges_vec", n_nodes); Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA (const int ii) { const int i = d_ilist(ii); const double xtmp = x(i,0); @@ -221,12 +221,12 @@ std::cout << "ghost gnum " << atom->nghost << " " << list->gnum << std::endl; n_edges += k_n_edges_vec(ii); }, n_edges); // make first_edge vector to help with parallelizing following loop - auto k_first_edge = Kokkos::View("k_first_edge", n_nodes); // initialized to zero + auto k_first_edge = Kokkos::View("k_first_edge", n_nodes); // initialized to zero Kokkos::parallel_for(n_nodes-1, KOKKOS_LAMBDA(const int ii) { k_first_edge(ii+1) = k_first_edge(ii) + k_n_edges_vec(ii); }); // fill edge_index and unit_shifts tensors - auto k_edge_index = Kokkos::View("k_edge_index", 2, n_edges); + auto k_edge_index = Kokkos::View("k_edge_index", 2, n_edges); auto k_unit_shifts = Kokkos::View("k_unit_shifts", n_edges); auto k_shifts = Kokkos::View("k_shifts", n_edges); @@ -295,15 +295,15 @@ std::cout << "ghost gnum " << atom->nghost << " " << list->gnum << std::endl; auto edge_index = torch::from_blob( k_edge_index.data(), {2,n_edges}, - torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA)); + torch::TensorOptions().dtype(torch::kInt64).device(device)); auto unit_shifts = torch::from_blob( k_unit_shifts.data(), {n_edges,3}, - torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); + torch::TensorOptions().dtype(torch_float_dtype).device(device)); auto shifts = torch::from_blob( k_shifts.data(), {n_edges,3}, - torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); + torch::TensorOptions().dtype(torch_float_dtype).device(device)); // ----- node_attrs ----- // node_attrs is one-hot encoding for atomic numbers @@ -324,7 +324,7 @@ std::cout << "ghost gnum " << atom->nghost << " " << list->gnum << std::endl; auto node_attrs = torch::from_blob( k_node_attrs.data(), {n_nodes, n_node_feats}, - torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); + torch::TensorOptions().dtype(torch_float_dtype).device(device)); // ----- mask for ghost ----- Kokkos::View k_mask("k_mask", n_nodes); @@ -335,14 +335,14 @@ std::cout << "ghost gnum " << atom->nghost << " " << list->gnum << std::endl; auto mask = torch::from_blob( k_mask.data(), n_nodes, - torch::TensorOptions().dtype(torch::kBool).device(torch::kCUDA)); + torch::TensorOptions().dtype(torch::kBool).device(device)); // TODO: add device? - auto batch = torch::zeros({n_nodes}, torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA)); - auto energy = torch::empty({1}, torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); - auto forces = torch::empty({n_nodes,3}, torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); - auto ptr = torch::empty({2}, torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA)); - auto weight = torch::empty({1}, torch::TensorOptions().dtype(torch_float_dtype).device(torch::kCUDA)); + auto batch = torch::zeros({n_nodes}, torch::TensorOptions().dtype(torch::kInt64).device(device)); + auto energy = torch::empty({1}, torch::TensorOptions().dtype(torch_float_dtype).device(device)); + auto forces = torch::empty({n_nodes,3}, torch::TensorOptions().dtype(torch_float_dtype).device(device)); + auto ptr = torch::empty({2}, torch::TensorOptions().dtype(torch::kInt64).device(device)); + auto weight = torch::empty({1}, torch::TensorOptions().dtype(torch_float_dtype).device(device)); ptr[0] = 0; ptr[1] = n_nodes; weight[0] = 1.0; @@ -464,15 +464,7 @@ std::cout << "node_energy: " << output.at("node_energy").toTensor().to("cpu") << template void PairMACEKokkos::settings(int narg, char **arg) { - if (narg == 1) { - if (strcmp(arg[0], "no_domain_decomposition") == 0) { - domain_decomposition = false; - } else { - error->all(FLERR, "Invalid option for pair_style mace."); - } - } else if (narg > 1) { - error->all(FLERR, "Too many pair_style arguments for pair_style mace."); - } + PairMACE::settings(narg, arg); } /* ---------------------------------------------------------------------- */ From cbcc05cecf13068cdfbfdf21cec15d9bc0cece2b Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Tue, 14 Mar 2023 22:42:35 +0000 Subject: [PATCH 28/51] wip. --- src/KOKKOS/pair_mace_kokkos.cpp | 85 +++++++++++++++++---------------- src/KOKKOS/pair_mace_kokkos.h | 3 +- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/src/KOKKOS/pair_mace_kokkos.cpp b/src/KOKKOS/pair_mace_kokkos.cpp index 4c2af6f54c4..521a48fc34c 100644 --- a/src/KOKKOS/pair_mace_kokkos.cpp +++ b/src/KOKKOS/pair_mace_kokkos.cpp @@ -96,9 +96,6 @@ void PairMACEKokkos::compute(int eflag, int vflag) auto d_neighbors = k_list->d_neighbors; auto d_ilist = k_list->d_ilist; -std::cout << "nlocal inum " << atom->nlocal << " " << list->inum << std::endl; -std::cout << "ghost gnum " << atom->nghost << " " << list->gnum << std::endl; - if (atom->nlocal != list->inum) error->all(FLERR, "ERROR: nlocal != inum."); if (domain_decomposition) { if (atom->nghost != list->gnum) error->all(FLERR, "ERROR: nghost != gnum."); @@ -203,8 +200,7 @@ std::cout << "ghost gnum " << atom->nghost << " " << list->gnum << std::endl; const double xtmp = x(i,0); const double ytmp = x(i,1); const double ztmp = x(i,2); - const int jnum = d_numneigh(i); - for (int jj=0; jjnghost << " " << list->gnum << std::endl; } } }); - int n_edges = 0; - Kokkos::parallel_reduce(n_nodes, KOKKOS_LAMBDA(const int ii, int& n_edges) { + // WARNING: if n_edges remains 0 (e.g., because atoms too far apart) + // strange things happen on gpu + int64_t n_edges = 0; + Kokkos::parallel_reduce(n_nodes, KOKKOS_LAMBDA(const int ii, int64_t& n_edges) { n_edges += k_n_edges_vec(ii); }, n_edges); // make first_edge vector to help with parallelizing following loop @@ -237,9 +235,8 @@ std::cout << "ghost gnum " << atom->nghost << " " << list->gnum << std::endl; const double xtmp = x(i,0); const double ytmp = x(i,1); const double ztmp = x(i,2); - const int jnum = d_numneigh(i); int k = k_first_edge(ii); - for (int jj=0; jjnghost << " " << list->gnum << std::endl; const double xtmp = x(i,0); const double ytmp = x(i,1); const double ztmp = x(i,2); - const int jnum = d_numneigh(i); int k = k_first_edge(ii); - for (int jj=0; jjnghost << " " << list->gnum << std::endl; n_nodes, torch::TensorOptions().dtype(torch::kBool).device(device)); + // TODO: why is batch of size n_nodes? // TODO: add device? auto batch = torch::zeros({n_nodes}, torch::TensorOptions().dtype(torch::kInt64).device(device)); auto energy = torch::empty({1}, torch::TensorOptions().dtype(torch_float_dtype).device(device)); @@ -362,10 +359,10 @@ std::cout << "ghost gnum " << atom->nghost << " " << list->gnum << std::endl; input.insert("weight", weight); std::cout << "batch" << batch.to("cpu") << std::endl; std::cout << "cell" << cell.to("cpu") << std::endl; -std::cout << "edge_index" << edge_index.to("cpu") << std::endl; +//std::cout << "edge_index" << edge_index.to("cpu") << std::endl; // energy // forces -std::cout << "node_attrs" << node_attrs.to("cpu") << std::endl; +//std::cout << "node_attrs" << node_attrs.to("cpu") << std::endl; std::cout << "positions" << positions.to("cpu") << std::endl; std::cout << "ptr" << ptr.to("cpu") << std::endl; std::cout << "shifts" << shifts.to("cpu") << std::endl; @@ -374,12 +371,18 @@ std::cout << "weight" << weight.to("cpu") << std::endl; std::cout << "mask" << mask.to("cpu") << std::endl; auto output = model.forward({input, mask, true, true, false}).toGenericDict(); -// + //std::cout << "node energy: " << typeid(output.at("node_energy").toTensor()).name() << std::endl; + //std::cout << "node energy: " << typeid(output.at("node_energy").toTensor().data_ptr()).name() << std::endl; + //std::cout << "forces: " << typeid(output.at("forces").toTensor()).name() << std::endl; + //std::cout << "forces: " << typeid(output.at("forces").toTensor().data_ptr()).name() << std::endl; + + // mace energy // -> sum of site energies of local atoms if (eflag_global) { auto node_energy = output.at("node_energy").toTensor(); - auto k_node_energy = Kokkos::View>(node_energy.data_ptr(),n_nodes); + auto node_energy_ptr = static_cast(node_energy.data_ptr()); + auto k_node_energy = Kokkos::View>(node_energy_ptr,n_nodes); eng_vdwl = 0.0; Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int ii, double &eng_vdwl) { const int i = d_ilist(ii); @@ -390,7 +393,8 @@ std::cout << "mask" << mask.to("cpu") << std::endl; // mace forces // -> derivatives of total mace energy forces = output.at("forces").toTensor(); - auto k_forces = Kokkos::View>(forces.data_ptr(),n_nodes); + auto forces_ptr = static_cast(forces.data_ptr()); + auto k_forces = Kokkos::View>(forces_ptr,n_nodes); Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(const int ii) { const int i = d_ilist(ii); f(i,0) = k_forces(i,0); @@ -461,14 +465,6 @@ std::cout << "node_energy: " << output.at("node_energy").toTensor().to("cpu") << /* ---------------------------------------------------------------------- */ -template -void PairMACEKokkos::settings(int narg, char **arg) -{ - PairMACE::settings(narg, arg); -} - -/* ---------------------------------------------------------------------- */ - template void PairMACEKokkos::coeff(int narg, char **arg) { @@ -483,7 +479,7 @@ template void PairMACEKokkos::init_style() { std::cout << "hello from kokkos init_style" << std::endl; - if (force->newton_pair == 0) error->all(FLERR, "ERROR: Pair style mace requires newton pair on."); +// if (force->newton_pair == 0) error->all(FLERR, "ERROR: Pair style mace requires newton pair on."); // /* // MACE requires the full neighbor list AND neighbors of ghost atoms @@ -501,23 +497,23 @@ std::cout << "hello from kokkos init_style" << std::endl; // neighbor->add_request(this, NeighConst::REQ_FULL); // } - if (host_flag) { - PairMACE::init_style(); - return; - } -std::cout << "after mace init" << std::endl; - - // neighbor list request for KOKKOS - - neighflag = lmp->kokkos->neighflag; - - auto request = neighbor->add_request(this, NeighConst::REQ_FULL); - //if (domain_decomposition) { - //auto request = neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); - request->set_kokkos_host(std::is_same::value && - !std::is_same::value); - request->set_kokkos_device(std::is_same::value); - if (neighflag == FULL) request->enable_full(); +// if (host_flag) { +// PairMACE::init_style(); +// return; +// } +//std::cout << "after mace init" << std::endl; +// +// // neighbor list request for KOKKOS +// +// neighflag = lmp->kokkos->neighflag; +// +// auto request = neighbor->add_request(this, NeighConst::REQ_FULL); +// //if (domain_decomposition) { +// //auto request = neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); +// request->set_kokkos_host(std::is_same::value && +// !std::is_same::value); +// request->set_kokkos_device(std::is_same::value); +// if (neighflag == FULL) request->enable_full(); //} else { // auto request = neighbor->add_request(this, NeighConst::REQ_FULL); // request->set_kokkos_host(std::is_same::value && @@ -531,6 +527,11 @@ std::cout << "after mace init" << std::endl; + PairMACE::init_style(); + auto request = neighbor->find_request(this); + request->set_kokkos_host(std::is_same::value && + !std::is_same::value); + request->set_kokkos_device(std::is_same::value); diff --git a/src/KOKKOS/pair_mace_kokkos.h b/src/KOKKOS/pair_mace_kokkos.h index ab658b70f49..72b041f9e90 100644 --- a/src/KOKKOS/pair_mace_kokkos.h +++ b/src/KOKKOS/pair_mace_kokkos.h @@ -39,13 +39,12 @@ class PairMACEKokkos : public PairMACE { public: - enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + //enum {EnabledNeighFlags=FULL}; typedef DeviceType device_type; typedef ArrayTypes AT; PairMACEKokkos(class LAMMPS *); ~PairMACEKokkos() override; void compute(int, int) override; - void settings(int, char **) override; void coeff(int, char **) override; void init_style() override; double init_one(int, int) override; From e7ef916606e9853c7ff13e1007f5935d229cead2 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Thu, 30 Mar 2023 11:09:23 +0100 Subject: [PATCH 29/51] kokkos. --- src/KOKKOS/npair_kokkos.cpp | 2 +- src/KOKKOS/pair_mace_kokkos.cpp | 268 ++++---------------------------- src/KOKKOS/pair_mace_kokkos.h | 11 -- 3 files changed, 31 insertions(+), 250 deletions(-) diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index ee8da12c528..5f82b4e0690 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -246,7 +246,7 @@ void NPairKokkos::build(NeighList *list_) // assumes newton off NPairKokkosBuildFunctorGhost f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); -#ifdef LMP_KOKKOS_GPU +#if 0 if (ExecutionSpaceFromDevice::space == Device) { int team_size = atoms_per_bin*factor; int team_size_max = Kokkos::TeamPolicy(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag()); diff --git a/src/KOKKOS/pair_mace_kokkos.cpp b/src/KOKKOS/pair_mace_kokkos.cpp index 521a48fc34c..c79eed1b73a 100644 --- a/src/KOKKOS/pair_mace_kokkos.cpp +++ b/src/KOKKOS/pair_mace_kokkos.cpp @@ -18,7 +18,6 @@ #include "pair_mace_kokkos.h" -// TODO: do i need all these? #include "atom_kokkos.h" #include "atom_masks.h" #include "error.h" @@ -42,18 +41,8 @@ using namespace LAMMPS_NS; template PairMACEKokkos::PairMACEKokkos(LAMMPS *lmp) : PairMACE(lmp) { -std::cout << "hello from kokkos mace constructor" << std::endl; no_virial_fdotr_compute = 1; - //kokkosable = 1; - //atomKK = (AtomKokkos *) atom; - //execution_space = ExecutionSpaceFromDevice::space; - //datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; - //datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; - // pair_pace_kokkos has these instead - //datamask_read = EMPTY_MASK; - //datamask_modify = EMPTY_MASK; - kokkosable = 1; atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; @@ -69,17 +58,6 @@ template PairMACEKokkos::~PairMACEKokkos() { if (copymode) return; - - // from lj_cut_kokkos - //if (allocated) { - // memoryKK->destroy_kokkos(k_eatom,eatom); - // memoryKK->destroy_kokkos(k_vatom,vatom); - // memoryKK->destroy_kokkos(k_cutsq,cutsq); - //} - - // from pair_pace_kokkos - //memoryKK->destroy_kokkos(k_eatom,eatom); - //memoryKK->destroy_kokkos(k_vatom,vatom); } /* ---------------------------------------------------------------------- */ @@ -105,17 +83,6 @@ void PairMACEKokkos::compute(int eflag, int vflag) error->all(FLERR, "ERROR: kokkos eflag_atom not implemented."); } -// if (eflag_atom) { -// memoryKK->destroy_kokkos(k_eatom,eatom); -// memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); -// d_eatom = k_eatom.view(); -// } -// if (vflag_atom) { -// memoryKK->destroy_kokkos(k_vatom,vatom); -// memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); -// d_vatom = k_vatom.view(); -// } - int nlocal = atom->nlocal; auto r_max_squared = this->r_max_squared; auto h0 = domain->h[0]; @@ -136,11 +103,13 @@ void PairMACEKokkos::compute(int eflag, int vflag) for (int i=0; i("k_mace_atomic_numbers",mace_atomic_numbers.size()); auto k_mace_atomic_numbers_mirror = Kokkos::create_mirror_view(k_mace_atomic_numbers); for (int i=0; i::compute(int eflag, int vflag) auto k_map_hash = atomKK->k_map_hash; k_map_array.template sync(); - auto x = atomKK->k_x.view(); // c_x = atomKK->k_x.view(); auto f = atomKK->k_f.view(); @@ -220,10 +188,12 @@ void PairMACEKokkos::compute(int eflag, int vflag) }, n_edges); // make first_edge vector to help with parallelizing following loop auto k_first_edge = Kokkos::View("k_first_edge", n_nodes); // initialized to zero - Kokkos::parallel_for(n_nodes-1, KOKKOS_LAMBDA(const int ii) { - k_first_edge(ii+1) = k_first_edge(ii) + k_n_edges_vec(ii); + // TODO: this is serial to avoid race ... is there something better? + Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int i) { + for (int ii=0; ii("k_edge_index", 2, n_edges); auto k_unit_shifts = Kokkos::View("k_unit_shifts", n_edges); auto k_shifts = Kokkos::View("k_shifts", n_edges); @@ -268,7 +238,6 @@ void PairMACEKokkos::compute(int eflag, int vflag) const double rsq = delx*delx + dely*dely + delz*delz; if (rsq < r_max_squared) { k_edge_index(0,k) = i; - //int j_local = atom->map(tag(j)); int j_local = AtomKokkos::map_kokkos(tag(j),map_style,k_map_array,k_map_hash); k_edge_index(1,k) = j_local; double shiftx = x(j,0) - x(j_local,0); @@ -314,7 +283,6 @@ void PairMACEKokkos::compute(int eflag, int vflag) t = j+1; } } - //if (t==-1) error->all(FLERR, "ERROR: problem converting lammps_type to mace_type."); k_node_attrs(i,t-1) = 1.0; }); auto node_attrs = torch::from_blob( @@ -334,7 +302,6 @@ void PairMACEKokkos::compute(int eflag, int vflag) torch::TensorOptions().dtype(torch::kBool).device(device)); // TODO: why is batch of size n_nodes? - // TODO: add device? auto batch = torch::zeros({n_nodes}, torch::TensorOptions().dtype(torch::kInt64).device(device)); auto energy = torch::empty({1}, torch::TensorOptions().dtype(torch_float_dtype).device(device)); auto forces = torch::empty({n_nodes,3}, torch::TensorOptions().dtype(torch_float_dtype).device(device)); @@ -357,25 +324,22 @@ void PairMACEKokkos::compute(int eflag, int vflag) input.insert("shifts", shifts); input.insert("unit_shifts", unit_shifts); input.insert("weight", weight); -std::cout << "batch" << batch.to("cpu") << std::endl; -std::cout << "cell" << cell.to("cpu") << std::endl; + +//std::cout << "batch" << batch.to("cpu") << std::endl; +//std::cout << "cell" << cell.to("cpu") << std::endl; //std::cout << "edge_index" << edge_index.to("cpu") << std::endl; -// energy -// forces //std::cout << "node_attrs" << node_attrs.to("cpu") << std::endl; -std::cout << "positions" << positions.to("cpu") << std::endl; -std::cout << "ptr" << ptr.to("cpu") << std::endl; -std::cout << "shifts" << shifts.to("cpu") << std::endl; -std::cout << "unit_shifts" << unit_shifts.to("cpu") << std::endl; -std::cout << "weight" << weight.to("cpu") << std::endl; -std::cout << "mask" << mask.to("cpu") << std::endl; - auto output = model.forward({input, mask, true, true, false}).toGenericDict(); +//std::cout << "positions" << positions.to("cpu") << std::endl; +//std::cout << "ptr" << ptr.to("cpu") << std::endl; +//std::cout << "shifts" << shifts.to("cpu") << std::endl; +//std::cout << "unit_shifts" << unit_shifts.to("cpu") << std::endl; +//std::cout << "weight" << weight.to("cpu") << std::endl; +//std::cout << "mask" << mask.to("cpu") << std::endl; - //std::cout << "node energy: " << typeid(output.at("node_energy").toTensor()).name() << std::endl; - //std::cout << "node energy: " << typeid(output.at("node_energy").toTensor().data_ptr()).name() << std::endl; - //std::cout << "forces: " << typeid(output.at("forces").toTensor()).name() << std::endl; - //std::cout << "forces: " << typeid(output.at("forces").toTensor().data_ptr()).name() << std::endl; + auto output = model.forward({input, mask, true, true, false}).toGenericDict(); +//std::cout << "energy: " << output.at("energy").toTensor().to("cpu") << std::endl; +//std::cout << "node_energy: " << output.at("node_energy").toTensor().to("cpu") << std::endl; // mace energy // -> sum of site energies of local atoms @@ -401,66 +365,18 @@ std::cout << "mask" << mask.to("cpu") << std::endl; f(i,1) = k_forces(i,1); f(i,2) = k_forces(i,2); }); -// -//// // mace site energies -//// // -> local atoms only -//// if (eflag_atom) { -//// auto node_energy = output.at("node_energy").toTensor(); -//// #pragma omp parallel for -//// for (int ii=0; iiinum; ++ii) { -//// int i = list->ilist[ii]; -//// eatom[i] = node_energy[i].item(); -//// } -//// } -//// -// // mace virials (local atoms only) -// // -> derivatives of sum of site energies of local atoms -// if (vflag_global) { -// auto vir = output.at("virials").toTensor(); -// virial[0] = vir[0][0][0].item(); -// virial[1] = vir[0][1][1].item(); -// virial[2] = vir[0][2][2].item(); -// virial[3] = 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); -// virial[4] = 0.5*(vir[0][2][0].item() + vir[0][0][2].item()); -// virial[5] = 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); -// } -//// -//// // mace site virials -//// // -> not available -//// if (vflag_atom) { -//// error->all(FLERR, "ERROR: pair_mace does not support vflag_atom."); -//// } -// -// -// //copymode = 1; -// -// //EV_FLOAT ev = pair_compute,void >(this,(NeighListKokkos*)list); -// -// //if (eflag_global) eng_vdwl += ev.evdwl; -// //if (vflag_global) { -// // virial[0] += ev.v[0]; -// // virial[1] += ev.v[1]; -// // virial[2] += ev.v[2]; -// // virial[3] += ev.v[3]; -// // virial[4] += ev.v[4]; -// // virial[5] += ev.v[5]; -// //} -// -// //if (eflag_atom) { -// // k_eatom.template modify(); -// // k_eatom.template sync(); -// //} -// -// //if (vflag_atom) { -// // k_vatom.template modify(); -// // k_vatom.template sync(); -// //} -// -// copymode = 0; - -std::cout << "energy: " << output.at("energy").toTensor().to("cpu") << std::endl; -std::cout << "node_energy: " << output.at("node_energy").toTensor().to("cpu") << std::endl; + // mace virials (local atoms only) + // -> derivatives of sum of site energies of local atoms + if (vflag_global) { + auto vir = output.at("virials").toTensor().to("cpu"); + virial[0] = vir[0][0][0].item(); + virial[1] = vir[0][1][1].item(); + virial[2] = vir[0][2][2].item(); + virial[3] = 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); + virial[4] = 0.5*(vir[0][2][0].item() + vir[0][0][2].item()); + virial[5] = 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); + } } /* ---------------------------------------------------------------------- */ @@ -468,160 +384,36 @@ std::cout << "node_energy: " << output.at("node_energy").toTensor().to("cpu") << template void PairMACEKokkos::coeff(int narg, char **arg) { -std::cout << "hello from kokkos coeff" << std::endl; if (!allocated) allocate(); -std::cout << "allocated: " << allocated << std::endl; PairMACE::coeff(narg,arg); -std::cout << "allocated: " << allocated << std::endl; } template void PairMACEKokkos::init_style() { -std::cout << "hello from kokkos init_style" << std::endl; -// if (force->newton_pair == 0) error->all(FLERR, "ERROR: Pair style mace requires newton pair on."); - -// /* -// MACE requires the full neighbor list AND neighbors of ghost atoms -// it appears that: -// * without REQ_GHOST -// list->gnum == 0 -// list->ilist does not include ghost atoms, but the jlists do -// * with REQ_GHOST -// list->gnum == atom->nghost -// list->ilist includes ghost atoms -// */ -// if (domain_decomposition) { -// neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); -// } else { -// neighbor->add_request(this, NeighConst::REQ_FULL); -// } - -// if (host_flag) { -// PairMACE::init_style(); -// return; -// } -//std::cout << "after mace init" << std::endl; -// -// // neighbor list request for KOKKOS -// -// neighflag = lmp->kokkos->neighflag; -// -// auto request = neighbor->add_request(this, NeighConst::REQ_FULL); -// //if (domain_decomposition) { -// //auto request = neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); -// request->set_kokkos_host(std::is_same::value && -// !std::is_same::value); -// request->set_kokkos_device(std::is_same::value); -// if (neighflag == FULL) request->enable_full(); - //} else { - // auto request = neighbor->add_request(this, NeighConst::REQ_FULL); - // request->set_kokkos_host(std::is_same::value && - // !std::is_same::value); - // request->set_kokkos_device(std::is_same::value); - //} -// if (neighflag == FULL) -// error->all(FLERR,"Must use half neighbor list style with pair pace/kk"); - - - - - PairMACE::init_style(); auto request = neighbor->find_request(this); request->set_kokkos_host(std::is_same::value && !std::is_same::value); request->set_kokkos_device(std::is_same::value); - - - - - - -// // TODO: from lj_cut, possibly delete -// // adjust neighbor list request for KOKKOS -// auto request = neighbor->add_request(this, NeighConst::REQ_FULL); -// request->set_kokkos_host(std::is_same::value && -// !std::is_same::value); -// request->set_kokkos_device(std::is_same::value); -// -// neighflag = lmp->kokkos->neighflag; -// auto request = neighbor->find_request(this); -//std::cout << "before request" << std::endl; -// request->set_kokkos_host(std::is_same::value && -// !std::is_same::value); -//std::cout << "before request" << std::endl; -// request->set_kokkos_device(std::is_same::value); -//std::cout << "after request" << std::endl; -// if (neighflag == FULL) -// std::cout << "REQUESTING FULL LIST." << std::endl; -// if (neighflag == FULL) request->enable_full(); - - //auto request = neighbor->add_request(this, NeighConst::REQ_FULL); - //request->set_kokkos_host(std::is_same::value && - // !std::is_same::value); - //request->set_kokkos_device(std::is_same::value); - //if (neighflag == FULL) - // error->all(FLERR,"Must use half neighbor list style with pair pace/kk"); - -std::cout << "goodbye from init_style" << std::endl; } template double PairMACEKokkos::init_one(int i, int j) { -std::cout << "hello from kokkos init_one" << std::endl; double cutone = PairMACE::init_one(i,j); - - //k_scale.h_view(i,j) = k_scale.h_view(j,i) = scale[i][j]; - //k_scale.template modify(); - k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone; k_cutsq.template modify(); - return cutone; - - //// to account for message passing, require cutoff of n_layers * r_max - //return num_interactions*model.attr("r_max").toTensor().item(); - -// k_params.h_view(i,j).lj1 = lj1[i][j]; -// k_params.h_view(i,j).lj2 = lj2[i][j]; -// k_params.h_view(i,j).lj3 = lj3[i][j]; -// k_params.h_view(i,j).lj4 = lj4[i][j]; -// k_params.h_view(i,j).offset = offset[i][j]; -// k_params.h_view(i,j).cutsq = cutone*cutone; -// k_params.h_view(j,i) = k_params.h_view(i,j); -// if (i(); -// k_params.template modify(); - -// return cutone; } template void PairMACEKokkos::allocate() { -std::cout << "hello from kokkos allocate" << std::endl; PairMACE::allocate(); - int n = atom->ntypes + 1; - //MemKK::realloc_kokkos(d_map, "pace:map", n); - MemKK::realloc_kokkos(k_cutsq, "mace:cutsq", n, n); d_cutsq = k_cutsq.template view(); - - // TODO: from lj_cut, possibly delete - //int n = atom->ntypes; - //memory->destroy(cutsq); - //memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); - //d_cutsq = k_cutsq.template view(); - //k_params = Kokkos::DualView("PairLJCut::params",n+1,n+1); - //params = k_params.template view(); } namespace LAMMPS_NS { diff --git a/src/KOKKOS/pair_mace_kokkos.h b/src/KOKKOS/pair_mace_kokkos.h index 72b041f9e90..e2a40fe1902 100644 --- a/src/KOKKOS/pair_mace_kokkos.h +++ b/src/KOKKOS/pair_mace_kokkos.h @@ -39,7 +39,6 @@ class PairMACEKokkos : public PairMACE { public: - //enum {EnabledNeighFlags=FULL}; typedef DeviceType device_type; typedef ArrayTypes AT; PairMACEKokkos(class LAMMPS *); @@ -52,17 +51,7 @@ class PairMACEKokkos : public PairMACE { protected: - // kokkos stuff int host_flag; - int neighflag; - //typename AT::t_x_array_randomread x; - //typename AT::t_x_array c_x; - //typename AT::t_f_array f; - //typename AT::t_int_1d_randomread type; - - - //Kokkos::View k_positions; - typedef Kokkos::DualView tdual_fparams; tdual_fparams k_cutsq; typedef Kokkos::View t_fparams; From ae8d33b1fdba40bc4559fb796db2c0c3dbbcf7af Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Fri, 21 Apr 2023 18:04:19 +0100 Subject: [PATCH 30/51] move some things out of compute. --- src/KOKKOS/pair_mace_kokkos.cpp | 37 +++++++++++++++++++-------------- src/KOKKOS/pair_mace_kokkos.h | 5 +++++ 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/KOKKOS/pair_mace_kokkos.cpp b/src/KOKKOS/pair_mace_kokkos.cpp index c79eed1b73a..c8e33988f30 100644 --- a/src/KOKKOS/pair_mace_kokkos.cpp +++ b/src/KOKKOS/pair_mace_kokkos.cpp @@ -98,19 +98,9 @@ void PairMACEKokkos::compute(int eflag, int vflag) auto hinv4 = domain->h_inv[4]; auto hinv5 = domain->h_inv[5]; - auto k_lammps_atomic_numbers = Kokkos::View("k_lammps_atomic_numbers",lammps_atomic_numbers.size()); - auto k_lammps_atomic_numbers_mirror = Kokkos::create_mirror_view(k_lammps_atomic_numbers); - for (int i=0; i("k_mace_atomic_numbers",mace_atomic_numbers.size()); - auto k_mace_atomic_numbers_mirror = Kokkos::create_mirror_view(k_mace_atomic_numbers); - for (int i=0; imap_style; @@ -272,14 +262,14 @@ void PairMACEKokkos::compute(int eflag, int vflag) // ----- node_attrs ----- // node_attrs is one-hot encoding for atomic numbers - int n_node_feats = mace_atomic_numbers_size; + int n_node_feats = _mace_atomic_numbers_size; auto k_node_attrs = Kokkos::View("k_node_attrs", n_nodes, n_node_feats); Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA(const int ii) { const int i = d_ilist(ii); const int lammps_type = type(i); int t = -1; - for (int j=0; j::coeff(int narg, char **arg) { if (!allocated) allocate(); PairMACE::coeff(narg,arg); + + // new + k_lammps_atomic_numbers = Kokkos::View("k_lammps_atomic_numbers",lammps_atomic_numbers.size()); + auto k_lammps_atomic_numbers_mirror = Kokkos::create_mirror_view(k_lammps_atomic_numbers); + for (int i=0; i("k_mace_atomic_numbers",mace_atomic_numbers.size()); + auto k_mace_atomic_numbers_mirror = Kokkos::create_mirror_view(k_mace_atomic_numbers); + for (int i=0; i diff --git a/src/KOKKOS/pair_mace_kokkos.h b/src/KOKKOS/pair_mace_kokkos.h index e2a40fe1902..8aa0a8e62b4 100644 --- a/src/KOKKOS/pair_mace_kokkos.h +++ b/src/KOKKOS/pair_mace_kokkos.h @@ -57,6 +57,11 @@ class PairMACEKokkos : public PairMACE { typedef Kokkos::View t_fparams; t_fparams d_cutsq; + // new + Kokkos::View k_lammps_atomic_numbers; + Kokkos::View k_mace_atomic_numbers; + int mace_atomic_numbers_size; + }; } // namespace LAMMPS_NS From 11150d52f87897445b459b28804bfbb0d0a1c802 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Wed, 26 Apr 2023 11:30:11 +0100 Subject: [PATCH 31/51] add labels to kokkos kernels. --- src/KOKKOS/pair_mace_kokkos.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/KOKKOS/pair_mace_kokkos.cpp b/src/KOKKOS/pair_mace_kokkos.cpp index c8e33988f30..2152a122fff 100644 --- a/src/KOKKOS/pair_mace_kokkos.cpp +++ b/src/KOKKOS/pair_mace_kokkos.cpp @@ -127,7 +127,7 @@ void PairMACEKokkos::compute(int eflag, int vflag) n_nodes = atom->nlocal; } auto k_positions = Kokkos::View("k_positions", n_nodes); - Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA (const int i) { + Kokkos::parallel_for("PairMACEKokkos: Fill k_positions.", n_nodes, KOKKOS_LAMBDA (const int i) { k_positions(i,0) = x(i,0); k_positions(i,1) = x(i,1); k_positions(i,2) = x(i,2); @@ -153,7 +153,7 @@ void PairMACEKokkos::compute(int eflag, int vflag) // ----- edge_index and unit_shifts ----- // count total number of edges auto k_n_edges_vec = Kokkos::View("k_n_edges_vec", n_nodes); - Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA (const int ii) { + Kokkos::parallel_for("PairMACEKokkos: Fill k_n_edges_vec.", n_nodes, KOKKOS_LAMBDA (const int ii) { const int i = d_ilist(ii); const double xtmp = x(i,0); const double ytmp = x(i,1); @@ -173,13 +173,13 @@ void PairMACEKokkos::compute(int eflag, int vflag) // WARNING: if n_edges remains 0 (e.g., because atoms too far apart) // strange things happen on gpu int64_t n_edges = 0; - Kokkos::parallel_reduce(n_nodes, KOKKOS_LAMBDA(const int ii, int64_t& n_edges) { + Kokkos::parallel_reduce("PairMACEKokkos: Determine n_edges.", n_nodes, KOKKOS_LAMBDA(const int ii, int64_t& n_edges) { n_edges += k_n_edges_vec(ii); }, n_edges); // make first_edge vector to help with parallelizing following loop auto k_first_edge = Kokkos::View("k_first_edge", n_nodes); // initialized to zero // TODO: this is serial to avoid race ... is there something better? - Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int i) { + Kokkos::parallel_for("PairMACEKokkos: Fill k_first_edge.", 1, KOKKOS_LAMBDA(const int i) { for (int ii=0; ii::compute(int eflag, int vflag) if (domain_decomposition) { - Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA(const int ii) { + Kokkos::parallel_for("PairMACEKokkos: Fill edge_index (using domain decomposition).", n_nodes, KOKKOS_LAMBDA(const int ii) { const int i = d_ilist(ii); const double xtmp = x(i,0); const double ytmp = x(i,1); @@ -213,7 +213,7 @@ void PairMACEKokkos::compute(int eflag, int vflag) } else { - Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA(const int ii) { + Kokkos::parallel_for("PairMACEKokkos: Fill edge_index (no domain decomposition).", n_nodes, KOKKOS_LAMBDA(const int ii) { const int i = d_ilist(ii); const double xtmp = x(i,0); const double ytmp = x(i,1); @@ -264,7 +264,7 @@ void PairMACEKokkos::compute(int eflag, int vflag) // node_attrs is one-hot encoding for atomic numbers int n_node_feats = _mace_atomic_numbers_size; auto k_node_attrs = Kokkos::View("k_node_attrs", n_nodes, n_node_feats); - Kokkos::parallel_for(n_nodes, KOKKOS_LAMBDA(const int ii) { + Kokkos::parallel_for("PairMACEKokkos: Fill k_node_attrs.", n_nodes, KOKKOS_LAMBDA(const int ii) { const int i = d_ilist(ii); const int lammps_type = type(i); int t = -1; @@ -282,7 +282,7 @@ void PairMACEKokkos::compute(int eflag, int vflag) // ----- mask for ghost ----- Kokkos::View k_mask("k_mask", n_nodes); - Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(const int ii) { + Kokkos::parallel_for("PairMACEKokkos: Fill k_mask.", nlocal, KOKKOS_LAMBDA(const int ii) { const int i = d_ilist(ii); k_mask(i) = true; }); @@ -338,7 +338,7 @@ void PairMACEKokkos::compute(int eflag, int vflag) auto node_energy_ptr = static_cast(node_energy.data_ptr()); auto k_node_energy = Kokkos::View>(node_energy_ptr,n_nodes); eng_vdwl = 0.0; - Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int ii, double &eng_vdwl) { + Kokkos::parallel_reduce("PairMACEKokkos: Accumulate site energies.", nlocal, KOKKOS_LAMBDA(const int ii, double &eng_vdwl) { const int i = d_ilist(ii); eng_vdwl += k_node_energy(i); }, eng_vdwl); @@ -349,7 +349,7 @@ void PairMACEKokkos::compute(int eflag, int vflag) forces = output.at("forces").toTensor(); auto forces_ptr = static_cast(forces.data_ptr()); auto k_forces = Kokkos::View>(forces_ptr,n_nodes); - Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(const int ii) { + Kokkos::parallel_for("PairMACEKokkos: Extract k_forces.", nlocal, KOKKOS_LAMBDA(const int ii) { const int i = d_ilist(ii); f(i,0) = k_forces(i,0); f(i,1) = k_forces(i,1); From 4195b189d9bb1c6f85e6b359d82f8e1311750b06 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Tue, 30 May 2023 08:11:19 +0100 Subject: [PATCH 32/51] improvements. --- src/KOKKOS/pair_mace_kokkos.cpp | 46 ++++++++++++--------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/src/KOKKOS/pair_mace_kokkos.cpp b/src/KOKKOS/pair_mace_kokkos.cpp index 2152a122fff..37ed8716ce4 100644 --- a/src/KOKKOS/pair_mace_kokkos.cpp +++ b/src/KOKKOS/pair_mace_kokkos.cpp @@ -31,7 +31,6 @@ #include "update.h" #include -#include #include using namespace LAMMPS_NS; @@ -74,14 +73,12 @@ void PairMACEKokkos::compute(int eflag, int vflag) auto d_neighbors = k_list->d_neighbors; auto d_ilist = k_list->d_ilist; - if (atom->nlocal != list->inum) error->all(FLERR, "ERROR: nlocal != inum."); - if (domain_decomposition) { - if (atom->nghost != list->gnum) error->all(FLERR, "ERROR: nghost != gnum."); - } - - if (eflag_atom || vflag_atom) { - error->all(FLERR, "ERROR: kokkos eflag_atom not implemented."); - } + if (atom->nlocal != list->inum) + error->all(FLERR, "ERROR: nlocal != inum."); + if (domain_decomposition && (atom->nghost != list->gnum)) + error->all(FLERR, "ERROR: nghost != gnum."); + if (eflag_atom || vflag_atom) + error->all(FLERR, "ERROR: mace/kokkos eflag_atom and/or vflag_atom not implemented."); int nlocal = atom->nlocal; auto r_max_squared = this->r_max_squared; @@ -122,8 +119,8 @@ void PairMACEKokkos::compute(int eflag, int vflag) } else { // normally, ghost atoms are included in the graph as independent // nodes, as required when the local domain does not have PBC. - // however, in no_domain_decomposition mode, ghost atoms are known to - // be shifted versions of local atoms. + // however, in no_domain_decomposition mode, ghost atoms are simply + // shifted versions of local atoms. n_nodes = atom->nlocal; } auto k_positions = Kokkos::View("k_positions", n_nodes); @@ -314,22 +311,7 @@ void PairMACEKokkos::compute(int eflag, int vflag) input.insert("shifts", shifts); input.insert("unit_shifts", unit_shifts); input.insert("weight", weight); - -//std::cout << "batch" << batch.to("cpu") << std::endl; -//std::cout << "cell" << cell.to("cpu") << std::endl; -//std::cout << "edge_index" << edge_index.to("cpu") << std::endl; -//std::cout << "node_attrs" << node_attrs.to("cpu") << std::endl; -//std::cout << "positions" << positions.to("cpu") << std::endl; -//std::cout << "ptr" << ptr.to("cpu") << std::endl; -//std::cout << "shifts" << shifts.to("cpu") << std::endl; -//std::cout << "unit_shifts" << unit_shifts.to("cpu") << std::endl; -//std::cout << "weight" << weight.to("cpu") << std::endl; -//std::cout << "mask" << mask.to("cpu") << std::endl; - - auto output = model.forward({input, mask, true, true, false}).toGenericDict(); - -//std::cout << "energy: " << output.at("energy").toTensor().to("cpu") << std::endl; -//std::cout << "node_energy: " << output.at("node_energy").toTensor().to("cpu") << std::endl; + auto output = model.forward({input, mask, true, bool(vflag_global), false}).toGenericDict(); // mace energy // -> sum of site energies of local atoms @@ -359,14 +341,20 @@ void PairMACEKokkos::compute(int eflag, int vflag) // mace virials (local atoms only) // -> derivatives of sum of site energies of local atoms if (vflag_global) { + // TODO: is this cpu transfer necessary? auto vir = output.at("virials").toTensor().to("cpu"); + // caution: lammps does not use voigt ordering virial[0] = vir[0][0][0].item(); virial[1] = vir[0][1][1].item(); virial[2] = vir[0][2][2].item(); - virial[3] = 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); + virial[3] = 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); virial[4] = 0.5*(vir[0][2][0].item() + vir[0][0][2].item()); - virial[5] = 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); + virial[5] = 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); } + + // TODO: investigate this + // Appears to be important for dumps and probably more + atomKK->modified(execution_space,F_MASK); } /* ---------------------------------------------------------------------- */ From eb867b146010b2f1a9607f7297ff0472187eac9d Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Tue, 30 May 2023 08:26:01 +0100 Subject: [PATCH 33/51] mergefix. --- cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 66963a7a566..e3fc660d17a 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -521,7 +521,7 @@ else() endif() foreach(PKG_WITH_INCL KSPACE PYTHON ML-IAP VORONOI COLVARS ML-HDNNP MDI MOLFILE NETCDF - PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM LATTE MSCG COMPRESS ML-PACE ML-MACE LEPTON) + PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM MSCG COMPRESS ML-PACE ML-MACE LEPTON) if(PKG_${PKG_WITH_INCL}) include(Packages/${PKG_WITH_INCL}) endif() From 22eb83687ce56da8d9db26a78533219827f1496f Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Tue, 30 May 2023 09:22:31 +0100 Subject: [PATCH 34/51] delete debug code. --- src/ML-MACE/pair_mace.cpp | 102 ++------------------------------------ 1 file changed, 5 insertions(+), 97 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index dd6821c6397..001bc8213a2 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -30,10 +30,6 @@ #include #include -#ifdef MACE_DEBUG -#include -#endif - using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ @@ -55,16 +51,6 @@ void PairMACE::compute(int eflag, int vflag) { ev_init(eflag, vflag); -std::cout << " " << std::endl; - - #ifdef MACE_DEBUG - std::chrono::time_point t0, t1; - #endif - - #ifdef MACE_DEBUG - t0 = std::chrono::high_resolution_clock::now(); - #endif - if (atom->nlocal != list->inum) error->all(FLERR, "ERROR: nlocal != inum."); if (domain_decomposition) { if (atom->nghost != list->gnum) error->all(FLERR, "ERROR: nghost != gnum."); @@ -102,15 +88,6 @@ std::cout << " " << std::endl; cell[2][1] = domain->h[3]; cell[2][2] = domain->h[2]; - #ifdef MACE_DEBUG - t1 = std::chrono::high_resolution_clock::now(); - std::cout << "positions+cell: " << std::chrono::duration_cast(t1-t0).count() << std::endl; - #endif - - #ifdef MACE_DEBUG - t0 = std::chrono::high_resolution_clock::now(); - #endif - // ----- edge_index and unit_shifts ----- // count total number of edges int n_edges = 0; @@ -142,13 +119,6 @@ std::cout << " " << std::endl; for (int ii=0; ii(t1-t0).count() << std::endl; - #endif - #ifdef MACE_DEBUG - t0 = std::chrono::high_resolution_clock::now(); - #endif // fill edge_index and unit_shifts tensors auto edge_index = torch::empty({2,n_edges}, torch::dtype(torch::kInt64)); auto unit_shifts = torch::zeros({n_edges,3}, torch_float_dtype); @@ -193,14 +163,6 @@ std::cout << " " << std::endl; } } } - #ifdef MACE_DEBUG - t1 = std::chrono::high_resolution_clock::now(); - std::cout << "edge fill: " << std::chrono::duration_cast(t1-t0).count() << std::endl; - #endif - - #ifdef MACE_DEBUG - t0 = std::chrono::high_resolution_clock::now(); - #endif // ----- node_attrs ----- // node_attrs is one-hot encoding for atomic numbers @@ -221,15 +183,6 @@ std::cout << " " << std::endl; node_attrs[i][mace_type(atom->type[i])-1] = 1.0; } - #ifdef MACE_DEBUG - t1 = std::chrono::high_resolution_clock::now(); - std::cout << "node attrs: " << std::chrono::duration_cast(t1-t0).count() << std::endl; - #endif - - #ifdef MACE_DEBUG - t0 = std::chrono::high_resolution_clock::now(); - #endif - // ----- mask for ghost ----- auto mask = torch::zeros(n_nodes, torch::dtype(torch::kBool)); #pragma omp parallel for @@ -247,15 +200,7 @@ std::cout << " " << std::endl; ptr[1] = n_nodes; weight[0] = 1.0; - #ifdef MACE_DEBUG - t1 = std::chrono::high_resolution_clock::now(); - std::cout << "other setup: " << std::chrono::duration_cast(t1-t0).count() << std::endl; - #endif - // transfer data to device - #ifdef MACE_DEBUG - t0 = std::chrono::high_resolution_clock::now(); - #endif batch = batch.to(device); cell = cell.to(device); edge_index = edge_index.to(device); @@ -267,15 +212,8 @@ std::cout << " " << std::endl; shifts = shifts.to(device); unit_shifts = unit_shifts.to(device); weight = weight.to(device); - #ifdef MACE_DEBUG - t1 = std::chrono::high_resolution_clock::now(); - std::cout << "transfer: " << std::chrono::duration_cast(t1-t0).count() << std::endl; - #endif - - // pack the input, call the model, extract the output - #ifdef MACE_DEBUG - t0 = std::chrono::high_resolution_clock::now(); - #endif + + // pack the input, call the model c10::Dict input; input.insert("batch", batch); input.insert("cell", cell); @@ -288,38 +226,12 @@ std::cout << " " << std::endl; input.insert("shifts", shifts); input.insert("unit_shifts", unit_shifts); input.insert("weight", weight); - #ifdef MACE_DEBUG - t1 = std::chrono::high_resolution_clock::now(); - std::cout << "pack: " << std::chrono::duration_cast(t1-t0).count() << std::endl; - #endif - - #ifdef MACE_DEBUG - t0 = std::chrono::high_resolution_clock::now(); - #endif auto output = model.forward({input, mask.to(device), true, true, false}).toGenericDict(); - #ifdef MACE_DEBUG - t1 = std::chrono::high_resolution_clock::now(); - std::cout << "model: " << std::chrono::duration_cast(t1-t0).count() << std::endl; - #endif - - #ifdef MACE_DEBUG - t0 = std::chrono::high_resolution_clock::now(); - #endif - auto node_energy = output.at("node_energy").toTensor().cpu(); - forces = output.at("forces").toTensor().cpu(); - auto vir = output.at("virials").toTensor().cpu(); - #ifdef MACE_DEBUG - t1 = std::chrono::high_resolution_clock::now(); - std::cout << "transfer back: " << std::chrono::duration_cast(t1-t0).count() << std::endl; - #endif - - #ifdef MACE_DEBUG - t0 = std::chrono::high_resolution_clock::now(); - #endif // mace energy // -> sum of site energies of local atoms if (eflag_global) { + auto node_energy = output.at("node_energy").toTensor().cpu(); eng_vdwl = 0.0; #pragma omp parallel for reduction(+:eng_vdwl) for (int ii=0; iiinum; ++ii) { @@ -330,6 +242,7 @@ std::cout << " " << std::endl; // mace forces // -> derivatives of total mace energy + forces = output.at("forces").toTensor().cpu(); #pragma omp parallel for for (int ii=0; iiinum; ++ii) { int i = list->ilist[ii]; @@ -351,6 +264,7 @@ std::cout << " " << std::endl; // mace virials (local atoms only) // -> derivatives of sum of site energies of local atoms if (vflag_global) { + auto vir = output.at("virials").toTensor().cpu(); virial[0] = vir[0][0][0].item(); virial[1] = vir[0][1][1].item(); virial[2] = vir[0][2][2].item(); @@ -364,12 +278,6 @@ std::cout << " " << std::endl; if (vflag_atom) { error->all(FLERR, "ERROR: pair_mace does not support vflag_atom."); } - - #ifdef MACE_DEBUG - t1 = std::chrono::high_resolution_clock::now(); - std::cout << "unpack: " << std::chrono::duration_cast(t1-t0).count() << std::endl; - #endif - } /* ---------------------------------------------------------------------- */ From 7d0543f3598a0cdc44960af5eec983915c97c624 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Tue, 30 May 2023 15:13:00 +0100 Subject: [PATCH 35/51] tidy. --- cmake/Modules/Packages/ML-MACE.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/Modules/Packages/ML-MACE.cmake b/cmake/Modules/Packages/ML-MACE.cmake index 4d04b762a1a..a1624a370ef 100644 --- a/cmake/Modules/Packages/ML-MACE.cmake +++ b/cmake/Modules/Packages/ML-MACE.cmake @@ -5,4 +5,3 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") target_link_libraries(lammps PRIVATE "${TORCH_LIBRARIES}") set_property(TARGET lammps PROPERTY CXX_STANDARD 14) -add_compile_definitions(MACE_DEBUG) From f96283e0d362d61dd25d93828018bb5bb6d71dcd Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Wed, 31 May 2023 17:29:41 +0100 Subject: [PATCH 36/51] mergefix. --- src/ML-MACE/pair_mace.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 001bc8213a2..20bdd287849 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -231,13 +231,8 @@ void PairMACE::compute(int eflag, int vflag) // mace energy // -> sum of site energies of local atoms if (eflag_global) { - auto node_energy = output.at("node_energy").toTensor().cpu(); - eng_vdwl = 0.0; - #pragma omp parallel for reduction(+:eng_vdwl) - for (int ii=0; iiinum; ++ii) { - int i = list->ilist[ii]; - eng_vdwl += node_energy[i].item(); - } + energy = output.at("total_energy_local").toTensor().cpu(); + eng_vdwl += energy.item(); } // mace forces @@ -254,6 +249,7 @@ void PairMACE::compute(int eflag, int vflag) // mace site energies // -> local atoms only if (eflag_atom) { + auto node_energy = output.at("node_energy").toTensor().cpu(); #pragma omp parallel for for (int ii=0; iiinum; ++ii) { int i = list->ilist[ii]; From 1350adb93cc56eb2e1e12db1766cc61e504e8e45 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Wed, 31 May 2023 18:00:37 +0100 Subject: [PATCH 37/51] detect gpu automatically. --- src/ML-MACE/pair_mace.cpp | 15 ++++----------- src/ML-MACE/pair_mace.h | 1 - 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 20bdd287849..614f0c1ed47 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -280,18 +280,12 @@ void PairMACE::compute(int eflag, int vflag) void PairMACE::settings(int narg, char **arg) { - if (narg > 2) { + if (narg > 1) { error->all(FLERR, "Too many pair_style arguments for pair_style mace."); } - if (narg >= 1) { - if (strcmp(arg[0], "gpu") == 0) { - device_type = "gpu"; - } - } - - if (narg >= 2) { - if (strcmp(arg[1], "no_domain_decomposition") == 0) { + if (narg == 1) { + if (strcmp(arg[0], "no_domain_decomposition") == 0) { domain_decomposition = false; // TODO: add check against MPI rank } @@ -306,12 +300,11 @@ void PairMACE::coeff(int narg, char **arg) if (!allocated) allocate(); - if (device_type == "cpu") { + if (!torch::cuda::is_available()) { device = c10::Device(torch::kCPU); } else { int rank; MPI_Comm_rank(world, &rank); - std::cout << "MPI rank: " << rank << std::endl; device = c10::Device(torch::kCUDA,rank); } diff --git a/src/ML-MACE/pair_mace.h b/src/ML-MACE/pair_mace.h index 9616d18286e..57b60e2bb17 100644 --- a/src/ML-MACE/pair_mace.h +++ b/src/ML-MACE/pair_mace.h @@ -47,7 +47,6 @@ class PairMACE : public Pair { protected: - std::string device_type = "cpu"; bool domain_decomposition = true; torch::Device device = torch::kCPU; torch::jit::script::Module model; From 38500ff5c5adeb47fafa2b367cf6777b3167928f Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Fri, 2 Jun 2023 10:30:09 +0100 Subject: [PATCH 38/51] bugfix. --- src/KOKKOS/pair_mace_kokkos.cpp | 2 +- src/ML-MACE/pair_mace.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/pair_mace_kokkos.cpp b/src/KOKKOS/pair_mace_kokkos.cpp index 37ed8716ce4..27e509164b6 100644 --- a/src/KOKKOS/pair_mace_kokkos.cpp +++ b/src/KOKKOS/pair_mace_kokkos.cpp @@ -311,7 +311,7 @@ void PairMACEKokkos::compute(int eflag, int vflag) input.insert("shifts", shifts); input.insert("unit_shifts", unit_shifts); input.insert("weight", weight); - auto output = model.forward({input, mask, true, bool(vflag_global), false}).toGenericDict(); + auto output = model.forward({input, mask, bool(vflag_global)}).toGenericDict(); // mace energy // -> sum of site energies of local atoms diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 614f0c1ed47..de97351c8ef 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -226,7 +226,7 @@ void PairMACE::compute(int eflag, int vflag) input.insert("shifts", shifts); input.insert("unit_shifts", unit_shifts); input.insert("weight", weight); - auto output = model.forward({input, mask.to(device), true, true, false}).toGenericDict(); + auto output = model.forward({input, mask.to(device), bool(vflag_global)}).toGenericDict(); // mace energy // -> sum of site energies of local atoms From 5dbc52d8a3e9b5ef321d2f157c052bb3812814fb Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Wed, 7 Jun 2023 23:23:41 +0100 Subject: [PATCH 39/51] add cuda message. --- src/ML-MACE/pair_mace.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index de97351c8ef..e44645bdd4a 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -301,8 +301,10 @@ void PairMACE::coeff(int narg, char **arg) if (!allocated) allocate(); if (!torch::cuda::is_available()) { + std::cout << "CUDA unavailable, setting device type to torch::kCPU." << std::endl; device = c10::Device(torch::kCPU); } else { + std::cout << "CUDA found, setting device type to torch::kCUDA." << std::endl; int rank; MPI_Comm_rank(world, &rank); device = c10::Device(torch::kCUDA,rank); From 4995a4023ee71819bb90b7699c1e1849dc7128df Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Mon, 10 Jul 2023 15:15:15 +0100 Subject: [PATCH 40/51] update for domain decomposition with new lammps wrapper. --- src/KOKKOS/pair_mace_kokkos.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/KOKKOS/pair_mace_kokkos.cpp b/src/KOKKOS/pair_mace_kokkos.cpp index 27e509164b6..c9408c54e45 100644 --- a/src/KOKKOS/pair_mace_kokkos.cpp +++ b/src/KOKKOS/pair_mace_kokkos.cpp @@ -331,11 +331,11 @@ void PairMACEKokkos::compute(int eflag, int vflag) forces = output.at("forces").toTensor(); auto forces_ptr = static_cast(forces.data_ptr()); auto k_forces = Kokkos::View>(forces_ptr,n_nodes); - Kokkos::parallel_for("PairMACEKokkos: Extract k_forces.", nlocal, KOKKOS_LAMBDA(const int ii) { + Kokkos::parallel_for("PairMACEKokkos: Extract k_forces.", n_nodes, KOKKOS_LAMBDA(const int ii) { const int i = d_ilist(ii); - f(i,0) = k_forces(i,0); - f(i,1) = k_forces(i,1); - f(i,2) = k_forces(i,2); + f(i,0) += k_forces(i,0); + f(i,1) += k_forces(i,1); + f(i,2) += k_forces(i,2); }); // mace virials (local atoms only) @@ -344,12 +344,12 @@ void PairMACEKokkos::compute(int eflag, int vflag) // TODO: is this cpu transfer necessary? auto vir = output.at("virials").toTensor().to("cpu"); // caution: lammps does not use voigt ordering - virial[0] = vir[0][0][0].item(); - virial[1] = vir[0][1][1].item(); - virial[2] = vir[0][2][2].item(); - virial[3] = 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); - virial[4] = 0.5*(vir[0][2][0].item() + vir[0][0][2].item()); - virial[5] = 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); + virial[0] += vir[0][0][0].item(); + virial[1] += vir[0][1][1].item(); + virial[2] += vir[0][2][2].item(); + virial[3] += 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); + virial[4] += 0.5*(vir[0][2][0].item() + vir[0][0][2].item()); + virial[5] += 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); } // TODO: investigate this From 8b3243853a0790a3edab3142dd3a7f4c20640003 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Mon, 10 Jul 2023 15:29:51 +0100 Subject: [PATCH 41/51] same update for non-kokkos style. --- src/ML-MACE/pair_mace.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index e44645bdd4a..e5bd0b8bbef 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -239,11 +239,11 @@ void PairMACE::compute(int eflag, int vflag) // -> derivatives of total mace energy forces = output.at("forces").toTensor().cpu(); #pragma omp parallel for - for (int ii=0; iiinum; ++ii) { + for (int ii=0; iiilist[ii]; - atom->f[i][0] = forces[i][0].item(); - atom->f[i][1] = forces[i][1].item(); - atom->f[i][2] = forces[i][2].item(); + atom->f[i][0] += forces[i][0].item(); + atom->f[i][1] += forces[i][1].item(); + atom->f[i][2] += forces[i][2].item(); } // mace site energies @@ -261,12 +261,12 @@ void PairMACE::compute(int eflag, int vflag) // -> derivatives of sum of site energies of local atoms if (vflag_global) { auto vir = output.at("virials").toTensor().cpu(); - virial[0] = vir[0][0][0].item(); - virial[1] = vir[0][1][1].item(); - virial[2] = vir[0][2][2].item(); - virial[3] = 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); - virial[4] = 0.5*(vir[0][2][0].item() + vir[0][0][2].item()); - virial[5] = 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); + virial[0] += vir[0][0][0].item(); + virial[1] += vir[0][1][1].item(); + virial[2] += vir[0][2][2].item(); + virial[3] += 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); + virial[4] += 0.5*(vir[0][2][0].item() + vir[0][0][2].item()); + virial[5] += 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); } // mace site virials From dd850990aa29f23925909940412c8040fd1a10fd Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Mon, 10 Jul 2023 17:26:23 +0100 Subject: [PATCH 42/51] enable multi-node. --- src/ML-MACE/pair_mace.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index e5bd0b8bbef..a9290034254 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -305,9 +305,13 @@ void PairMACE::coeff(int narg, char **arg) device = c10::Device(torch::kCPU); } else { std::cout << "CUDA found, setting device type to torch::kCUDA." << std::endl; - int rank; - MPI_Comm_rank(world, &rank); - device = c10::Device(torch::kCUDA,rank); + //int worldrank; + //MPI_Comm_rank(world, &worldrank); + MPI_Comm local; + MPI_Comm_split_type(world, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &local); + int localrank; + MPI_Comm_rank(local, &localrank); + device = c10::Device(torch::kCUDA,localrank); } std::cout << "Loading MACE model from \"" << arg[2] << "\" ..."; From 11ef3c412ff1c394eefe81aa8a2ff9f00546f091 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Mon, 20 Nov 2023 19:46:34 +0000 Subject: [PATCH 43/51] check pair_style input. --- src/ML-MACE/pair_mace.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index a9290034254..d7ccfdbfeff 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -288,6 +288,8 @@ void PairMACE::settings(int narg, char **arg) if (strcmp(arg[0], "no_domain_decomposition") == 0) { domain_decomposition = false; // TODO: add check against MPI rank + } else { + error->all(FLERR, "Unrecognized argument for pair_style mace."); } } } From 344780a91dc05ac6514cd71708aae3e76ab4bbba Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Mon, 20 Nov 2023 20:25:50 +0000 Subject: [PATCH 44/51] bugfix affecting cases when lammps has fewer atom types than mace. --- src/KOKKOS/pair_mace_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/pair_mace_kokkos.cpp b/src/KOKKOS/pair_mace_kokkos.cpp index c9408c54e45..8dc3c25c797 100644 --- a/src/KOKKOS/pair_mace_kokkos.cpp +++ b/src/KOKKOS/pair_mace_kokkos.cpp @@ -374,7 +374,7 @@ void PairMACEKokkos::coeff(int narg, char **arg) Kokkos::deep_copy(k_lammps_atomic_numbers, k_lammps_atomic_numbers_mirror); k_mace_atomic_numbers = Kokkos::View("k_mace_atomic_numbers",mace_atomic_numbers.size()); auto k_mace_atomic_numbers_mirror = Kokkos::create_mirror_view(k_mace_atomic_numbers); - for (int i=0; i Date: Mon, 20 Nov 2023 21:03:09 +0000 Subject: [PATCH 45/51] delete outdated. --- src/ML-MACE/pair_mace.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index d7ccfdbfeff..1dd37a324ea 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -307,8 +307,6 @@ void PairMACE::coeff(int narg, char **arg) device = c10::Device(torch::kCPU); } else { std::cout << "CUDA found, setting device type to torch::kCUDA." << std::endl; - //int worldrank; - //MPI_Comm_rank(world, &worldrank); MPI_Comm local; MPI_Comm_split_type(world, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &local); int localrank; From 2de384d48c9047ca2c9f3693c1fcb72397a1c74a Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Mon, 20 Nov 2023 23:26:21 +0000 Subject: [PATCH 46/51] print more info about type mapping. --- src/ML-MACE/pair_mace.cpp | 28 +++++++++++++++++----------- src/ML-MACE/pair_mace.h | 1 + 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 1dd37a324ea..abfcba5bd76 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -165,16 +165,6 @@ void PairMACE::compute(int eflag, int vflag) } // ----- node_attrs ----- - // node_attrs is one-hot encoding for atomic numbers - auto mace_type = [this](int lammps_type) { - for (int i=0; iall(FLERR, "ERROR: problem converting lammps_type to mace_type."); - return -1; - }; int n_node_feats = mace_atomic_numbers.size(); auto node_attrs = torch::zeros({n_nodes,n_node_feats}, torch_float_dtype); #pragma omp parallel for @@ -343,7 +333,7 @@ void PairMACE::coeff(int narg, char **arg) for (int i=0; i()); } - std::cout << " - The model atomic numbers are: " << mace_atomic_numbers << "." << std::endl; + std::cout << " - The MACE model atomic numbers are: " << mace_atomic_numbers << "." << std::endl; // extract atomic numbers from pair_coeff for (int i=3; intypes+1; i++) for (int j=i; jntypes+1; j++) setflag[i][j] = 1; @@ -396,3 +391,14 @@ void PairMACE::allocate() memory->create(cutsq, atom->ntypes+1, atom->ntypes+1, "pair:cutsq"); } + +int PairMACE::mace_type(int lammps_type) +{ + for (int i=0; iall(FLERR, "Problem converting lammps_type to mace_type."); + return -1; + } diff --git a/src/ML-MACE/pair_mace.h b/src/ML-MACE/pair_mace.h index 57b60e2bb17..4c8eadf86d6 100644 --- a/src/ML-MACE/pair_mace.h +++ b/src/ML-MACE/pair_mace.h @@ -56,6 +56,7 @@ class PairMACE : public Pair { int64_t num_interactions; std::vector mace_atomic_numbers; std::vector lammps_atomic_numbers; + int mace_type(int lammps_type); const std::array periodic_table = { "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", From 9988368f0e7fe7ed02cc8d3025794af3978b8073 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Sun, 10 Dec 2023 00:57:26 +0000 Subject: [PATCH 47/51] bugfix. --- src/ML-MACE/pair_mace.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index abfcba5bd76..146262fafff 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -344,7 +344,8 @@ void PairMACE::coeff(int narg, char **arg) std::cout << " - The pair_coeff atomic numbers are: " << lammps_atomic_numbers << "." << std::endl; for (int i=1; i<=lammps_atomic_numbers.size(); ++i) { - std::cout << " - Mapping LAMMPS type " << i << " (" << periodic_table[i-1] + std::cout << " - Mapping LAMMPS type " << i + << " (" << periodic_table[lammps_atomic_numbers[i-1]-1] << ") to MACE type " << mace_type(i) << "." << std::endl; } From 184b6fdeccd0c430fd259e7467ca37d616632b63 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Thu, 1 Feb 2024 05:36:26 +0000 Subject: [PATCH 48/51] remove hardcoded c++14. better to set at command line. --- cmake/Modules/Packages/ML-MACE.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/Modules/Packages/ML-MACE.cmake b/cmake/Modules/Packages/ML-MACE.cmake index a1624a370ef..e2956ae5984 100644 --- a/cmake/Modules/Packages/ML-MACE.cmake +++ b/cmake/Modules/Packages/ML-MACE.cmake @@ -4,4 +4,3 @@ find_package(Torch REQUIRED) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") target_link_libraries(lammps PRIVATE "${TORCH_LIBRARIES}") -set_property(TARGET lammps PROPERTY CXX_STANDARD 14) From 2a22b697874ce8ef4cbb316064ca4a95249d6b5a Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Mon, 28 Oct 2024 04:02:04 -0400 Subject: [PATCH 49/51] Add domain header. --- src/KOKKOS/pair_mace_kokkos.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/KOKKOS/pair_mace_kokkos.cpp b/src/KOKKOS/pair_mace_kokkos.cpp index 8dc3c25c797..406e7ff4b45 100644 --- a/src/KOKKOS/pair_mace_kokkos.cpp +++ b/src/KOKKOS/pair_mace_kokkos.cpp @@ -20,6 +20,7 @@ #include "atom_kokkos.h" #include "atom_masks.h" +#include "domain.h" #include "error.h" #include "force.h" #include "kokkos.h" From f51963aada27c5df4a87a37faec5a4125d0f2e82 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Mon, 28 Oct 2024 04:03:44 -0400 Subject: [PATCH 50/51] Change item to template item. --- src/KOKKOS/pair_mace_kokkos.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/pair_mace_kokkos.cpp b/src/KOKKOS/pair_mace_kokkos.cpp index 406e7ff4b45..b984b5fc210 100644 --- a/src/KOKKOS/pair_mace_kokkos.cpp +++ b/src/KOKKOS/pair_mace_kokkos.cpp @@ -345,12 +345,14 @@ void PairMACEKokkos::compute(int eflag, int vflag) // TODO: is this cpu transfer necessary? auto vir = output.at("virials").toTensor().to("cpu"); // caution: lammps does not use voigt ordering - virial[0] += vir[0][0][0].item(); - virial[1] += vir[0][1][1].item(); - virial[2] += vir[0][2][2].item(); - virial[3] += 0.5*(vir[0][1][0].item() + vir[0][0][1].item()); - virial[4] += 0.5*(vir[0][2][0].item() + vir[0][0][2].item()); - virial[5] += 0.5*(vir[0][2][1].item() + vir[0][1][2].item()); + // also: it would be nice to get rid of the 'template item' stuff, + // but some compilers seem to require it + virial[0] += vir[0][0][0].template item(); + virial[1] += vir[0][1][1].template item(); + virial[2] += vir[0][2][2].template item(); + virial[3] += 0.5*(vir[0][1][0].template item() + vir[0][0][1].template item()); + virial[4] += 0.5*(vir[0][2][0].template item() + vir[0][0][2].template item()); + virial[5] += 0.5*(vir[0][2][1].template item() + vir[0][1][2].template item()); } // TODO: investigate this From e55b3212da455af14f02772efe29b9e787e16eec Mon Sep 17 00:00:00 2001 From: Logan Ward Date: Thu, 24 Apr 2025 15:39:03 +0000 Subject: [PATCH 51/51] Rely on GPU pinning externally Avoids the need to compile with MPI support --- src/ML-MACE/pair_mace.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/ML-MACE/pair_mace.cpp b/src/ML-MACE/pair_mace.cpp index 146262fafff..9b7547653fc 100644 --- a/src/ML-MACE/pair_mace.cpp +++ b/src/ML-MACE/pair_mace.cpp @@ -297,11 +297,8 @@ void PairMACE::coeff(int narg, char **arg) device = c10::Device(torch::kCPU); } else { std::cout << "CUDA found, setting device type to torch::kCUDA." << std::endl; - MPI_Comm local; - MPI_Comm_split_type(world, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &local); - int localrank; - MPI_Comm_rank(local, &localrank); - device = c10::Device(torch::kCUDA,localrank); + int localrank = 0; // Assume GPU pinning occurs outside of LAMMPS + device = c10::Device(torch::kCUDA, localrank); } std::cout << "Loading MACE model from \"" << arg[2] << "\" ...";