From 73f03db5d640a245e8510604b7197c8221c7d83a Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Wed, 13 Nov 2024 17:27:53 +0000 Subject: [PATCH 1/9] Update ASE --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fc870cce..8de3a1eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ numpy = "^1.26" tqdm = "^4.66" pymongo = "^4.7.3" matplotlib = "^3.9" -ase = "3.22.1" +ase = "^3.23" lark = "^1.1.9" [tool.poetry.group.dev.dependencies] From 4a56b0b073fd38a89f53ef95b90436b12bc1d3dd Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Wed, 13 Nov 2024 18:46:58 +0000 Subject: [PATCH 2/9] Fix duplicate calculated results --- abcd/model.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/abcd/model.py b/abcd/model.py index f4c87b61..cb9f135f 100644 --- a/abcd/model.py +++ b/abcd/model.py @@ -159,9 +159,10 @@ def from_atoms(cls, atoms: Atoms, extra_info=None, store_calc=True): } arrays_keys = set(atoms.arrays.keys()) info_keys = set(atoms.info.keys()) - results_keys = ( - set(atoms.calc.results.keys()) if store_calc and atoms.calc else {} - ) + if store_calc and atoms.calc: + results_keys = atoms.calc.results.keys() - (arrays_keys | info_keys) + else: + results_keys = {} all_keys = (reserved_keys, arrays_keys, info_keys, results_keys) if len(set.union(*all_keys)) != sum(map(len, all_keys)): From 93445cc4269fadde62994c727bd8def714fc7386 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Wed, 27 Nov 2024 18:11:10 +0000 Subject: [PATCH 3/9] Fix getting Atoms results --- abcd/model.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/abcd/model.py b/abcd/model.py index cb9f135f..ea8933a3 100644 --- a/abcd/model.py +++ b/abcd/model.py @@ -146,7 +146,9 @@ def __iter__(self): @classmethod def from_atoms(cls, atoms: Atoms, extra_info=None, store_calc=True): - """ASE's original implementation""" + """Extract data from Atoms info, arrays and results.""" + if not isinstance(atoms, Atoms): + raise ValueError("atoms must be an ASE Atoms object.") reserved_keys = { "n_atoms", @@ -157,12 +159,13 @@ def from_atoms(cls, atoms: Atoms, extra_info=None, store_calc=True): "derived", "formula", } + arrays_keys = set(atoms.arrays.keys()) info_keys = set(atoms.info.keys()) if store_calc and atoms.calc: results_keys = atoms.calc.results.keys() - (arrays_keys | info_keys) else: - results_keys = {} + results_keys = set() all_keys = (reserved_keys, arrays_keys, info_keys, results_keys) if len(set.union(*all_keys)) != sum(map(len, all_keys)): @@ -173,46 +176,47 @@ def from_atoms(cls, atoms: Atoms, extra_info=None, store_calc=True): n_atoms = len(atoms) - dct = { + data = { "n_atoms": n_atoms, "cell": atoms.cell.tolist(), "pbc": atoms.pbc.tolist(), "formula": atoms.get_chemical_formula(), } - info_keys.update({"n_atoms", "cell", "pbc", "formula"}) + info_keys.update(data.keys()) for key, value in atoms.arrays.items(): if isinstance(value, np.ndarray): - dct[key] = value.tolist() + data[key] = value.tolist() else: - dct[key] = value + data[key] = value for key, value in atoms.info.items(): if isinstance(value, np.ndarray): - dct[key] = value.tolist() + data[key] = value.tolist() else: - dct[key] = value + data[key] = value if store_calc and atoms.calc: - dct["calculator_name"] = atoms.calc.__class__.__name__ - dct["calculator_parameters"] = atoms.calc.todict() + data["calculator_name"] = atoms.calc.__class__.__name__ + data["calculator_parameters"] = atoms.calc.todict() info_keys.update({"calculator_name", "calculator_parameters"}) for key, value in atoms.calc.results.items(): - if isinstance(value, np.ndarray): if value.shape[0] == n_atoms: - arrays_keys.update(key) + arrays_keys.add(key) else: - info_keys.update(key) - dct[key] = value.tolist() + info_keys.add(key) + data[key] = value.tolist() + else: + data[key] = value item.arrays_keys = list(arrays_keys) item.info_keys = list(info_keys) item.results_keys = list(results_keys) - item.update(dct) + item.update(data) if extra_info: item.info_keys.extend(extra_info.keys()) From 0d1258700602db3997a2a07f127cfed650534023 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 28 Nov 2024 00:55:15 +0000 Subject: [PATCH 4/9] Fix derived data --- abcd/model.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/abcd/model.py b/abcd/model.py index ea8933a3..7f6854da 100644 --- a/abcd/model.py +++ b/abcd/model.py @@ -204,10 +204,6 @@ def from_atoms(cls, atoms: Atoms, extra_info=None, store_calc=True): for key, value in atoms.calc.results.items(): if isinstance(value, np.ndarray): - if value.shape[0] == n_atoms: - arrays_keys.add(key) - else: - info_keys.add(key) data[key] = value.tolist() else: data[key] = value @@ -261,14 +257,14 @@ def pre_save(self): if cell: volume = abs(np.linalg.det(cell)) # atoms.get_volume() - self["volume"] = volume self.derived_keys.append("volume") + self["volume"] = volume virial = self.get("virial") if virial: # pressure P = -1/3 Tr(stress) = -1/3 Tr(virials/volume) - self["pressure"] = -1 / 3 * np.trace(virial / volume) self.derived_keys.append("pressure") + self["pressure"] = -1 / 3 * np.trace(virial / volume) # 'elements': Counter(atoms.get_chemical_symbols()), self["elements"] = Counter(str(element) for element in self["numbers"]) From 27f44af261c8248c2cd99772e35eab1f541b6475 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Wed, 13 Nov 2024 18:29:05 +0000 Subject: [PATCH 5/9] Test extxyz file --- tests/test_abstract_model.py | 107 +++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 tests/test_abstract_model.py diff --git a/tests/test_abstract_model.py b/tests/test_abstract_model.py new file mode 100644 index 00000000..78964abe --- /dev/null +++ b/tests/test_abstract_model.py @@ -0,0 +1,107 @@ +import pytest + +from io import StringIO +from ase.io import read +import numpy as np + +from abcd.model import AbstractModel + + +@pytest.fixture +def extxyz_file(): + return StringIO( + """2 + Properties=species:S:1:pos:R:3:forces:R:3 energy=-1 pbc="F T F" + Si 0.0 0.0 0.0 0.4 0.6 -0.4 + Si 0.0 0.0 0.0 -0.1 -0.5 -0.6 + """ + ) + + +def test_from_atoms(extxyz_file): + """Test extracting data from ASE Atoms object.""" + expected_forces = np.array([[0.4, 0.6, -0.4], [-0.1, -0.5, -0.6]]) + expected_stress = np.array([-1.0, -1.0, -1.0, -2.1, 2.0, 1.8]) + + atoms = read(extxyz_file, format="extxyz") + atoms.calc.results["stress"] = expected_stress + data = AbstractModel.from_atoms(atoms) + + # Test info + info_keys = { + "pbc", + "n_atoms", + "cell", + "formula", + "calculator_name", + "calculator_parameters", + } + assert info_keys == set(data.info_keys) + assert data["pbc"] == [False, True, False] + assert data["n_atoms"] == 2 + assert len(data["cell"]) == 3 + assert all(arr == [0.0, 0.0, 0.0] for arr in data["cell"]) + assert data["formula"] == "Si2" + + # Test arrays + assert {"numbers", "positions"} == set(data.arrays_keys) + + # Test results + assert {"energy", "stress", "forces"} == set(data.results_keys) + assert data["energy"] == -1 + assert data["forces"] == pytest.approx(expected_forces) + assert data["stress"] == pytest.approx(expected_stress) + + # Test derived + derived_keys = { + "elements", + "username", + "uploaded", + "modified", + "volume", + "hash", + "hash_structure", + } + assert derived_keys == set(data.derived_keys) + + +def test_from_atoms_no_calc(extxyz_file): + """Test extracting data from ASE Atoms object without results.""" + expected_stress = np.array([-1.0, -1.0, -1.0, -2.1, 2.0, 1.8]) + + atoms = read(extxyz_file, format="extxyz") + atoms.calc.results["stress"] = expected_stress + data = AbstractModel.from_atoms(atoms, store_calc=False) + + # Test info + assert {"pbc", "n_atoms", "cell", "formula"} == set(data.info_keys) + assert data["pbc"] == [False, True, False] + assert data["n_atoms"] == 2 + assert len(data["cell"]) == 3 + assert all(arr == [0.0, 0.0, 0.0] for arr in data["cell"]) + assert data["formula"] == "Si2" + + # Test arrays + assert {"numbers", "positions"} == set(data.arrays_keys) + + # Test results + results_keys = { + "energy", + "forces", + "stress", + "calculator_name", + "calculator_parameters", + } + assert all(key not in data for key in results_keys) + + # Test derived + derived_keys = { + "elements", + "username", + "uploaded", + "modified", + "volume", + "hash", + "hash_structure", + } + assert derived_keys == set(data.derived_keys) From 0d7a352bd122d3f576e88bbd94194bce423590c3 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 28 Nov 2024 17:10:04 +0000 Subject: [PATCH 6/9] Fix to ASE function --- abcd/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/abcd/model.py b/abcd/model.py index 7f6854da..4b9dabfd 100644 --- a/abcd/model.py +++ b/abcd/model.py @@ -241,6 +241,7 @@ def to_ase(self): # atoms.calc = get_calculator(data['results']['calculator_name'])(**params) params = self.pop("calculator_parameters", {}) + info_keys -= {"calculator_parameters"} atoms.calc = SinglePointCalculator(atoms, **params) atoms.calc.results.update((key, self[key]) for key in results_keys) From 51cc5c02528952a53fda0bcdb1a35331414b9f8f Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 28 Nov 2024 17:10:31 +0000 Subject: [PATCH 7/9] Test to ASE function --- tests/test_abstract_model.py | 50 ++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/tests/test_abstract_model.py b/tests/test_abstract_model.py index 78964abe..5d4d7c6e 100644 --- a/tests/test_abstract_model.py +++ b/tests/test_abstract_model.py @@ -11,7 +11,7 @@ def extxyz_file(): return StringIO( """2 - Properties=species:S:1:pos:R:3:forces:R:3 energy=-1 pbc="F T F" + Properties=species:S:1:pos:R:3:forces:R:3 energy=-1 pbc="F T F" info="test" Si 0.0 0.0 0.0 0.4 0.6 -0.4 Si 0.0 0.0 0.0 -0.1 -0.5 -0.6 """ @@ -35,6 +35,7 @@ def test_from_atoms(extxyz_file): "formula", "calculator_name", "calculator_parameters", + "info", } assert info_keys == set(data.info_keys) assert data["pbc"] == [False, True, False] @@ -42,6 +43,7 @@ def test_from_atoms(extxyz_file): assert len(data["cell"]) == 3 assert all(arr == [0.0, 0.0, 0.0] for arr in data["cell"]) assert data["formula"] == "Si2" + assert data["info"] == "test" # Test arrays assert {"numbers", "positions"} == set(data.arrays_keys) @@ -74,12 +76,13 @@ def test_from_atoms_no_calc(extxyz_file): data = AbstractModel.from_atoms(atoms, store_calc=False) # Test info - assert {"pbc", "n_atoms", "cell", "formula"} == set(data.info_keys) + assert {"pbc", "n_atoms", "cell", "formula", "info"} == set(data.info_keys) assert data["pbc"] == [False, True, False] assert data["n_atoms"] == 2 assert len(data["cell"]) == 3 assert all(arr == [0.0, 0.0, 0.0] for arr in data["cell"]) assert data["formula"] == "Si2" + assert data["info"] == "test" # Test arrays assert {"numbers", "positions"} == set(data.arrays_keys) @@ -105,3 +108,46 @@ def test_from_atoms_no_calc(extxyz_file): "hash_structure", } assert derived_keys == set(data.derived_keys) + + +def test_to_ase(extxyz_file): + """Test returning data to ASE Atoms object with results.""" + atoms = read(extxyz_file, format="extxyz") + data = AbstractModel.from_atoms(atoms, store_calc=True) + + new_atoms = data.to_ase() + + # Test info set + assert new_atoms.cell == pytest.approx(atoms.cell) + assert new_atoms.pbc == pytest.approx(atoms.pbc) + assert new_atoms.positions == pytest.approx(atoms.positions) + assert new_atoms.numbers == pytest.approx(atoms.numbers) + + assert new_atoms.info["n_atoms"] == len(atoms) + assert new_atoms.info["formula"] == atoms.get_chemical_formula() + + assert new_atoms.calc.results["energy"] == pytest.approx( + atoms.calc.results["energy"] + ) + assert new_atoms.calc.results["forces"] == pytest.approx( + atoms.calc.results["forces"] + ) + + +def test_to_ase_no_results(extxyz_file): + """Test returning data to ASE Atoms object without results.""" + atoms = read(extxyz_file, format="extxyz") + data = AbstractModel.from_atoms(atoms, store_calc=False) + + new_atoms = data.to_ase() + + # Test info set + assert new_atoms.cell == pytest.approx(atoms.cell) + assert new_atoms.pbc == pytest.approx(atoms.pbc) + assert new_atoms.positions == pytest.approx(atoms.positions) + assert new_atoms.numbers == pytest.approx(atoms.numbers) + + assert new_atoms.info["n_atoms"] == len(atoms) + assert new_atoms.info["formula"] == atoms.get_chemical_formula() + + assert new_atoms.calc is None From 38f232a8a3aa01b9ed25ee1e9e5e512e44d280d2 Mon Sep 17 00:00:00 2001 From: Tamas K Stenczel Date: Sun, 8 Dec 2024 10:15:02 +0000 Subject: [PATCH 8/9] additional tests --- tests/test_abstract_model.py | 105 ++++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-) diff --git a/tests/test_abstract_model.py b/tests/test_abstract_model.py index 5d4d7c6e..3c185ea5 100644 --- a/tests/test_abstract_model.py +++ b/tests/test_abstract_model.py @@ -1,10 +1,15 @@ +import io + +import ase import pytest +from pytest import approx from io import StringIO -from ase.io import read +from ase.io import read, write import numpy as np from abcd.model import AbstractModel +from ase.calculators.lj import LennardJones @pytest.fixture @@ -151,3 +156,101 @@ def test_to_ase_no_results(extxyz_file): assert new_atoms.info["formula"] == atoms.get_chemical_formula() assert new_atoms.calc is None + + +def test_from_atoms_len_atoms_3(): + atoms = ase.Atoms( + "H3", + positions=[[0, 0, 0], [0, 0, 1], [0, 1, 0]], + pbc=True, + cell=[2, 2, 2], + ) + atoms.calc = LennardJones() + atoms.calc.calculate(atoms) + + # convert + abcd_data = AbstractModel.from_atoms(atoms, store_calc=True) + + assert set(abcd_data.info_keys) == { + "pbc", + "n_atoms", + "cell", + "formula", + "calculator_name", + "calculator_parameters", + } + assert set(abcd_data.arrays_keys) == {"numbers", "positions"} + assert set(abcd_data.results_keys) == { + "stress", + "energy", + "forces", + "energies", + "stresses", + "free_energy", + } + + # check a some keys as well + assert abcd_data["energy"] == atoms.get_potential_energy() + assert abcd_data["forces"] == approx(atoms.get_forces()) + + +@pytest.mark.parametrize("store_calc", [True, False]) +def test_write_and_read(store_calc): + # create atoms & add a calculator + atoms = ase.Atoms( + "H3", + positions=[[0, 0, 0], [0, 0, 1], [0, 1, 0]], + pbc=True, + cell=[2, 2, 2], + ) + atoms.calc = LennardJones() + atoms.calc.calculate(atoms) + + # dump to XYZ + buffer = io.StringIO() + write(buffer, atoms, format="extxyz") + + # read back + buffer.seek(0) + atoms_read = read(buffer, format="extxyz") + + # read in both of them + abcd_data = AbstractModel.from_atoms(atoms, store_calc=store_calc) + abcd_data_after_read = AbstractModel.from_atoms(atoms_read, store_calc=store_calc) + + # check that all results are the same + for key in ["info_keys", "arrays_keys", "derived_keys", "results_keys"]: + assert set(getattr(abcd_data, key)) == set( + getattr(abcd_data_after_read, key) + ), f"{key} mismatched" + + # info & arrays same, except calc recognised as LJ when not from XYZ + for key in set(abcd_data.info_keys + abcd_data.arrays_keys) - { + "calculator_name", + "calculator_parameters", + }: + assert ( + abcd_data[key] == abcd_data_after_read[key] + ), f"{key}'s value does not match" + + # date & hashed will differ + for key in set(abcd_data.derived_keys) - { + "hash", + "modified", + "uploaded", + "hash_structure", # see issue #118 + }: + assert ( + abcd_data[key] == abcd_data_after_read[key] + ), f"{key}'s value does not match" + + # expected differences - n.b. order of calls above + assert abcd_data_after_read["modified"] > abcd_data["modified"] + assert abcd_data_after_read["uploaded"] > abcd_data["uploaded"] + assert abcd_data_after_read["hash"] != abcd_data["hash"] + + # expect results to match within fp precision + for key in set(abcd_data.results_keys): + assert abcd_data[key] == approx( + np.array(abcd_data_after_read[key]) + ), f"{key}'s value does not match" From 687d25ef89049e844ba38772f756515047f4ac1b Mon Sep 17 00:00:00 2001 From: Tamas Stenczel <39164540+stenczelt@users.noreply.github.com> Date: Tue, 10 Dec 2024 07:55:49 +0000 Subject: [PATCH 9/9] Update tests/test_abstract_model.py Co-authored-by: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> --- tests/test_abstract_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_abstract_model.py b/tests/test_abstract_model.py index 3c185ea5..f9e820e6 100644 --- a/tests/test_abstract_model.py +++ b/tests/test_abstract_model.py @@ -189,7 +189,7 @@ def test_from_atoms_len_atoms_3(): "free_energy", } - # check a some keys as well + # check some values as well assert abcd_data["energy"] == atoms.get_potential_energy() assert abcd_data["forces"] == approx(atoms.get_forces())