Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions JiQi535/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# ML Band Gaps (Materials)

> Ideal candidate: skilled ML data scientist with solid knowledge of materials science.

# Overview

The aim of this task is to create a Python package that implements automatic prediction of electronic band gaps for a set of materials based on training data.

# User story

As a user of this software I can predict the value of an electronic band gap after passing training data and structural information about the target material.

# Requirements

- suggest the bandgap values for a set of materials designated by their crystallographic and stoichiometric properties
- the code shall be written in a way that can facilitate easy addition of other characteristics extracted from simulations (forces, pressures, phonon frequencies etc)

# Expectations

- the code shall be able to suggest realistic values for slightly modified geometry sets - e.g. when trained on Si and Ge, it should suggest a bandgap value for Si49Ge51 that lies between those of Si and Ge
- modular and object-oriented implementation
- commit early and often - at least once per 24 hours

# Timeline

We leave exact timing to the candidate. The work must fit within 5 days total.

# Notes

- use a designated github repository for version control
- suggested source of training data: materialsproject.org

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions JiQi535/examples/MP_789_bandgap.json

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions JiQi535/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
scikit-learn==1.3.2
pymatgen==2023.11.12
maml==2023.9.9
matminer==0.9.0
numpy==1.26.1
m3gnet==0.2.4
Empty file.
157 changes: 157 additions & 0 deletions JiQi535/rewotesMLpredictor/featurizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import numpy as np

from maml.describers._m3gnet import BaseDescriber, M3GNetStructure
from matminer.featurizers.composition import ElementProperty
from pymatgen.core import Structure, Composition
from sklearn.base import BaseEstimator, TransformerMixin
from typing import Union, List

MATMINER_ELEMENT_PRESETS = ["magpie", "deml", "matminer", "matscholar_el", "megnet_el"]


class CompositionFeaturizer(BaseEstimator, TransformerMixin):
    """
    Featurizer that encodes compositions into numerical features.

    Wraps one or more matminer ElementProperty featurizers and concatenates their
    outputs column-wise behind a scikit-learn style fit/transform interface.
    """

    def __init__(self, featurizers: Union[List[ElementProperty], None] = None):
        """
        Initialize a CompositionFeaturizer.
        :param featurizers: A list of compositional featurizers. Only allows ElementProperty in matminer
            (https://github.com/hackingmaterials/matminer) for the current implementation. When omitted
            (or empty), a single ElementProperty built from the "megnet_el" preset is used.
        :raises TypeError: If a non-empty value is given that is not a list of ElementProperty objects.
        """
        if featurizers:
            # isinstance instead of an exact type comparison, so ElementProperty subclasses are accepted.
            if not (isinstance(featurizers, list) and
                    all(isinstance(x, ElementProperty) for x in featurizers)):
                raise TypeError(
                    "Featurizers of CompositionFeaturizer must be a list of matminer ElementProperty object. "
                    "Check out matminer: https://github.com/hackingmaterials/matminer."
                )
        else:
            # A fresh default is built per instance rather than shared through a mutable default argument.
            featurizers = [ElementProperty.from_preset("megnet_el")]
        self.featurizers = featurizers
        # Flattened labels, in the same order as the columns produced by transform().
        self.feature_labels = [label for f in self.featurizers for label in f.feature_labels()]

    @classmethod
    def from_presets(cls, presets: Union[List[str], None] = None):
        """
        Initialize a CompositionFeaturizer from a list of presets in matminer.
        :param presets: A list as a subset of ["magpie", "deml", "matminer", "matscholar_el", "megnet_el"].
            Default to ["megnet_el"] when omitted.
        :return: An instance of the class this method is called on.
        :raises ValueError: If presets is empty or contains a name outside MATMINER_ELEMENT_PRESETS.
        """
        if presets is None:
            presets = ["megnet_el"]
        if not presets:
            raise ValueError(
                "At least one of the presets should be provided. "
                f"Options: {MATMINER_ELEMENT_PRESETS}."
            )
        for preset in presets:
            if preset not in MATMINER_ELEMENT_PRESETS:
                raise ValueError(f"{preset} is not an allowed preset in matminer.")
        featurizers = [ElementProperty.from_preset(p) for p in presets]
        # Use cls so that subclasses calling from_presets get an instance of themselves.
        return cls(featurizers=featurizers)

    def fit(self, X=None, y=None):
        """
        Place holder for fit method; nothing is learned from the data.
        :param X: Any input (ignored).
        :param y: Any input (ignored).
        :return: self
        """
        return self

    def transform(self, compositions: Union[List[str], List[Composition]] = None):
        """
        Transform compositions into features.
        :param compositions: A list of str or a list of pymatgen Composition.
        :return: A numpy array of compositional features, with the outputs of all featurizers
            concatenated along axis 1.
        :raises TypeError: If compositions is not a list of str / Composition.
        """
        compositions = self._check_compositions(compositions)
        composition_features = np.concatenate([f.transform(compositions) for f in self.featurizers], axis=1)
        return composition_features

    def predict(self, compositions: Union[List[str], List[Composition]] = None):
        """
        Transform compositions into features. Same as transform method.
        :param compositions: A list of str or a list of pymatgen Composition.
        :return: A numpy array of compositional features.
        """
        return self.transform(compositions)

    @staticmethod
    def _check_compositions(compositions: Union[List[str], List[Composition]] = None):
        """
        Check the compositions are in correct format and normalise them to Composition objects.
        :param compositions: A list of str or a list of pymatgen Composition.
        :return: A list of pymatgen Composition.
        :raises TypeError: If compositions is not a list, or holds anything other than str / Composition.
        """
        if not (isinstance(compositions, list) and
                all(isinstance(x, (str, Composition)) for x in compositions)):
            raise TypeError("Compositions must be provided as a list of str or a list of pymatgen Composition.")
        # Only strings are parsed; existing Composition objects are passed through untouched
        # instead of being round-tripped through their string representation.
        return [Composition(x) if isinstance(x, str) else x for x in compositions]


class StructureFeaturizer(BaseEstimator, TransformerMixin):
    """
    Featurizer that encodes structures into numerical features.

    Wraps one or more maml describers and concatenates their outputs column-wise
    behind a scikit-learn style fit/transform interface.
    """

    def __init__(self, featurizers: Union[List[BaseDescriber], None] = None):
        """
        Initialize a StructureFeaturizer.
        :param featurizers: A list of structural featurizers. Only allows BaseDescriber in maml
            (https://github.com/materialsvirtuallab/maml) for the current implementation. When omitted
            (or empty), a single M3GNetStructure describer is used.
        :raises TypeError: If a non-empty value is given that is not a list of BaseDescriber objects.
        """
        if featurizers:
            # isinstance is required here: concrete describers are instances of BaseDescriber
            # subclasses, so an exact `type(x) == BaseDescriber` comparison rejects all of them.
            if not (isinstance(featurizers, list) and
                    all(isinstance(x, BaseDescriber) for x in featurizers)):
                raise TypeError(
                    "Featurizers of StructureFeaturizer must be a list of maml BaseDescriber object. "
                    "Check out maml: https://github.com/materialsvirtuallab/maml."
                )
        else:
            # A fresh default is built per instance rather than shared through a mutable default argument.
            featurizers = [M3GNetStructure()]
        self.featurizers = featurizers

    def fit(self, X=None, y=None):
        """
        Place holder for fit method; nothing is learned from the data.
        :param X: Any input (ignored).
        :param y: Any input (ignored).
        :return: self
        """
        return self

    def transform(self, structures: List[Structure] = None):
        """
        Transform structures into features.
        :param structures: A list of pymatgen structures.
        :return: A numpy array of structural features, with the outputs of all featurizers
            concatenated along axis 1.
        :raises TypeError: If structures is not a list of pymatgen Structure objects.
        """
        structures = self._check_structures(structures)
        features = np.concatenate([f.transform(structures) for f in self.featurizers], axis=1)
        return features

    def predict(self, structures: List[Structure] = None):
        """
        Transform structures into features. Same as transform method.
        :param structures: A list of pymatgen structures.
        :return: A numpy array of structural features.
        """
        return self.transform(structures)

    @staticmethod
    def _check_structures(structures: List[Structure] = None):
        """
        Check the structures are in correct format.
        :param structures: A list of pymatgen Structure.
        :return: The same list of pymatgen structures, unchanged.
        :raises TypeError: If structures is not a list, or holds anything other than Structure objects.
        """
        # isinstance so that Structure subclasses are accepted as well.
        if not (isinstance(structures, list) and
                all(isinstance(x, Structure) for x in structures)):
            raise TypeError("Structures must be provided as a list of pymatgen Structures.")
        return structures
32 changes: 32 additions & 0 deletions JiQi535/rewotesMLpredictor/predictor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline
from typing import Union
from .featurizer import CompositionFeaturizer, StructureFeaturizer


class Predictor(Pipeline):
    """
    Materials property predictor that predicts a target property from input structures or stoichiometry.

    A thin scikit-learn Pipeline made of a featurizer step followed by a regression model step.
    Each step is named after the class of the object it holds.
    """

    # Sentinel meaning "argument not supplied". It lets the defaults be created per instance
    # while an explicit None still means "leave this step out", as before.
    _DEFAULT = object()

    def __init__(
        self,
        featurizer: Union[CompositionFeaturizer, StructureFeaturizer] = _DEFAULT,
        model=_DEFAULT,
    ):
        """
        Initialize the ML property predictors.
        :param featurizer: CompositionFeaturizer or StructureFeaturizer. Default is a new StructureFeaturizer.
            Passing None omits the featurizer step.
        :param model: The regression model in sklearn to use. Default is a new DecisionTreeRegressor.
            Passing None omits the model step.
        """
        # Defaults are built here, not in the signature: objects created in the signature are
        # evaluated once at definition time and would be shared (including any fitted state)
        # by every Predictor that relies on them.
        if featurizer is Predictor._DEFAULT:
            featurizer = StructureFeaturizer()
        if model is Predictor._DEFAULT:
            model = DecisionTreeRegressor()
        self.featurizer = featurizer
        self.model = model
        # Compare against None explicitly; relying on the truthiness of an estimator can
        # invoke its __len__ (where defined) instead of merely checking that it was supplied.
        steps = [
            (step.__class__.__name__, step)
            for step in (self.featurizer, self.model)
            if step is not None
        ]
        super().__init__(steps)
12 changes: 12 additions & 0 deletions JiQi535/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from setuptools import setup

# Package metadata for the rewotes ML property predictor.
setup(
    name='rewotes-ML-predictor',
    version='0.0.1',
    # Must name the importable package directory (rewotesMLpredictor); the hyphenated
    # distribution name is not a package directory and makes the build fail.
    packages=['rewotesMLpredictor'],
    url='https://github.com/JiQi535/rewotes.git',
    license='GNU General Public License v3.0',
    author='Ji Qi',
    author_email='qj535355@gmail.com',
    description='This is a package for the "ML Property Predict" rewotes test of Mat3ra.com. '
)
Empty file added JiQi535/tests/test_features.py
Empty file.