diff --git a/JiQi535/README.md b/JiQi535/README.md new file mode 100644 index 00000000..2ccb5adc --- /dev/null +++ b/JiQi535/README.md @@ -0,0 +1,31 @@ +# ML Band Gaps (Materials) + +> Ideal candidate: skilled ML data scientist with solid knowledge of materials science. + +# Overview + +The aim of this task is to create a Python package that implements automatic prediction of electronic band gaps for a set of materials based on training data. + +# User story + +As a user of this software, I can predict the value of an electronic band gap after passing training data and structural information about the target material. + +# Requirements + +- suggest the band gap values for a set of materials designated by their crystallographic and stoichiometric properties +- the code shall be written in a way that can facilitate easy addition of other characteristics extracted from simulations (forces, pressures, phonon frequencies, etc.) + +# Expectations + +- the code shall be able to suggest realistic values for slightly modified geometry sets - e.g., trained on Si and Ge it should suggest the value of band gap for Si49Ge51 to be between those of Si and Ge +- modular and object-oriented implementation +- commit early and often - at least once per 24 hours + +# Timeline + +We leave exact timing to the candidate. Must fit within 5 days total. + +# Notes + +- use a designated GitHub repository for version control +- suggested source of training data: materialsproject.org diff --git a/JiQi535/examples/Example_fit_structural_compositional_predictors_for_band_gap_energy.ipynb b/JiQi535/examples/Example_fit_structural_compositional_predictors_for_band_gap_energy.ipynb new file mode 100644 index 00000000..da2b41e1 --- /dev/null +++ b/JiQi535/examples/Example_fit_structural_compositional_predictors_for_band_gap_energy.ipynb @@ -0,0 +1,1532 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ada6bf31", + "metadata": { + "toc": true + }, + "source": [ + "
Predictor(featurizer=CompositionFeaturizer(featurizers=[ElementProperty(data_source=<matminer.utils.data.MEGNetElementData object at 0x295635700>,\n", + " features=['embedding '\n", + " '1',\n", + " 'embedding '\n", + " '2',\n", + " 'embedding '\n", + " '3',\n", + " 'embedding '\n", + " '4',\n", + " 'embedding '\n", + " '5',\n", + " 'embedding '\n", + " '6',\n", + " 'embedding '\n", + " '7',\n", + " 'embedding '\n", + " '8',\n", + " 'embedding '\n", + " '9',\n", + " 'embedding '\n", + " '10',\n", + " 'embedding '\n", + " '11',\n", + " 'embedding '\n", + " '12',\n", + " 'embedding '\n", + " '13',\n", + " 'embedding '\n", + " '14',\n", + " 'embedding '\n", + " '15',\n", + " 'embedding '\n", + " '16'],\n", + " stats=['minimum',\n", + " 'maximum',\n", + " 'range',\n", + " 'mean',\n", + " 'std_dev'])]))In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Predictor(featurizer=CompositionFeaturizer(featurizers=[ElementProperty(data_source=<matminer.utils.data.MEGNetElementData object at 0x295635700>,\n", + " features=['embedding '\n", + " '1',\n", + " 'embedding '\n", + " '2',\n", + " 'embedding '\n", + " '3',\n", + " 'embedding '\n", + " '4',\n", + " 'embedding '\n", + " '5',\n", + " 'embedding '\n", + " '6',\n", + " 'embedding '\n", + " '7',\n", + " 'embedding '\n", + " '8',\n", + " 'embedding '\n", + " '9',\n", + " 'embedding '\n", + " '10',\n", + " 'embedding '\n", + " '11',\n", + " 'embedding '\n", + " '12',\n", + " 'embedding '\n", + " '13',\n", + " 'embedding '\n", + " '14',\n", + " 'embedding '\n", + " '15',\n", + " 'embedding '\n", + " '16'],\n", + " stats=['minimum',\n", + " 'maximum',\n", + " 'range',\n", + " 'mean',\n", + " 'std_dev'])]))
CompositionFeaturizer(featurizers=[ElementProperty(data_source=<matminer.utils.data.MEGNetElementData object at 0x295635700>,\n", + " features=['embedding 1',\n", + " 'embedding 2',\n", + " 'embedding 3',\n", + " 'embedding 4',\n", + " 'embedding 5',\n", + " 'embedding 6',\n", + " 'embedding 7',\n", + " 'embedding 8',\n", + " 'embedding 9',\n", + " 'embedding 10',\n", + " 'embedding 11',\n", + " 'embedding 12',\n", + " 'embedding 13',\n", + " 'embedding 14',\n", + " 'embedding 15',\n", + " 'embedding 16'],\n", + " stats=['minimum', 'maximum',\n", + " 'range', 'mean',\n", + " 'std_dev'])])
DecisionTreeRegressor()
Predictor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Predictor()
StructureFeaturizer(featurizers=[M3GNetStructure()])
DecisionTreeRegressor()