Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Oxidation States 24 #329

Merged
merged 22 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 36 additions & 52 deletions paper.bib
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
@article{davies_computational_2016,
title = {Computational {Screening} of {All} {Stoichiometric} {Inorganic} {Materials}},
volume = {1},
issn = {24519294},
url = {http://www.cell.com/chem/abstract/S2451-9294(16)30155-3},
doi = {10.1016/j.chempr.2016.09.010},
abstract = {Forming a four-component compound from the first 103 elements of the periodic table results in more than $10^{12}$ combinations. Such a materials space is intractable to high-throughput experiment or first-principle computation. We introduce a framework to address this problem and quantify how many materials can exist. We apply principles of valency and electronegativity to filter chemically implausible compositions, which reduces the inorganic quaternary space to $10^{10}$ combinations. We demonstrate that estimates of band gaps and absolute electron energies can be made simply on the basis of the chemical composition and apply this to the search for new semiconducting materials to support the photoelectrochemical splitting of water. We show the applicability to predicting crystal structure by analogy with known compounds, including exploration of the phase space for ternary combinations that form a perovskite lattice. Computer screening reproduces known perovskite materials and predicts the feasibility of thousands more. Given the simplicity of the approach, large-scale searches can be performed on a single workstation.},
number = {4},
journal = {Chem},
author = {Davies, Daniel W. and Butler, Keith T. and Jackson, Adam J. and Morris, Andrew and Frost, Jarvist M. and Skelton, Jonathan M. and Walsh, Aron},
year = {2016},
keywords = {Perovskites, Data, Materials Design, Screening, Water splitting},
pages = {617--627}
title = {Computational Screening of All Stoichiometric Inorganic Materials},
volume = {1},
issn = {24519294},
url = {http://www.cell.com/chem/abstract/S2451-9294(16)30155-3},
doi = {10.1016/j.chempr.2016.09.010},
abstract = {Forming a four-component compound from the first 103 elements of the periodic table results in more than $10^{12}$ combinations. Such a materials space is intractable to high-throughput experiment or first-principle computation. We introduce a framework to address this problem and quantify how many materials can exist. We apply principles of valency and electronegativity to filter chemically implausible compositions, which reduces the inorganic quaternary space to $10^{10}$ combinations. We demonstrate that estimates of band gaps and absolute electron energies can be made simply on the basis of the chemical composition and apply this to the search for new semiconducting materials to support the photoelectrochemical splitting of water. We show the applicability to predicting crystal structure by analogy with known compounds, including exploration of the phase space for ternary combinations that form a perovskite lattice. Computer screening reproduces known perovskite materials and predicts the feasibility of thousands more. Given the simplicity of the approach, large-scale searches can be performed on a single workstation.},
number = {4},
journal = {Chem},
author = {Davies, Daniel W. and Butler, Keith T. and Jackson, Adam J. and Morris, Andrew and Frost, Jarvist M. and Skelton, Jonathan M. and Walsh, Aron},
year = {2016},
pages = {617--627}
}

@article{pamplin1964,
title = "A systematic method of deriving new semiconducting compounds by structural analogy",
journal = "Journal of Physics and Chemistry of Solids",
journal = "J. Phys. Chem. Solids",
volume = "25",
number = "7",
pages = "675 - 684",
Expand All @@ -29,7 +28,7 @@ @article{pamplin1964

@article{goodman1958,
title = "The prediction of semiconducting properties in inorganic compounds",
journal = "Journal of Physics and Chemistry of Solids",
journal = "J. Phys. Chem. Solids",
volume = "6",
number = "4",
pages = "305 - 314",
Expand All @@ -44,38 +43,28 @@ @article{goodman1958
@article{gaultois2013,
author = {Gaultois, Michael W. and Sparks, Taylor D. and Borg, Christopher K. H. and Seshadri, Ram and Bonificio, William D. and Clarke, David R.},
title = {Data-Driven Review of Thermoelectric Materials: Performance and Resource Considerations},
journal = {Chemistry of Materials},
journal = {Chem. Mater.},
volume = {25},
number = {15},
pages = {2911-2920},
year = {2013},
doi = {10.1021/cm400893e},

URL = {
https://doi.org/10.1021/cm400893e
},
eprint = {
https://doi.org/10.1021/cm400893e}

URL = {https://doi.org/10.1021/cm400893e},
eprint = {https://doi.org/10.1021/cm400893e}
}

@article{pelatt2011,
author = {Pelatt, Brian D. and Ravichandran, Ram and Wager, John F. and Keszler, Douglas A.},
title = {Atomic Solid State Energy Scale},
journal = {Journal of the American Chemical Society},
journal = {J. Am. Chem. Soc.},
volume = {133},
number = {42},
pages = {16852-16860},
year = {2011},
doi = {10.1021/ja204670s},
note ={PMID: 21861503},

URL = {
https://doi.org/10.1021/ja204670s
},
eprint = {
https://doi.org/10.1021/ja204670s
}
note ={PMID: 21861503},
URL = {https://doi.org/10.1021/ja204670s},
eprint = {https://doi.org/10.1021/ja204670s}
}

@Article{davies2018,
Expand All @@ -86,7 +75,6 @@ @Article{davies2018
volume ="211",
issue ="0",
pages ="553-568",
publisher ="The Royal Society of Chemistry",
doi ="10.1039/C8FD00032H",
url ="http://dx.doi.org/10.1039/C8FD00032H",
abstract ="The likelihood of an element to adopt a specific oxidation state in a solid{,} given a certain set of neighbours{,} might often be obvious to a trained chemist. However{,} encoding this information for use in high-throughput searches presents a significant challenge. We carry out a statistical analysis of the occurrence of oxidation states in 16 735 ordered{,} inorganic compounds and show that a large number of cations are only likely to exhibit certain oxidation states in combination with particular anions. We use this data to build a model that ascribes probabilities to the formation of hypothetical compounds{,} given the proposed oxidation states of their constituent species. The model is then used as part of a high-throughput materials design process{,} which significantly narrows down the vast compositional search space for new ternary metal halide compounds. Finally{,} we employ a machine learning analysis of existing compounds to suggest likely structures for a small subset of the candidate compositions. We predict two new compounds{,} MnZnBr4 and YSnF7{,} that are thermodynamically stable according to density functional theory{,} as well as four compounds{,} MnCdBr4{,} MnRu2Br8{,} ScZnF5 and ZnCoBr4{,} which lie within the window of metastability."
Expand All @@ -100,51 +88,47 @@ @Article{goldschmidt1929
volume ="25",
issue ="0",
pages ="253-283",
publisher ="The Royal Society of Chemistry",
doi ="10.1039/TF9292500253",
url ="http://dx.doi.org/10.1039/TF9292500253",
abstract =""}

@article{nethercot1974,
title = {Prediction of Fermi Energies and Photoelectric Thresholds Based on Electronegativity Concepts},
author = {Nethercot, Arthur H.},
journal = {Phys. Rev. Lett.},
volume = {33},
issue = {18},
pages = {1088--1091},
numpages = {0},
year = {1974},
month = {Oct},
publisher = {American Physical Society},
doi = {10.1103/PhysRevLett.33.1088},
url = {https://link.aps.org/doi/10.1103/PhysRevLett.33.1088}
title = {Prediction of Fermi Energies and Photoelectric Thresholds Based on Electronegativity Concepts},
author = {Nethercot, Arthur H.},
journal = {Phys. Rev. Lett.},
volume = {33},
issue = {18},
pages = {1088--1091},
numpages = {0},
year = {1974},
month = {Oct},
doi = {10.1103/PhysRevLett.33.1088},
url = {https://link.aps.org/doi/10.1103/PhysRevLett.33.1088}
}

@article{ward2018,
title = "Matminer: An open source toolkit for materials data mining",
journal = "Computational Materials Science",
journal = "Comp. Mater. Sci.",
volume = "152",
pages = "60 - 69",
year = "2018",
issn = "0927-0256",
doi = "https://doi.org/10.1016/j.commatsci.2018.05.018",
url = "http://www.sciencedirect.com/science/article/pii/S0927025618303252",
author = "Logan Ward and Alexander Dunn and Alireza Faghaninia and Nils E.R. Zimmermann and Saurabh Bajaj and Qi Wang and Joseph Montoya and Jiming Chen and Kyle Bystrom and Maxwell Dylla and Kyle Chard and Mark Asta and Kristin A. Persson and G. Jeffrey Snyder and Ian Foster and Anubhav Jain",
keywords = "Data mining, Open source software, Machine learning, Materials informatics",
abstract = "As materials data sets grow in size and scope, the role of data mining and statistical learning methods to analyze these materials data sets and build predictive models is becoming more important. This manuscript introduces matminer, an open-source, Python-based software platform to facilitate data-driven methods of analyzing and predicting materials properties. Matminer provides modules for retrieving large data sets from external databases such as the Materials Project, Citrination, Materials Data Facility, and Materials Platform for Data Science. It also provides implementations for an extensive library of feature extraction routines developed by the materials community, with 47 featurization classes that can generate thousands of individual descriptors and combine them into mathematical functions. Finally, matminer provides a visualization module for producing interactive, shareable plots. These functions are designed in a way that integrates closely with machine learning and data analysis packages already developed and in use by the Python data science community. We explain the structure and logic of matminer, provide a description of its various modules, and showcase several examples of how matminer can be used to collect data, reproduce data mining studies reported in the literature, and test new methodologies."
}

@article{ong2013,
title = "Python Materials Genomics (pymatgen): A robust, open-source python library for materials analysis",
journal = "Computational Materials Science",
journal = "Comp. Mater. Sci.",
volume = "68",
pages = "314 - 319",
year = "2013",
issn = "0927-0256",
doi = "https://doi.org/10.1016/j.commatsci.2012.10.028",
url = "http://www.sciencedirect.com/science/article/pii/S0927025612006295",
author = "Shyue Ping Ong and William Davidson Richards and Anubhav Jain and Geoffroy Hautier and Michael Kocher and Shreyas Cholia and Dan Gunter and Vincent L. Chevrier and Kristin A. Persson and Gerbrand Ceder",
keywords = "Materials, Project, Design, Thermodynamics, High-throughput",
abstract = "We present the Python Materials Genomics (pymatgen) library, a robust, open-source Python library for materials analysis. A key enabler in high-throughput computational materials science efforts is a robust set of software tools to perform initial setup for the calculations (e.g., generation of structures and necessary input files) and post-calculation analysis to derive useful material properties from raw calculated data. The pymatgen library aims to meet these needs by (1) defining core Python objects for materials data representation, (2) providing a well-tested set of structure and thermodynamic analyses relevant to many applications, and (3) establishing an open platform for researchers to collaboratively develop sophisticated analyses of materials data obtained both from first principles calculations and experiments. The pymatgen library also provides convenient tools to obtain useful materials data via the Materials Project’s REpresentational State Transfer (REST) Application Programming Interface (API). As an example, using pymatgen’s interface to the Materials Project’s RESTful API and phasediagram package, we demonstrate how the phase and electrochemical stability of a recently synthesized material, Li4SnS4, can be analyzed using a minimum of computing resources. We find that Li4SnS4 is a stable phase in the Li–Sn–S phase diagram (consistent with the fact that it can be synthesized), but the narrow range of lithium chemical potentials for which it is predicted to be stable would suggest that it is not intrinsically stable against typical electrodes used in lithium-ion batteries."
}

Expand All @@ -170,7 +154,7 @@ @Article{oboyle2011
and Vandermeersch, Tim
and Hutchison, Geoffrey R.",
title="Open Babel: An open chemical toolbox",
journal="Journal of Cheminformatics",
journal="J. Cheminf.",
year="2011",
month="Oct",
day="07",
Expand All @@ -191,11 +175,11 @@ @article{ase-paper
Peterson and Carsten Rostgaard and Jakob Schiøtz and Ole Schütt and Mikkel Strange and Kristian S Thygesen and Tejs
Vegge and Lasse Vilhelmsen and Michael Walter and Zhenhua Zeng and Karsten W Jacobsen},
title={The atomic simulation environment—a Python library for working with atoms},
journal={Journal of Physics: Condensed Matter},
journal={J. Phys.: Condens. Matter},
volume={29},
number={27},
pages={273002},
url={http://stacks.iop.org/0953-8984/29/i=27/a=273002},
year={2017},
abstract={The atomic simulation environment (ASE) is a software package written in the Python programming language with the aim of setting up, steering, and analyzing atomistic simulations. In ASE, tasks are fully scripted in Python. The powerful syntax of Python combined with the NumPy array library make it possible to perform very complex simulation tasks. For example, a sequence of calculations may be performed with the use of a simple ‘for-loop’ construction. Calculations of energy, forces, stresses and other quantities are performed through interfaces to many external electronic structure codes or force fields using a uniform interface. On top of this calculator interface, ASE provides modules for performing many standard simulation tasks such as structure optimization, molecular dynamics, handling of constraints and performing nudged elastic band calculations.}
}
}
11 changes: 8 additions & 3 deletions smact/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,12 @@

A collection of fast screening tools from elemental data
"""

import itertools
import warnings
from math import gcd
from operator import mul as multiply
from os import path
from typing import Iterable, List, Optional, Sequence, Tuple, Union
from typing import Iterable, List, Optional, Tuple, Union
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Codebase verification

Removal of Sequence import may cause issues

The removal of Sequence from smact/__init__.py could be problematic as it's actively used in structure_prediction/database.py with type hints. Since database.py imports Sequence directly from typing, the removal is safe, but we should maintain consistency in import patterns across the codebase.

  • structure_prediction/database.py uses Sequence[SmactStructure] type hints
  • Consider keeping the Sequence import in __init__.py for API consistency
🔗 Analysis chain

Verify removal of Sequence import

The removal of the unused Sequence import is good practice. However, as this is a public API, we should verify that this doesn't affect downstream code that might be importing it from this module.

🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Search for any downstream code that might be importing Sequence from smact
rg -l "from smact import.*Sequence" || echo "No direct imports found"

Length of output: 94


Script:

#!/bin/bash
# Let's verify if Sequence is used in type hints within the codebase
rg "Sequence\[" || echo "No Sequence type hints found"

# Also check if it's imported differently
rg "from typing import.*Sequence" || echo "No Sequence imports found"

Length of output: 250


import pandas as pd

Expand Down Expand Up @@ -55,12 +54,14 @@ class Element:

Element.oxidation_states_sp (list) : List of oxidation states recognised by the Pymatgen Structure Predictor

Element.oxidation_states_icsd (list) : List of oxidation states that appear in the ICSD
Element.oxidation_states_icsd (list) : List of oxidation states that appear in the 2016 version of the ICSD

Element.oxidation_states_wiki (list): List of oxidation states that appear on Wikipedia (https://en.wikipedia.org/wiki/Template:List_of_oxidation_states_of_the_elements). Data retrieved: 2022-09-22

Element.oxidation_states_custom (list | None ): List of oxidation states that appear in the custom data file supplied (if any)

Element.oxidation_states_icsd24 (list): List of oxidation states that appear in the 2024 version of the ICSD

Element.coord_envs (list): The allowed coordination environments for the ion

Element.covalent_radius (float) : Covalent radius of the element
Expand Down Expand Up @@ -169,6 +170,10 @@ def __init__(
"oxidation_states_wiki",
data_loader.lookup_element_oxidation_states_wiki(symbol),
),
(
"oxidation_states_icsd24",
data_loader.lookup_element_oxidation_states_icsd24(symbol),
),
Comment on lines +200 to +203
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Codebase verification

Implementation for ICSD24 oxidation states is missing

The verification reveals critical issues:

  • The lookup_element_oxidation_states_icsd24 function implementation is not found in the codebase
  • The required data file oxidation_states_icsd24.txt is missing
  • Error handling in the data loader is minimal

This change cannot be safely integrated without:

  • Implementing the missing data loader function
  • Adding the required data file
  • Including proper error handling similar to the existing ICSD implementation
🔗 Analysis chain

Verify data loader implementation and error handling

The initialization of the new oxidation states attribute follows the established pattern. However, as this is a breaking change, please ensure:

  1. The lookup_element_oxidation_states_icsd24 function in data_loader handles missing data gracefully
  2. The new attribute doesn't affect existing functionality that might be checking for oxidation states availability

Let me gather more information about the data loader implementation for the new ICSD24 function.

🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Check data loader implementation
ast-grep --pattern 'def lookup_element_oxidation_states_icsd24($_) {
  $$$
}'

# Check for any existing code that might be affected
rg -A 3 "oxidation_states_icsd\W"

Length of output: 1925


Script:

#!/bin/bash
# Check implementation of the new ICSD24 function
ast-grep --pattern 'def lookup_element_oxidation_states_icsd24($_) {
  $$$
}'

# Check for the data file
fd "oxidation_states_icsd24.txt"

# Check for any error handling patterns in data_loader
rg -A 3 "try:|except:|raise" smact/data_loader.py

Length of output: 243

("dipol", dataset["dipol"]),
("pauling_eneg", dataset["el_neg"]),
("SSE", sse),
Expand Down
File renamed without changes.
4 changes: 4 additions & 0 deletions smact/data/oxidation_states.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
#
# Oxidation state set
# Source: Original SMACT set manually compiled (2014)
#
H -1 +1
He
Li +1
Expand Down
4 changes: 4 additions & 0 deletions smact/data/oxidation_states_SP.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
#
# Oxidation state set
# Source: Pymatgen structure predictor (2017)
#
H 1
He
Li 1
Expand Down
4 changes: 4 additions & 0 deletions smact/data/oxidation_states_icsd.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
#
# Oxidation state set
# Source: ICSD (2016)
#
H -1 1
He
Li 1
Expand Down
107 changes: 107 additions & 0 deletions smact/data/oxidation_states_icsd24_common.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#
# Oxidation state set
# Source: ICSD (2024, V2), most common non-zero values
#
H 1
He
Li 1
Be 2
B 3
C 4
N -3
O -2
F -1
Ne
Na 1
Mg 2
Al 3
Si 4
P 5
S -2
Cl -1
Ar
K 1
Ca 2
Sc 3
Ti 4
V 5
Cr 3
Mn 2
Fe 3
Co 2
Ni 2
Cu 2
Zn 2
Ga 3
Ge 4
As 5
Se -2
Br -1
Kr 2
Rb 1
Sr 2
Y 3
Zr 4
Nb 5
Mo 6
Tc 7
Ru 4
Rh 3
Pd 2
Ag 1
Cd 2
In 3
Sn 4
Sb 3
Te -2
I -1
Xe 6
Cs 1
Ba 2
La 3
Ce 3
Pr 3
Nd 3
Pm 3
Sm 3
Eu 3
Gd 3
Tb 3
Dy 3
Ho 3
Er 3
Tm 3
Yb 3
Lu 3
Hf 4
Ta 5
W 6
Re 7
Os 5
Ir 4
Pt 4
Au 1
Hg 2
Tl 1
Pb 2
Bi 3
Po 4
At
Rn
Fr
Ra 2
Ac 3
Th 4
Pa 5
U 6
Np 6
Pu 3
Am 3
Cm 3
Bk 3
Cf 3
Es 3
Fm
Md
No
Lr
1 change: 1 addition & 0 deletions smact/data/oxidation_states_icsd24_counts.json

Large diffs are not rendered by default.

Loading