WMD-group · AntObi · Nov 20, 2024 · Jul 25, 2024 · Jul 25, 2024 · Jul 27, 2024
diff --git a/paper.bib b/paper.bib
@@ -1,21 +1,20 @@
 @article{davies_computational_2016,
-	title = {Computational {Screening} of {All} {Stoichiometric} {Inorganic} {Materials}},
-	volume = {1},
-	issn = {24519294},
-	url = {http://www.cell.com/chem/abstract/S2451-9294(16)30155-3},
-	doi = {10.1016/j.chempr.2016.09.010},
-	abstract = {Forming a four-component compound from the first 103 elements of the periodic table results in more than 1012 combinations. Such a materials space is intractable to high-throughput experiment or first-principle computation. We introduce a framework to address this problem and quantify how many materials can exist. We apply principles of valency and electronegativity to filter chemically implausible compositions, which reduces the inorganic quaternary space to 1010 combinations. We demonstrate that estimates of band gaps and absolute electron energies can be made simply on the basis of the chemical composition and apply this to the search for new semiconducting materials to support the photoelectrochemical splitting of water. We show the applicability to predicting crystal structure by analogy with known compounds, including exploration of the phase space for ternary combinations that form a perovskite lattice. Computer screening reproduces known perovskite materials and predicts the feasibility of thousands more. Given the simplicity of the approach, large-scale searches can be performed on a single workstation.},
-	number = {4},
-	journal = {Chem},
-	author = {Davies, Daniel W. and Butler, Keith T. and Jackson, Adam J. and Morris, Andrew and Frost, Jarvist M. and Skelton, Jonathan M. and Walsh, Aron},
-	year = {2016},
-	keywords = {Perovskites, Data, Materials Design, Screening, Water splitting},
-	pages = {617--627}
+title = {Computational Screening of All Stoichiometric Inorganic Materials},
+volume = {1},
+issn = {24519294},
+url = {http://www.cell.com/chem/abstract/S2451-9294(16)30155-3},
+doi = {10.1016/j.chempr.2016.09.010},
+abstract = {Forming a four-component compound from the first 103 elements of the periodic table results in more than 1012 combinations. Such a materials space is intractable to high-throughput experiment or first-principle computation. We introduce a framework to address this problem and quantify how many materials can exist. We apply principles of valency and electronegativity to filter chemically implausible compositions, which reduces the inorganic quaternary space to 1010 combinations. We demonstrate that estimates of band gaps and absolute electron energies can be made simply on the basis of the chemical composition and apply this to the search for new semiconducting materials to support the photoelectrochemical splitting of water. We show the applicability to predicting crystal structure by analogy with known compounds, including exploration of the phase space for ternary combinations that form a perovskite lattice. Computer screening reproduces known perovskite materials and predicts the feasibility of thousands more. Given the simplicity of the approach, large-scale searches can be performed on a single workstation.},
+number = {4},
+journal = {Chem},
+author = {Davies, Daniel W. and Butler, Keith T. and Jackson, Adam J. and Morris, Andrew and Frost, Jarvist M. and Skelton, Jonathan M. and Walsh, Aron},
+year = {2016},
+pages = {617--627}
 }
 
 @article{pamplin1964,
 title = "A systematic method of deriving new semiconducting compounds by structural analogy",
-journal = "Journal of Physics and Chemistry of Solids",
+journal = "J. Phys. Chem. Solids",
 volume = "25",
 number = "7",
 pages = "675 - 684",
@@ -29,7 +28,7 @@ @article{pamplin1964
 
 @article{goodman1958,
 title = "The prediction of semiconducting properties in inorganic compounds",
-journal = "Journal of Physics and Chemistry of Solids",
+journal = "J. Phys. Chem. Solids",
 volume = "6",
 number = "4",
 pages = "305 - 314",
@@ -44,38 +43,28 @@ @article{goodman1958
 @article{gaultois2013,
 author = {Gaultois, Michael W. and Sparks, Taylor D. and Borg, Christopher K. H. and Seshadri, Ram and Bonificio, William D. and Clarke, David R.},
 title = {Data-Driven Review of Thermoelectric Materials: Performance and Resource Considerations},
-journal = {Chemistry of Materials},
+journal = {Chem. Mater.},
 volume = {25},
 number = {15},
 pages = {2911-2920},
 year = {2013},
 doi = {10.1021/cm400893e},
-
-URL = {
-        https://doi.org/10.1021/cm400893e
-},
-eprint = {
-        https://doi.org/10.1021/cm400893e}
-
+URL = {https://doi.org/10.1021/cm400893e},
+eprint = {https://doi.org/10.1021/cm400893e}
 }
 
 @article{pelatt2011,
 author = {Pelatt, Brian D. and Ravichandran, Ram and Wager, John F. and Keszler, Douglas A.},
 title = {Atomic Solid State Energy Scale},
-journal = {Journal of the American Chemical Society},
+journal = {J. Am. Chem. Soc.},
 volume = {133},
 number = {42},
 pages = {16852-16860},
 year = {2011},
 doi = {10.1021/ja204670s},
-    note ={PMID: 21861503},
-
-URL = {
-        https://doi.org/10.1021/ja204670s
-},
-eprint = {
-        https://doi.org/10.1021/ja204670s
-}
+note ={PMID: 21861503},
+URL = {https://doi.org/10.1021/ja204670s},
+eprint = {https://doi.org/10.1021/ja204670s}
 }
 
 @Article{davies2018,
@@ -86,7 +75,6 @@ @Article{davies2018
 volume  ="211",
 issue  ="0",
 pages  ="553-568",
-publisher  ="The Royal Society of Chemistry",
 doi  ="10.1039/C8FD00032H",
 url  ="http://dx.doi.org/10.1039/C8FD00032H",
 abstract  ="The likelihiood of an element to adopt a specific oxidation state in a solid{,} given a certain set of neighbours{,} might often be obvious to a trained chemist. However{,} encoding this information for use in high-throughput searches presents a significant challenge. We carry out a statistical analysis of the occurrence of oxidation states in 16 735 ordered{,} inorganic compounds and show that a large number of cations are only likely to exhibit certain oxidation states in combination with particular anions. We use this data to build a model that ascribes probabilities to the formation of hypothetical compounds{,} given the proposed oxidation states of their constituent species. The model is then used as part of a high-throughput materials design process{,} which significantly narrows down the vast compositional search space for new ternary metal halide compounds. Finally{,} we employ a machine learning analysis of existing compounds to suggest likely structures for a small subset of the candidate compositions. We predict two new compounds{,} MnZnBr4 and YSnF7{,} that are thermodynamically stable according to density functional theory{,} as well as four compounds{,} MnCdBr4{,} MnRu2Br8{,} ScZnF5 and ZnCoBr4{,} which lie within the window of metastability."
@@ -100,51 +88,47 @@ @Article{goldschmidt1929
 volume  ="25",
 issue  ="0",
 pages  ="253-283",
-publisher  ="The Royal Society of Chemistry",
 doi  ="10.1039/TF9292500253",
 url  ="http://dx.doi.org/10.1039/TF9292500253",
 abstract  =""}
 
 @article{nethercot1974,
-  title = {Prediction of Fermi Energies and Photoelectric Thresholds Based on Electronegativity Concepts},
-  author = {Nethercot, Arthur H.},
-  journal = {Phys. Rev. Lett.},
-  volume = {33},
-  issue = {18},
-  pages = {1088--1091},
-  numpages = {0},
-  year = {1974},
-  month = {Oct},
-  publisher = {American Physical Society},
-  doi = {10.1103/PhysRevLett.33.1088},
-  url = {https://link.aps.org/doi/10.1103/PhysRevLett.33.1088}
+title = {Prediction of Fermi Energies and Photoelectric Thresholds Based on Electronegativity Concepts},
+author = {Nethercot, Arthur H.},
+journal = {Phys. Rev. Lett.},
+volume = {33},
+issue = {18},
+pages = {1088--1091},
+numpages = {0},
+year = {1974},
+month = {Oct},
+doi = {10.1103/PhysRevLett.33.1088},
+url = {https://link.aps.org/doi/10.1103/PhysRevLett.33.1088}
 }
 
 @article{ward2018,
 title = "Matminer: An open source toolkit for materials data mining",
-journal = "Computational Materials Science",
+journal = "Comput. Mater. Sci.",
 volume = "152",
 pages = "60 - 69",
 year = "2018",
 issn = "0927-0256",
 doi = "https://doi.org/10.1016/j.commatsci.2018.05.018",
 url = "http://www.sciencedirect.com/science/article/pii/S0927025618303252",
 author = "Logan Ward and Alexander Dunn and Alireza Faghaninia and Nils E.R. Zimmermann and Saurabh Bajaj and Qi Wang and Joseph Montoya and Jiming Chen and Kyle Bystrom and Maxwell Dylla and Kyle Chard and Mark Asta and Kristin A. Persson and G. Jeffrey Snyder and Ian Foster and Anubhav Jain",
-keywords = "Data mining, Open source software, Machine learning, Materials informatics",
 abstract = "As materials data sets grow in size and scope, the role of data mining and statistical learning methods to analyze these materials data sets and build predictive models is becoming more important. This manuscript introduces matminer, an open-source, Python-based software platform to facilitate data-driven methods of analyzing and predicting materials properties. Matminer provides modules for retrieving large data sets from external databases such as the Materials Project, Citrination, Materials Data Facility, and Materials Platform for Data Science. It also provides implementations for an extensive library of feature extraction routines developed by the materials community, with 47 featurization classes that can generate thousands of individual descriptors and combine them into mathematical functions. Finally, matminer provides a visualization module for producing interactive, shareable plots. These functions are designed in a way that integrates closely with machine learning and data analysis packages already developed and in use by the Python data science community. We explain the structure and logic of matminer, provide a description of its various modules, and showcase several examples of how matminer can be used to collect data, reproduce data mining studies reported in the literature, and test new methodologies."
 }
 
 @article{ong2013,
 title = "Python Materials Genomics (pymatgen): A robust, open-source python library for materials analysis",
-journal = "Computational Materials Science",
+journal = "Comput. Mater. Sci.",
 volume = "68",
 pages = "314 - 319",
 year = "2013",
 issn = "0927-0256",
 doi = "https://doi.org/10.1016/j.commatsci.2012.10.028",
 url = "http://www.sciencedirect.com/science/article/pii/S0927025612006295",
 author = "Shyue Ping Ong and William Davidson Richards and Anubhav Jain and Geoffroy Hautier and Michael Kocher and Shreyas Cholia and Dan Gunter and Vincent L. Chevrier and Kristin A. Persson and Gerbrand Ceder",
-keywords = "Materials, Project, Design, Thermodynamics, High-throughput",
 abstract = "We present the Python Materials Genomics (pymatgen) library, a robust, open-source Python library for materials analysis. A key enabler in high-throughput computational materials science efforts is a robust set of software tools to perform initial setup for the calculations (e.g., generation of structures and necessary input files) and post-calculation analysis to derive useful material properties from raw calculated data. The pymatgen library aims to meet these needs by (1) defining core Python objects for materials data representation, (2) providing a well-tested set of structure and thermodynamic analyses relevant to many applications, and (3) establishing an open platform for researchers to collaboratively develop sophisticated analyses of materials data obtained both from first principles calculations and experiments. The pymatgen library also provides convenient tools to obtain useful materials data via the Materials Project’s REpresentational State Transfer (REST) Application Programming Interface (API). As an example, using pymatgen’s interface to the Materials Project’s RESTful API and phasediagram package, we demonstrate how the phase and electrochemical stability of a recently synthesized material, Li4SnS4, can be analyzed using a minimum of computing resources. We find that Li4SnS4 is a stable phase in the Li–Sn–S phase diagram (consistent with the fact that it can be synthesized), but the narrow range of lithium chemical potentials for which it is predicted to be stable would suggest that it is not intrinsically stable against typical electrodes used in lithium-ion batteries."
 }
 
@@ -170,7 +154,7 @@ @Article{oboyle2011
 and Vandermeersch, Tim
 and Hutchison, Geoffrey R.",
 title="Open Babel: An open chemical toolbox",
-journal="Journal of Cheminformatics",
+journal="J. Cheminf.",
 year="2011",
 month="Oct",
 day="07",
@@ -191,11 +175,11 @@ @article{ase-paper
 Peterson and Carsten Rostgaard and Jakob Schiøtz and Ole Schütt and Mikkel Strange and Kristian S Thygesen and Tejs
 Vegge and Lasse Vilhelmsen and Michael Walter and Zhenhua Zeng and Karsten W Jacobsen},
   title={The atomic simulation environment—a Python library for working with atoms},
-  journal={Journal of Physics: Condensed Matter},
+  journal={J. Phys.: Condens. Matter},
   volume={29},
   number={27},
   pages={273002},
   url={http://stacks.iop.org/0953-8984/29/i=27/a=273002},
   year={2017},
   abstract={The atomic simulation environment (ASE) is a software package written in the Python programming language with the aim of setting up, steering, and analyzing atomistic simulations. In ASE, tasks are fully scripted in Python. The powerful syntax of Python combined with the NumPy array library make it possible to perform very complex simulation tasks. For example, a sequence of calculations may be performed with the use of a simple ‘for-loop’ construction. Calculations of energy, forces, stresses and other quantities are performed through interfaces to many external electronic structure codes or force fields using a uniform interface. On top of this calculator interface, ASE provides modules for performing many standard simulation tasks such as structure optimization, molecular dynamics, handling of constraints and performing nudged elastic band calculations.}
-}
+}
diff --git a/smact/__init__.py b/smact/__init__.py
@@ -3,13 +3,12 @@
 
 A collection of fast screening tools from elemental data
 """
-
 import itertools
 import warnings
 from math import gcd
 from operator import mul as multiply
 from os import path
-from typing import Iterable, List, Optional, Sequence, Tuple, Union
+from typing import Iterable, List, Optional, Tuple, Union
 
 import pandas as pd
 
@@ -55,12 +54,14 @@ class Element:
 
         Element.oxidation_states_sp (list) : List of oxdation states recognised by the Pymatgen Structure Predictor
 
-        Element.oxidation_states_icsd (list) : List of oxidation states that appear in the ICSD
+        Element.oxidation_states_icsd (list) : List of oxidation states that appear in the 2016 version of the ICSD
 
         Element.oxidation_states_wiki (list): List of oxidation states that appear wikipedia (https://en.wikipedia.org/wiki/Template:List_of_oxidation_states_of_the_elements) Data retrieved: 2022-09-22
 
         Element.oxidation_states_custom (list | None ): List of oxidation states that appear in the custom data file supplied (if any)
 
+        Element.oxidation_states_icsd24 (list): List of oxidation states that appear in the 2024 version of the ICSD
+
         Element.coord_envs (list): The allowed coordination enviroments for the ion
 
         Element.covalent_radius (float) : Covalent radius of the element
@@ -169,6 +170,10 @@ def __init__(
                 "oxidation_states_wiki",
                 data_loader.lookup_element_oxidation_states_wiki(symbol),
             ),
+            (
+                "oxidation_states_icsd24",
+                data_loader.lookup_element_oxidation_states_icsd24(symbol),
+            ),
             ("dipol", dataset["dipol"]),
             ("pauling_eneg", dataset["el_neg"]),
             ("SSE", sse),

diff --git a/smact/data/HHIs.txt → smact/data/hhi.txt b/smact/data/HHIs.txt → smact/data/hhi.txt
diff --git a/smact/data/oxidation_states.txt b/smact/data/oxidation_states.txt
@@ -1,3 +1,7 @@
+#
+# Oxidation state set
+# Source: Original SMACT set manually compiled (2014)
+#
 H -1 +1
 He
 Li +1

diff --git a/smact/data/oxidation_states_SP.txt b/smact/data/oxidation_states_SP.txt
@@ -1,3 +1,7 @@
+#
+# Oxidation state set
+# Source: Pymatgen structure predictor (2017)
+#
 H 1
 He
 Li 1

diff --git a/smact/data/oxidation_states_icsd.txt b/smact/data/oxidation_states_icsd.txt
@@ -1,3 +1,7 @@
+#
+# Oxidation state set
+# Source: ICSD (2016)
+#
 H -1 1
 He
 Li 1

diff --git a/smact/data/oxidation_states_icsd24_common.txt b/smact/data/oxidation_states_icsd24_common.txt
@@ -0,0 +1,107 @@
+#
+# Oxidation state set
+# Source: ICSD (2024, V2), most common non-zero values
+#
+H 1
+He
+Li 1
+Be 2
+B 3
+C 4
+N -3
+O -2
+F -1
+Ne
+Na 1
+Mg 2
+Al 3
+Si 4
+P 5
+S -2
+Cl -1
+Ar
+K 1
+Ca 2
+Sc 3
+Ti 4
+V 5
+Cr 3
+Mn 2
+Fe 3
+Co 2
+Ni 2
+Cu 2
+Zn 2
+Ga 3
+Ge 4
+As 5
+Se -2
+Br -1
+Kr 2
+Rb 1
+Sr 2
+Y 3
+Zr 4
+Nb 5
+Mo 6
+Tc 7
+Ru 4
+Rh 3
+Pd 2
+Ag 1
+Cd 2
+In 3
+Sn 4
+Sb 3
+Te -2
+I -1
+Xe 6
+Cs 1
+Ba 2
+La 3
+Ce 3
+Pr 3
+Nd 3
+Pm 3
+Sm 3
+Eu 3
+Gd 3
+Tb 3
+Dy 3
+Ho 3
+Er 3
+Tm 3
+Yb 3
+Lu 3
+Hf 4
+Ta 5
+W 6
+Re 7
+Os 5
+Ir 4
+Pt 4
+Au 1
+Hg 2
+Tl 1
+Pb 2
+Bi 3
+Po 4
+At
+Rn
+Fr
+Ra 2
+Ac 3
+Th 4
+Pa 5
+U 6
+Np 6
+Pu 3
+Am 3
+Cm 3
+Bk 3
+Cf 3
+Es 3
+Fm
+Md
+No
+Lr
diff --git a/smact/data/oxidation_states_icsd24_counts.json b/smact/data/oxidation_states_icsd24_counts.json