diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..72982003 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,50 @@ +# Contributing + +This is a quick guide on how to follow best practice and contribute smoothly to `SMACT`. + +## Workflow + +We follow the [GitHub flow](https://docs.github.com/en/get-started/using-github/github-flow), using +branches for new work and pull requests for verifying the work. + +The steps for a new piece of work can be summarised as follows: + +1. Push up or create [an issue](https://github.com/WMD-group/SMACT/issues). +2. Create a branch from main, with a sensible name that relates to the issue. +3. Do the work and commit changes to the branch. Push the branch + regularly to GitHub to make sure no work is accidentally lost. +4. Write or update unit tests for the code you work on. +5. When you are finished with the work, ensure that all of the unit + tests pass on your own machine. +6. Open a pull request [on the pull request page](https://github.com/WMD-group/SMACT/pulls). +7. If nobody acknowledges your pull request promptly, feel free to poke one of the main developers into action. + +## Pull requests + +For a general overview of using pull requests on GitHub look [in the GitHub docs](https://help.github.com/en/articles/about-pull-requests). + +When creating a pull request you should: + +- Ensure that the title succinctly describes the changes so it is easy to read on the overview page +- Reference the issue which the pull request is closing + +Recommended reading: [How to Write the Perfect Pull Request](https://github.blog/2015-01-21-how-to-write-the-perfect-pull-request/) + +## Dev requirements + +When developing locally, it is recommended to install the python packages in `requirements-dev.txt`. + +```bash +pip install -r requirements-dev.txt +``` + +This will allow you to run the tests locally with pytest as described in the main README, +as well as run pre-commit hooks to automatically format python files with isort and black. 
+To install the pre-commit hooks (only needs to be done once): + +```bash +pre-commit install +pre-commit run --all-files # optionally run hooks on all files +``` + +Pre-commit hooks will check all files when you commit changes, automatically fixing any files which are not formatted correctly. Those files will need to be staged again before re-attempting the commit. diff --git a/README.md b/README.md index 83bf6a03..87b58b01 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,7 @@ We are always looking for ways to make SMACT better and more useful to the wider - Code style should comply with [PEP8](http://www.python.org/dev/peps/pep-0008) where possible. [Google's house style](https://google.github.io/styleguide/pyguide.html) is also helpful, including a good model for docstrings. - Please use comments liberally when adding nontrivial features, and take the chance to clean up other people's code while looking at it. - Add tests wherever possible, and use the test suite to check if you broke anything. +- Look at the [contributing guide](CONTRIBUTING.md) for more information. ### Tests diff --git a/docs/conf.py b/docs/conf.py index eb7ccac8..9cd4530a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -63,9 +63,9 @@ # built documents. # # The short X.Y version. -version = "2.6" +version = "2.7" # The full version, including alpha/beta/rc tags. -release = "2.6.0" +release = "2.7.0" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/docs/smact.rst b/docs/smact.rst index 215effd8..82783775 100644 --- a/docs/smact.rst +++ b/docs/smact.rst @@ -25,6 +25,7 @@ Submodules smact.structure_prediction smact.dopant_prediction + smact.utils smact.properties smact.screening smact.oxidation_states diff --git a/docs/smact.utils.composition.rst b/docs/smact.utils.composition.rst new file mode 100644 index 00000000..68f87a73 --- /dev/null +++ b/docs/smact.utils.composition.rst @@ -0,0 +1,9 @@ +SMACT Utilities Composition Module +===================================== + +Miscellaneous utilities for composition handling + +.. automodule:: smact.utils.composition + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/smact.utils.rst b/docs/smact.utils.rst new file mode 100644 index 00000000..06d0a868 --- /dev/null +++ b/docs/smact.utils.rst @@ -0,0 +1,11 @@ +SMACT Utilities module +=========================== + +The utilities module provides some utility functions to support the core functionalities of SMACT + +Submodules +---------- + +.. 
toctree:: + + smact.utils.composition diff --git a/docs/tutorials/crystal_space_visualisation.ipynb b/docs/tutorials/crystal_space_visualisation.ipynb index c57e023e..8beee7d1 100644 --- a/docs/tutorials/crystal_space_visualisation.ipynb +++ b/docs/tutorials/crystal_space_visualisation.ipynb @@ -77,7 +77,7 @@ "metadata": {}, "outputs": [], "source": [ - "from typing import Iterable\n", + "from collections.abc import Iterable\n", "from pathlib import Path\n", "\n", "from tqdm import tqdm\n", diff --git a/examples/vec_example.py b/examples/vec_example.py new file mode 100644 index 00000000..318bc298 --- /dev/null +++ b/examples/vec_example.py @@ -0,0 +1,10 @@ +from smact.properties import valence_electron_count + +# Define the compound +compound = "Fe2O3" + +# Calculate the Valence Electron Count (VEC) +vec = valence_electron_count(compound) + +# Print the result +print(f"The Valence Electron Count (VEC) for {compound} is: {vec:.2f}") diff --git a/setup.py b/setup.py index 0fc21cf3..dee2d1eb 100755 --- a/setup.py +++ b/setup.py @@ -10,7 +10,8 @@ __version__ = "2.6" __maintainer__ = "Anthony O. 
Onwuli" __maintainer_email__ = "anthony.onwuli16@imperial.ac.uk" -__date__ = "July 10 2024" +__date__ = "August 30 2024" + import os @@ -32,9 +33,11 @@ author_email=__author_email__, maintainer=__maintainer__, maintainer_email=__maintainer_email__, + maintainer_email=__maintainer_email__, license="MIT", packages=[ "smact", + "smact.utils", "smact.tests", "smact.structure_prediction", "smact.dopant_prediction", @@ -56,7 +59,7 @@ "scipy", "numpy<2", "spglib", - "pymatgen>=2024.2.20", + "pymatgen>=2024.2.20,<2024.8.8", "ase", "pandas", "pathos", diff --git a/smact/__init__.py b/smact/__init__.py index 8f5ac623..7e1f37bc 100644 --- a/smact/__init__.py +++ b/smact/__init__.py @@ -60,9 +60,11 @@ class Element: Element.oxidation_states (list) : Default list of allowed oxidation states for use in SMACT - Element.oxidation_states_sp (list) : List of oxdation states recognised by the Pymatgen Structure Predictor + Element.oxidation_states_smact14 (list): Original list of oxidation states that were manually compiled for SMACT in 2014 (default in SMACT < 3.0) - Element.oxidation_states_icsd (list) : List of oxidation states that appear in the ICSD + Element.oxidation_states_sp (list) : List of oxidation states recognised by the Pymatgen Structure Predictor + + Element.oxidation_states_icsd (list) : List of oxidation states that appear in the 2016 version of ICSD Element.oxidation_states_wiki (list): List of oxidation states that appear wikipedia (https://en.wikipedia.org/wiki/Template:List_of_oxidation_states_of_the_elements) Data retrieved: 2022-09-22 @@ -80,6 +82,18 @@ class Element: Element.HHI_r (float) : Hirfindahl-Hirschman Index for elemental reserves + Element.mendeleev (int): Mendeleev number + + Element.AtomicWeight (float): Atomic weight + + Element.MeltingT (float): Melting temperature in K + + Element.num_valence (int): Number of valence electrons + + Element.num_valence_modified (int): Number of valence electrons based on a modified definition + + + Raises: 
------ NameError: Element not found in element.txt @@ -133,6 +147,23 @@ def __init__(self, symbol: str, oxi_states_custom_filepath: str | None = None): sse_Pauling_data = data_loader.lookup_element_sse_pauling_data(symbol) sse_Pauling = sse_Pauling_data["SolidStateEnergyPauling"] if sse_Pauling_data else None + magpie_data = data_loader.lookup_element_magpie_data(symbol) + if magpie_data: + mendeleev = magpie_data["MendeleevNumber"] + AtomicWeight = magpie_data["AtomicWeight"] + MeltingT = magpie_data["MeltingT"] + num_valence = magpie_data["NValence"] + else: + mendeleev = None + AtomicWeight = None + MeltingT = None + num_valence = None + + valence_data = data_loader.lookup_element_valence_data(symbol) + num_valence_modified = ( + valence_data["NValence"] if valence_data else None + ) + for attribute, value in ( ("coord_envs", coord_envs), ("covalent_radius", dataset["r_cov"]), @@ -150,6 +181,10 @@ def __init__(self, symbol: str, oxi_states_custom_filepath: str | None = None): "oxidation_states", data_loader.lookup_element_oxidation_states(symbol), ), + ( + "oxidation_states_smact14", + data_loader.lookup_element_oxidation_states(symbol), + ), ( "oxidation_states_icsd", data_loader.lookup_element_oxidation_states_icsd(symbol), @@ -167,6 +202,11 @@ def __init__(self, symbol: str, oxi_states_custom_filepath: str | None = None): ("SSE", sse), ("SSEPauling", sse_Pauling), ("symbol", symbol), + ("mendeleev", mendeleev), + ("AtomicWeight", AtomicWeight), + ("MeltingT", MeltingT), + ("num_valence", num_valence), + ("num_valence_modified", num_valence_modified), # ('vdw_radius', dataset['RVdW']), ): setattr(self, attribute, value) diff --git a/smact/data/element_valence_modified.csv b/smact/data/element_valence_modified.csv new file mode 100644 index 00000000..9a642db3 --- /dev/null +++ b/smact/data/element_valence_modified.csv @@ -0,0 +1,98 @@ +element,NValence +H,1 +He,2 +Li,1 +Be,2 +B,3 +C,4 +N,5 +O,6 +F,7 +Ne,8 +Na,1 +Mg,2 +Al,3 +Si,4 +P,5 +S,6 +Cl,7 +Ar,8 +K,1 +Ca,2 
+Sc,3 +Ti,4 +V,5 +Cr,6 +Mn,7 +Fe,8 +Co,9 +Ni,10 +Cu,11 +Zn,12 +Ga,3 +Ge,4 +As,5 +Se,6 +Br,7 +Kr,8 +Rb,1 +Sr,2 +Y,3 +Zr,4 +Nb,5 +Mo,6 +Tc,7 +Ru,8 +Rh,9 +Pd,10 +Ag,11 +Cd,12 +In,3 +Sn,4 +Sb,5 +Te,6 +I,7 +Xe,8 +Cs,1 +Ba,2 +La,3 +Ce,4 +Pr,5 +Nd,6 +Pm,7 +Sm,8 +Eu,9 +Gd,10 +Tb,11 +Dy,12 +Ho,13 +Er,14 +Tm,15 +Yb,16 +Lu,3 +Hf,4 +Ta,5 +W,6 +Re,7 +Os,8 +Ir,9 +Pt,10 +Au,11 +Hg,12 +Tl,3 +Pb,4 +Bi,5 +Po,6 +At,7 +Rn,8 +Fr,1 +Ra,2 +Ac,3 +Th,4 +Pa,5 +U,6 +Np,7 +Pu,8 +Am,9 +Cm,10 +Bk,11 diff --git a/smact/data/magpie.csv b/smact/data/magpie.csv new file mode 100644 index 00000000..9c75e52e --- /dev/null +++ b/smact/data/magpie.csv @@ -0,0 +1,98 @@ +element,Number,MendeleevNumber,AtomicWeight,MeltingT,Column,Row,CovalentRadius,Electronegativity,NsValence,NpValence,NdValence,NfValence,NValence,NsUnfilled,NpUnfilled,NdUnfilled,NfUnfilled,NUnfilled,GSvolume_pa,GSbandgap,GSmagmom,SpaceGroupNumber +H,1.0,92.0,1.00794,14.01,1.0,1.0,31.0,2.2,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,6.615,7.853,0.0,194.0 +He,2.0,98.0,4.002602,1211.4,18.0,1.0,28.0,1.63,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,12.305,18.098,0.0,225.0 +Li,3.0,1.0,6.941,453.69,1.0,2.0,128.0,0.98,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,16.5933333333,0.0,0.0,229.0 +Be,4.0,67.0,9.012182,1560.0,2.0,2.0,96.0,1.57,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,7.89,0.0,0.0,194.0 +B,5.0,72.0,10.811,2348.0,13.0,2.0,84.0,2.04,2.0,1.0,0.0,0.0,3.0,0.0,5.0,0.0,0.0,5.0,7.1725,1.524,0.0,166.0 +C,6.0,77.0,12.0107,3823.0,14.0,2.0,76.0,2.55,2.0,2.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,4.0,5.64,4.496,0.0,194.0 +N,7.0,82.0,14.0067,63.05,15.0,2.0,71.0,3.04,2.0,3.0,0.0,0.0,5.0,0.0,3.0,0.0,0.0,3.0,14.76875,6.437,0.0,194.0 +O,8.0,87.0,15.9994,54.8,16.0,2.0,66.0,3.44,2.0,4.0,0.0,0.0,6.0,0.0,2.0,0.0,0.0,2.0,9.105,0.0,0.0,12.0 +F,9.0,93.0,18.9984032,53.5,17.0,2.0,57.0,3.98,2.0,5.0,0.0,0.0,7.0,0.0,1.0,0.0,0.0,1.0,9.7075,1.97,0.0,15.0 +Ne,10.0,99.0,20.1791,24.56,18.0,2.0,58.0,1.63,2.0,6.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,12.64,13.088,0.0,225.0 
+Na,11.0,2.0,22.98976928,370.87,1.0,3.0,166.0,0.93,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,29.2433333333,0.0,0.0,229.0 +Mg,12.0,68.0,24.305,923.0,2.0,3.0,141.0,1.31,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,22.89,0.0,0.0,194.0 +Al,13.0,73.0,26.9815386,933.47,13.0,3.0,121.0,1.61,2.0,1.0,0.0,0.0,3.0,0.0,5.0,0.0,0.0,5.0,16.48,0.0,0.0,225.0 +Si,14.0,78.0,28.0855,1687.0,14.0,3.0,111.0,1.9,2.0,2.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,4.0,20.44,0.773,0.0,227.0 +P,15.0,83.0,30.973762,317.3,15.0,3.0,107.0,2.19,2.0,3.0,0.0,0.0,5.0,0.0,3.0,0.0,0.0,3.0,22.5702380952,1.625,0.0,2.0 +S,16.0,88.0,32.065,388.36,16.0,3.0,105.0,2.58,2.0,4.0,0.0,0.0,6.0,0.0,2.0,0.0,0.0,2.0,25.786875,2.202,0.0,70.0 +Cl,17.0,94.0,35.453,171.6,17.0,3.0,102.0,3.16,2.0,5.0,0.0,0.0,7.0,0.0,1.0,0.0,0.0,1.0,24.4975,2.493,0.0,64.0 +Ar,18.0,100.0,39.948,83.8,18.0,3.0,106.0,1.63,2.0,6.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,28.54,9.26,0.0,225.0 +K,19.0,3.0,39.0983,336.53,1.0,4.0,203.0,0.82,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,73.1066666667,0.0,0.0,229.0 +Ca,20.0,7.0,40.078,1115.0,2.0,4.0,176.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,37.77,0.0,0.0,225.0 +Sc,21.0,11.0,44.955912,1814.0,3.0,4.0,170.0,1.36,2.0,0.0,1.0,0.0,3.0,0.0,0.0,9.0,0.0,9.0,22.235,0.0,6.35e-06,194.0 +Ti,22.0,43.0,47.867,1941.0,4.0,4.0,160.0,1.54,2.0,0.0,2.0,0.0,4.0,0.0,0.0,8.0,0.0,8.0,16.69,0.0,2.25333333333e-05,194.0 +V,23.0,46.0,50.9415,2183.0,5.0,4.0,153.0,1.63,2.0,0.0,3.0,0.0,5.0,0.0,0.0,7.0,0.0,7.0,13.01,0.0,0.0,229.0 +Cr,24.0,49.0,51.9961,2180.0,6.0,4.0,139.0,1.66,1.0,0.0,5.0,0.0,6.0,1.0,0.0,5.0,0.0,6.0,11.19,0.0,0.0,229.0 +Mn,25.0,52.0,54.938045,1519.0,7.0,4.0,139.0,1.55,2.0,0.0,5.0,0.0,7.0,0.0,0.0,5.0,0.0,5.0,10.4875862069,0.0,0.000310120689655,217.0 +Fe,26.0,55.0,55.845,1811.0,8.0,4.0,132.0,1.83,2.0,0.0,6.0,0.0,8.0,0.0,0.0,4.0,0.0,4.0,10.73,0.0,2.1106628,229.0 +Co,27.0,58.0,58.933195,1768.0,9.0,4.0,126.0,1.88,2.0,0.0,7.0,0.0,9.0,0.0,0.0,3.0,0.0,3.0,10.245,0.0,1.5484712,194.0 
+Ni,28.0,61.0,58.6934,1728.0,10.0,4.0,124.0,1.91,2.0,0.0,8.0,0.0,10.0,0.0,0.0,2.0,0.0,2.0,10.32,0.0,0.5953947,225.0 +Cu,29.0,64.0,63.546,1357.77,11.0,4.0,132.0,1.9,1.0,0.0,10.0,0.0,11.0,1.0,0.0,0.0,0.0,1.0,11.07,0.0,0.0,225.0 +Zn,30.0,69.0,65.38,692.68,12.0,4.0,122.0,1.65,2.0,0.0,10.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,13.96,0.0,0.0,194.0 +Ga,31.0,74.0,69.723,302.91,13.0,4.0,122.0,1.81,2.0,1.0,10.0,0.0,13.0,0.0,5.0,0.0,0.0,5.0,18.8575,0.0,0.0,64.0 +Ge,32.0,79.0,72.64,1211.4,14.0,4.0,120.0,2.01,2.0,2.0,10.0,0.0,14.0,0.0,4.0,0.0,0.0,4.0,23.005,0.383,0.0,225.0 +As,33.0,84.0,74.9216,1090.0,15.0,4.0,119.0,2.18,2.0,3.0,10.0,0.0,15.0,0.0,3.0,0.0,0.0,3.0,22.175,0.0,0.0,166.0 +Se,34.0,89.0,78.96,494.0,16.0,4.0,120.0,2.55,2.0,4.0,10.0,0.0,16.0,0.0,2.0,0.0,0.0,2.0,25.92,0.799,0.0,14.0 +Br,35.0,95.0,79.904,265.8,17.0,4.0,120.0,2.96,2.0,5.0,10.0,0.0,17.0,0.0,1.0,0.0,0.0,1.0,29.48,1.457,0.0,64.0 +Kr,36.0,101.0,83.798,115.79,18.0,4.0,116.0,3.0,2.0,6.0,10.0,0.0,18.0,0.0,0.0,0.0,0.0,0.0,36.06,7.535,0.0,225.0 +Rb,37.0,4.0,85.4678,312.46,1.0,5.0,220.0,0.82,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,90.7225,0.0,0.0,229.0 +Sr,38.0,8.0,87.62,1050.0,2.0,5.0,195.0,0.95,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,54.23,0.0,0.0,225.0 +Y,39.0,12.0,88.90585,1799.0,3.0,5.0,190.0,1.22,2.0,0.0,1.0,0.0,3.0,0.0,0.0,9.0,0.0,9.0,32.365,0.0,0.0,194.0 +Zr,40.0,44.0,91.224,2128.0,4.0,5.0,175.0,1.33,2.0,0.0,2.0,0.0,4.0,0.0,0.0,8.0,0.0,8.0,23.195,0.0,0.0,194.0 +Nb,41.0,47.0,92.90638,2750.0,5.0,5.0,164.0,1.6,1.0,0.0,4.0,0.0,5.0,1.0,0.0,6.0,0.0,7.0,18.18,0.0,0.0,229.0 +Mo,42.0,50.0,95.96,2896.0,6.0,5.0,154.0,2.16,1.0,0.0,5.0,0.0,6.0,1.0,0.0,5.0,0.0,6.0,15.69,0.0,0.0,229.0 +Tc,43.0,53.0,98.0,2430.0,7.0,5.0,147.0,1.9,2.0,0.0,5.0,0.0,7.0,0.0,0.0,5.0,0.0,5.0,14.285,0.0,0.0,194.0 +Ru,44.0,56.0,101.07,2607.0,8.0,5.0,146.0,2.2,1.0,0.0,7.0,0.0,8.0,1.0,0.0,3.0,0.0,4.0,13.51,0.0,0.0,194.0 +Rh,45.0,59.0,102.9055,2237.0,9.0,5.0,142.0,2.28,1.0,0.0,8.0,0.0,9.0,1.0,0.0,2.0,0.0,3.0,13.64,0.0,0.0,225.0 
+Pd,46.0,62.0,106.42,1828.05,10.0,5.0,139.0,2.2,0.0,0.0,10.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,14.41,0.0,0.0,225.0 +Ag,47.0,65.0,107.8682,1234.93,11.0,5.0,145.0,1.93,1.0,0.0,10.0,0.0,11.0,1.0,0.0,0.0,0.0,1.0,16.33,0.0,0.0,225.0 +Cd,48.0,70.0,112.411,594.22,12.0,5.0,144.0,1.69,2.0,0.0,10.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,19.495,0.0,0.0,194.0 +In,49.0,75.0,114.818,429.75,13.0,5.0,142.0,1.78,2.0,1.0,10.0,0.0,13.0,0.0,5.0,0.0,0.0,5.0,24.26,0.0,0.0,139.0 +Sn,50.0,80.0,118.71,505.08,14.0,5.0,139.0,1.96,2.0,2.0,10.0,0.0,14.0,0.0,4.0,0.0,0.0,4.0,33.285,0.0,0.0,141.0 +Sb,51.0,85.0,121.76,903.78,15.0,5.0,139.0,2.05,2.0,3.0,10.0,0.0,15.0,0.0,3.0,0.0,0.0,3.0,31.56,0.0,0.0,166.0 +Te,52.0,90.0,127.6,722.66,16.0,5.0,138.0,2.1,2.0,4.0,10.0,0.0,16.0,0.0,2.0,0.0,0.0,2.0,34.7633333333,0.464,0.0,152.0 +I,53.0,96.0,126.90447,386.85,17.0,5.0,139.0,2.66,2.0,5.0,10.0,0.0,17.0,0.0,1.0,0.0,0.0,1.0,43.015,1.062,0.0,64.0 +Xe,54.0,102.0,131.293,161.3,18.0,5.0,140.0,2.6,2.0,6.0,10.0,0.0,18.0,0.0,0.0,0.0,0.0,0.0,53.65,6.456,0.0,225.0 +Cs,55.0,5.0,132.9054519,301.59,1.0,6.0,244.0,0.79,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,115.765,0.0,0.0,229.0 +Ba,56.0,9.0,137.327,1000.0,2.0,6.0,215.0,0.89,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,63.59,0.0,0.0,229.0 +La,57.0,13.0,138.90547,1193.0,3.0,6.0,207.0,1.1,2.0,0.0,1.0,0.0,3.0,0.0,0.0,9.0,0.0,9.0,36.8975,0.0,0.0,194.0 +Ce,58.0,15.0,140.116,1071.0,3.0,6.0,204.0,1.12,2.0,0.0,1.0,1.0,4.0,0.0,0.0,9.0,13.0,22.0,37.24,0.0,0.0,194.0 +Pr,59.0,17.0,140.90765,1204.0,3.0,6.0,203.0,1.13,2.0,0.0,0.0,3.0,5.0,0.0,0.0,0.0,11.0,11.0,35.675,0.0,0.0,194.0 +Nd,60.0,19.0,144.242,1294.0,3.0,6.0,201.0,1.14,2.0,0.0,0.0,4.0,6.0,0.0,0.0,0.0,10.0,10.0,34.81,0.0,0.0,194.0 +Pm,61.0,21.0,145.0,1373.0,3.0,6.0,199.0,1.155,2.0,0.0,0.0,5.0,7.0,0.0,0.0,0.0,9.0,9.0,33.8425,0.0,0.0,194.0 +Sm,62.0,23.0,150.36,1345.0,3.0,6.0,198.0,1.17,2.0,0.0,0.0,6.0,8.0,0.0,0.0,0.0,8.0,8.0,33.23,0.0,0.0,166.0 +Eu,63.0,25.0,151.964,1095.0,3.0,6.0,198.0,1.185,2.0,0.0,0.0,7.0,9.0,0.0,0.0,0.0,7.0,7.0,36.46,0.0,0.0,229.0 
+Gd,64.0,27.0,157.25,1586.0,3.0,6.0,196.0,1.2,2.0,0.0,1.0,7.0,10.0,0.0,0.0,9.0,7.0,16.0,32.05,0.0,0.0,194.0 +Tb,65.0,29.0,158.92535,1629.0,3.0,6.0,194.0,1.21,2.0,0.0,0.0,9.0,11.0,0.0,0.0,0.0,5.0,5.0,31.7366666667,0.0,0.0,194.0 +Dy,66.0,31.0,162.5,1685.0,3.0,6.0,192.0,1.22,2.0,0.0,0.0,10.0,12.0,0.0,0.0,0.0,4.0,4.0,31.24,0.0,0.0,194.0 +Ho,67.0,33.0,164.93032,1747.0,3.0,6.0,192.0,1.23,2.0,0.0,0.0,11.0,13.0,0.0,0.0,0.0,3.0,3.0,30.7333333333,0.0,0.0,194.0 +Er,68.0,35.0,167.259,1770.0,3.0,6.0,189.0,1.24,2.0,0.0,0.0,12.0,14.0,0.0,0.0,0.0,2.0,2.0,30.585,0.0,0.0,194.0 +Tm,69.0,37.0,168.93421,1818.0,3.0,6.0,190.0,1.25,2.0,0.0,0.0,13.0,15.0,0.0,0.0,0.0,1.0,1.0,29.78,0.0,0.0,194.0 +Yb,70.0,39.0,173.054,1092.0,3.0,6.0,187.0,1.26,2.0,0.0,0.0,14.0,16.0,0.0,0.0,0.0,0.0,0.0,34.12,0.0,0.0,225.0 +Lu,71.0,41.0,174.9668,1936.0,3.0,6.0,187.0,1.27,2.0,0.0,1.0,14.0,17.0,0.0,0.0,9.0,0.0,9.0,28.865,0.0,0.0022471,194.0 +Hf,72.0,45.0,178.49,2506.0,4.0,6.0,175.0,1.3,2.0,0.0,2.0,14.0,18.0,0.0,0.0,8.0,0.0,8.0,22.2,0.0,0.0,194.0 +Ta,73.0,48.0,180.94788,3290.0,5.0,6.0,170.0,1.5,2.0,0.0,3.0,14.0,19.0,0.0,0.0,7.0,0.0,7.0,18.12,0.0,0.0,229.0 +W,74.0,51.0,183.84,3695.0,6.0,6.0,162.0,2.36,2.0,0.0,4.0,14.0,20.0,0.0,0.0,6.0,0.0,6.0,16.05,0.0,0.0,229.0 +Re,75.0,54.0,186.207,3459.0,7.0,6.0,151.0,1.9,2.0,0.0,5.0,14.0,21.0,0.0,0.0,5.0,0.0,5.0,14.655,0.0,0.0,194.0 +Os,76.0,57.0,190.23,3306.0,8.0,6.0,144.0,2.2,2.0,0.0,6.0,14.0,22.0,0.0,0.0,4.0,0.0,4.0,14.09,0.0,0.0,194.0 +Ir,77.0,60.0,192.217,2739.0,9.0,6.0,141.0,2.2,2.0,0.0,7.0,14.0,23.0,0.0,0.0,3.0,0.0,3.0,14.21,0.0,0.0,225.0 +Pt,78.0,63.0,195.084,2041.4,10.0,6.0,136.0,2.28,1.0,0.0,9.0,14.0,24.0,1.0,0.0,1.0,0.0,2.0,15.02,0.0,0.0,225.0 +Au,79.0,66.0,196.966569,1337.33,11.0,6.0,136.0,2.54,1.0,0.0,10.0,14.0,25.0,1.0,0.0,0.0,0.0,1.0,16.7,0.0,0.0,225.0 +Hg,80.0,71.0,200.59,234.32,12.0,6.0,132.0,2.0,2.0,0.0,10.0,14.0,26.0,0.0,0.0,0.0,0.0,0.0,25.2375862069,0.0,0.0,166.0 
+Tl,81.0,76.0,204.3833,577.0,13.0,6.0,145.0,1.62,2.0,1.0,10.0,14.0,27.0,0.0,5.0,0.0,0.0,5.0,26.91,0.0,0.0,194.0 +Pb,82.0,81.0,207.2,600.61,14.0,6.0,146.0,2.33,2.0,2.0,10.0,14.0,28.0,0.0,4.0,0.0,0.0,4.0,28.11,0.0,0.0,225.0 +Bi,83.0,86.0,208.9804,544.4,15.0,6.0,148.0,2.02,2.0,3.0,10.0,14.0,29.0,0.0,3.0,0.0,0.0,3.0,32.95,0.0,0.0,12.0 +Po,84.0,91.0,209.0,527.0,16.0,6.0,140.0,2.0,2.0,4.0,10.0,14.0,30.0,0.0,2.0,0.0,0.0,2.0,38.73125,0.0,0.0,221.0 +At,85.0,97.0,210.0,575.0,17.0,6.0,150.0,2.2,2.0,5.0,10.0,14.0,31.0,0.0,1.0,0.0,0.0,1.0,38.73125,0.0,0.0,194.0 +Rn,86.0,103.0,222.0,202.0,18.0,6.0,150.0,1.63,2.0,6.0,10.0,14.0,32.0,0.0,0.0,0.0,0.0,0.0,38.73125,0.0,0.0,194.0 +Fr,87.0,6.0,223.0,1211.4,1.0,7.0,260.0,0.7,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,38.73125,0.0,0.0,194.0 +Ra,88.0,10.0,226.0,973.0,2.0,7.0,221.0,0.9,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,38.73125,0.0,0.0,229.0 +Ac,89.0,14.0,227.0,1323.0,3.0,7.0,215.0,1.1,2.0,0.0,1.0,0.0,3.0,0.0,0.0,9.0,0.0,9.0,44.5125,0.0,0.0,225.0 +Th,90.0,16.0,232.03806,2023.0,3.0,7.0,206.0,1.3,2.0,0.0,2.0,0.0,4.0,0.0,0.0,8.0,0.0,8.0,32.37,0.0,0.0,225.0 +Pa,91.0,18.0,231.03586,1845.0,3.0,7.0,200.0,1.5,2.0,0.0,1.0,2.0,5.0,0.0,0.0,9.0,12.0,21.0,25.18,0.0,0.0,139.0 +U,92.0,20.0,238.02891,1408.0,3.0,7.0,196.0,1.38,2.0,0.0,1.0,3.0,6.0,0.0,0.0,9.0,11.0,20.0,20.025,0.0,0.0,63.0 +Np,93.0,22.0,237.0,917.0,3.0,7.0,190.0,1.36,2.0,0.0,1.0,4.0,7.0,0.0,0.0,9.0,10.0,19.0,18.45375,0.0,0.0,62.0 +Pu,94.0,24.0,244.0,913.0,3.0,7.0,187.0,1.28,2.0,0.0,0.0,6.0,8.0,0.0,0.0,0.0,8.0,8.0,18.08,0.0,0.3180036375,11.0 +Am,95.0,26.0,243.0,1449.0,3.0,7.0,180.0,1.3,2.0,0.0,0.0,7.0,9.0,0.0,0.0,0.0,7.0,7.0,18.08,0.0,0.3180036375,194.0 +Cm,96.0,28.0,247.0,1618.0,3.0,7.0,169.0,1.3,2.0,0.0,1.0,7.0,10.0,0.0,0.0,9.0,7.0,16.0,18.08,0.0,0.3180036375,194.0 +Bk,97.0,30.0,247.0,1323.0,3.0,7.0,146.0,1.3,2.0,0.0,0.0,9.0,11.0,0.0,0.0,0.0,5.0,5.0,18.08,0.0,0.3180036375,194.0 diff --git a/smact/data_loader.py b/smact/data_loader.py index b022ffcf..c3f9d5df 100644 --- 
a/smact/data_loader.py +++ b/smact/data_loader.py @@ -15,6 +15,8 @@ import csv import os +import pandas as pd + from smact import data_directory # Module-level switch: print "verbose" warning messages @@ -789,3 +791,134 @@ def lookup_element_sse_pauling_data(symbol): ) return None + + +_element_magpie_data = None + + +def lookup_element_magpie_data(symbol: str, copy: bool = True): + """ + Retrieve element data contained in the Magpie representation. + + Taken from Ward, L., Agrawal, A., Choudhary, A. et al. + A general-purpose machine learning framework for + predicting properties of inorganic materials. + npj Comput Mater 2, 16028 (2016). + https://doi.org/10.1038/npjcompumats.2016.28 + + Args: + symbol : the atomic symbol of the element to look up. + copy: if True (default), return a copy of the data dictionary, + rather than a reference to a cached object -- only use + copy=False in performance-sensitive code and where you are + certain the dictionary will not be modified! + + Returns: + list: + Magpie features. + Returns None if the element was not found among the external + data. 
+ + Magpie features are dictionaries with the keys: + + + + + """ + + global _element_magpie_data + + if _element_magpie_data is None: + _element_magpie_data = {} + + df = pd.read_csv(os.path.join(data_directory, "magpie.csv")) + for _index, row in df.iterrows(): + key = row.iloc[0] + + dataset = { + "Number": int(row.iloc[1]), + "MendeleevNumber": int(row.iloc[2]), + "AtomicWeight": float(row.iloc[3]), + "MeltingT": float(row.iloc[4]), + "Column": int(row.iloc[5]), + "Row": int(row.iloc[6]), + "CovalentRadius": float(row.iloc[7]), + "Electronegativity": float(row.iloc[8]), + "NsValence": int(row.iloc[9]), + "NpValence": int(row.iloc[10]), + "NdValence": int(row.iloc[11]), + "NfValence": int(row.iloc[12]), + "NValence": int(row.iloc[13]), + "NsUnfilled": int(row.iloc[14]), + "NpUnfilled": int(row.iloc[15]), + "NdUnfilled": int(row.iloc[16]), + "NfUnfilled": int(row.iloc[17]), + "NUnfilled": int(row.iloc[18]), + "GSvolume_pa": float(row.iloc[19]), + "GSbandgap": float(row.iloc[20]), + "GSmagmom": float(row.iloc[21]), + "SpaceGroupNumber": int(row.iloc[22]), + } + _element_magpie_data[key] = dataset + + if symbol in _element_magpie_data: + return _element_magpie_data[symbol] + else: + if _print_warnings: + print( + "WARNING: Magpie data for element {} not " + "found.".format(symbol) + ) + + return None + + +_element_valence_data = None + + +def lookup_element_valence_data(symbol: str, copy: bool = True): + """ + Retrieve valence electron data. + + For d-block elements, the s and d electrons contribute to NValence. + For p-block elements, the s and p electrons contribute to NValence. + For s- and f-block elements, NValence is calculated from the Noble Gas electron configuration + i.e. + + Args: + symbol : the atomic symbol of the element to look up. 
+ copy: if True (default), return a copy of the data dictionary, + rather than a reference to a cached object -- only use + copy=False in performance-sensitive code and where you are + certain the dictionary will not be modified! + + Returns: + NValence (int): the number of valence electrons + Returns None if the element was not found among the external + data. + """ + + global _element_valence_data + + if _element_valence_data is None: + _element_valence_data = {} + + df = pd.read_csv( + os.path.join(data_directory, "element_valence_modified.csv") + ) + for _index, row in df.iterrows(): + key = row.iloc[0] + + dataset = {"NValence": int(row.iloc[1])} + _element_valence_data[key] = dataset + + if symbol in _element_valence_data: + return _element_valence_data[symbol] + else: + if _print_warnings: + print( + "WARNING: Valence data for element {} not " + "found.".format(symbol) + ) + + return None diff --git a/smact/properties.py b/smact/properties.py index 2dead3e8..d3ad5a40 100644 --- a/smact/properties.py +++ b/smact/properties.py @@ -5,6 +5,7 @@ import numpy as np import smact +from smact.utils.composition import parse_formula def eneg_mulliken(element: smact.Element | str) -> float: @@ -159,3 +160,48 @@ def compound_electroneg( print("Geometric mean = Compound 'electronegativity'=", compelectroneg) return compelectroneg + + +def valence_electron_count(compound: str) -> float: + """ + Calculate the Valence Electron Count (VEC) for a given chemical compound. + + This function parses the input compound, extracts the elements and their + stoichiometries, and calculates the VEC using the valence electron data + from SMACT's Element class. + + Args: + compound (str): Chemical formula of the compound (e.g., "Fe2O3"). + + Returns: + float: Valence Electron Count (VEC) for the compound. + + Raises: + ValueError: If an element in the compound is not found in the valence data. 
+ """ + + def get_element_valence(element: str) -> int: + try: + return smact.Element(element).num_valence_modified + except NameError: + raise ValueError( + f"Valence data not found for element: {element}" + ) from None + + element_stoich = parse_formula(compound) + + total_valence = 0 + total_stoich = 0 + for element, stoich in element_stoich.items(): + try: + valence = get_element_valence(element) + total_valence += stoich * valence + total_stoich += stoich + except TypeError: + raise ValueError(f"No valence information for element {element}") + + if total_stoich == 0: + return 0.0 + + vec = total_valence / total_stoich + return vec diff --git a/smact/screening.py b/smact/screening.py index eb979907..aedaaa3f 100644 --- a/smact/screening.py +++ b/smact/screening.py @@ -335,21 +335,25 @@ def smact_filter( threshold: int | None = 8, stoichs: list[list[int]] | None = None, species_unique: bool = True, - oxidation_states_set: str = "default", -) -> list[tuple[str, int, int]] | list[tuple[str, int]]: - """ - Function that applies the charge neutrality and electronegativity + oxidation_states_set: str = "smact14", + comp_tuple: bool = False, +) -> Union[List[Tuple[str, int, int]], List[Tuple[str, int]]]: + """Function that applies the charge neutrality and electronegativity tests in one go for simple application in external scripts that wish to apply the general 'smact test'. + .. warning:: + For backwards compatibility in SMACT >=2.7, explicitly set oxidation_states_set to 'smact14' if you wish to use the 2014 SMACT default oxidation states. + In SMACT 3.0, the smact_filter function will be set to use a new default oxidation states set. + Args: ---- els (tuple/list): A list of smact.Element objects threshold (int): Threshold for stoichiometry limit, default = 8 stoichs (list[int]): A selection of valid stoichiometric ratios for each site. species_unique (bool): Whether or not to consider elements in different oxidation states as unique in the results. 
- oxidation_states_set (string): A string to choose which set of oxidation states should be chosen. Options are 'default', 'icsd', 'pymatgen' and 'wiki' for the default, icsd, pymatgen structure predictor and Wikipedia (https://en.wikipedia.org/wiki/Template:List_of_oxidation_states_of_the_elements) oxidation states respectively. A filepath to an oxidation states text file can also be supplied as well. - + oxidation_states_set (string): A string to choose which set of oxidation states should be chosen. Options are 'smact14', 'icsd', 'pymatgen' and 'wiki' for the 2014 SMACT default, 2016 ICSD, pymatgen structure predictor and Wikipedia (https://en.wikipedia.org/wiki/Template:List_of_oxidation_states_of_the_elements) oxidation states respectively. A filepath to an oxidation states text file can also be supplied as well. + comp_tuple (bool): Whether or not to return the results as a named tuple of elements and stoichiometries (True) or as a normal tuple of elements and stoichiometries (False). Returns: ------- allowed_comps (list): Allowed compositions for that chemical system @@ -388,7 +392,7 @@ def smact_filter( # Select the specified oxidation states set: oxi_set = { - "default": [e.oxidation_states for e in els], + "smact14": [e.oxidation_states_smact14 for e in els], "icsd": [e.oxidation_states_icsd for e in els], "pymatgen": [e.oxidation_states_sp for e in els], "wiki": [e.oxidation_states_wiki for e in els], @@ -400,7 +404,7 @@ def smact_filter( else: raise ( Exception( - f'{oxidation_states_set} is not valid. Enter either "default", "icsd", "pymatgen","wiki" or a filepath to a textfile of oxidation states.' + f'{oxidation_states_set} is not valid. Enter either "smact14", "icsd", "pymatgen","wiki" or a filepath to a textfile of oxidation states.' 
) ) if oxidation_states_set == "wiki": @@ -432,21 +436,25 @@ def smact_validity( composition: pymatgen.core.Composition | str, use_pauling_test: bool = True, include_alloys: bool = True, - oxidation_states_set: str | bytes | os.PathLike = "default", + oxidation_states_set: Union[str, bytes, os.PathLike] = "smact14", ) -> bool: """ Check if a composition is valid according to the SMACT rules. Composition is considered valid if it passes the charge neutrality test and the Pauling electronegativity test. + .. warning:: + For backwards compatibility in SMACT >=2.7, explicitly set oxidation_states_set to 'smact14' if you wish to use the 2014 SMACT default oxidation states. + In SMACT 3.0, the smact_validity function will be set to use a new default oxidation states set. + Args: ---- composition (Union[pymatgen.core.Composition, str]): Composition/formula to check. This can be a pymatgen Composition object or a string. use_pauling_test (bool): Whether to use the Pauling electronegativity test include_alloys (bool): If True, compositions which only contain metal elements will be considered valid without further checks. oxidation_states_set (Union[str, bytes, os.PathLike]): A string to choose which set of - oxidation states should be chosen for charge-balancing. Options are 'default', 'icsd', - 'pymatgen' and 'wiki' for the default, icsd, pymatgen structure predictor and Wikipedia + oxidation states should be chosen for charge-balancing. Options are 'smact14', 'icsd', + 'pymatgen' and 'wiki' for the 2014 SMACT default, 2016 ICSD, pymatgen structure predictor and Wikipedia (https://en.wikipedia.org/wiki/Template:List_of_oxidation_states_of_the_elements) oxidation states respectively. A filepath to an oxidation states text file can also be supplied. 
@@ -476,8 +484,8 @@ def smact_validity( smact_elems = [e[1] for e in space.items()] electronegs = [e.pauling_eneg for e in smact_elems] - if oxidation_states_set == "default" or oxidation_states_set is None: - ox_combos = [e.oxidation_states for e in smact_elems] + if oxidation_states_set == "smact14" or oxidation_states_set is None: + ox_combos = [e.oxidation_states_smact14 for e in smact_elems] elif oxidation_states_set == "icsd": ox_combos = [e.oxidation_states_icsd for e in smact_elems] elif oxidation_states_set == "pymatgen": @@ -494,7 +502,7 @@ def smact_validity( else: raise ( Exception( - f'{oxidation_states_set} is not valid. Enter either "default", "icsd", "pymatgen","wiki" or a filepath to a textfile of oxidation states.' + f'{oxidation_states_set} is not valid. Enter either "smact14", "icsd", "pymatgen","wiki" or a filepath to a textfile of oxidation states.' ) ) diff --git a/smact/tests/test_core.py b/smact/tests/test_core.py index 0d46f8ec..39104af0 100755 --- a/smact/tests/test_core.py +++ b/smact/tests/test_core.py @@ -15,7 +15,11 @@ import smact.screening from smact import Species from smact.builder import wurtzite -from smact.properties import band_gap_Harrison, compound_electroneg +from smact.properties import ( + band_gap_Harrison, + compound_electroneg, + valence_electron_count, +) files_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "files") TEST_OX_STATES = os.path.join(files_dir, "test_oxidation_states.txt") @@ -80,6 +84,24 @@ def test_harrison_gap_MgCl(self): 3.545075110572662, ) + def test_valence_electron_count(self): + # Test valid compounds + self.assertAlmostEqual(valence_electron_count("Fe2O3"), 6.8, places=2) + self.assertAlmostEqual(valence_electron_count("CuZn"), 11.5, places=2) + + # Test single element + self.assertEqual(valence_electron_count("Fe"), 8) + + # Test empty string + self.assertEqual(valence_electron_count(""), 0.0) + + # Test invalid elements and formats + with self.assertRaises(ValueError): + 
valence_electron_count("Xx2O3") # Xx is not a real element + + with self.assertRaises(ValueError): + valence_electron_count("LrO") + # ---------------- BUILDER ---------------- def test_builder_ZnS(self): diff --git a/smact/tests/test_utils.py b/smact/tests/test_utils.py new file mode 100644 index 00000000..aad99f6c --- /dev/null +++ b/smact/tests/test_utils.py @@ -0,0 +1,76 @@ +import unittest + +from pymatgen.core import Composition + +from smact import Element +from smact.screening import smact_filter +from smact.utils.composition import comp_maker, formula_maker, parse_formula + + +class TestComposition(unittest.TestCase): + """Test composition utilities""" + + def setUp(self) -> None: + self.mock_filter_output = [ + (("Fe", "O"), (2, -2), (1, 1)), + (("Fe", "O"), (1, 1)), + (("Fe", "Fe", "O"), (2, 3, -2), (1, 2, 4)), + ] + self.smact_filter_output = smact_filter( + els=[Element("Li"), Element("Ge"), Element("P"), Element("S")], + stoichs=[[10], [1], [2], [12]], + ) + + def test_parse_formula(self): + """Test the parse_formula function""" + + formulas = ["Li10GeP2S12", "Mg0.5O0.5", "CaMg(CO3)2"] + + LGPS = parse_formula(formulas[0]) + self.assertIsInstance(LGPS, dict) + for el_sym, ammt in LGPS.items(): + self.assertIsInstance(el_sym, str) + self.assertIsInstance(ammt, float) + self.assertEqual(LGPS["Li"], 10) + self.assertEqual(LGPS["Ge"], 1) + self.assertEqual(LGPS["P"], 2) + self.assertEqual(LGPS["S"], 12) + + MgO = parse_formula(formulas[1]) + self.assertIsInstance(MgO, dict) + self.assertEqual(MgO["Mg"], 0.5) + self.assertEqual(MgO["O"], 0.5) + + dolomite = parse_formula(formulas[2]) + self.assertIsInstance(dolomite, dict) + self.assertEqual(dolomite["Ca"], 1) + self.assertEqual(dolomite["Mg"], 1) + self.assertEqual(dolomite["C"], 2) + self.assertEqual(dolomite["O"], 6) + + def test_comp_maker(self): + """Test the comp_maker function""" + comp1 = comp_maker(self.mock_filter_output[0]) + comp2 = comp_maker(self.mock_filter_output[1]) + comp3 = 
comp_maker(self.mock_filter_output[2]) + comp4 = comp_maker(self.smact_filter_output[1]) + for comp in [comp1, comp2, comp3, comp4]: + self.assertIsInstance(comp, Composition) + self.assertEqual(Composition("FeO"), comp2) + self.assertEqual(Composition({"Fe2+": 1, "O2-": 1}), comp1) + self.assertEqual(Composition({"Fe2+": 1, "Fe3+": 2, "O2-": 4}), comp3) + self.assertEqual( + Composition({"Li+": 10, "Ge4+": 1, "P5+": 2, "S2-": 12}), comp4 + ) + + def test_formula_maker(self): + """Test the formula_maker function""" + form1 = formula_maker(self.mock_filter_output[0]) + form2 = formula_maker(self.mock_filter_output[1]) + form3 = formula_maker(self.mock_filter_output[2]) + form4 = formula_maker(self.smact_filter_output[1]) + self.assertEqual(form1, "FeO") + self.assertEqual(form2, "FeO") + self.assertEqual(form1, form2) + self.assertEqual(form3, "Fe3O4") + self.assertEqual(form4, "Li10Ge(PS6)2") diff --git a/smact/utils/composition.py b/smact/utils/composition.py new file mode 100644 index 00000000..29bc660a --- /dev/null +++ b/smact/utils/composition.py @@ -0,0 +1,97 @@ +"""Utility functions for handling elements, species, formulas and composition""" +from __future__ import annotations + +import re +from collections import defaultdict + +from pymatgen.core import Composition + +from smact.structure_prediction.utilities import unparse_spec + + +# Adapted from ElementEmbeddings and Pymatgen +def parse_formula(formula: str) -> dict[str, float]: + """Parse a chemical formula into a dictionary of elements and their amounts.
+ + Args: + formula (str): Chemical formula + + Returns: + dict: Dictionary of element symbol: amount + """ + regex = r"\(([^\(\)]+)\)\s*([\.e\d]*)" + r = re.compile(regex) + m = re.search(r, formula) + if m: + factor = 1.0 + if m.group(2) != "": + factor = float(m.group(2)) + unit_sym_dict = _get_sym_dict(m.group(1), factor) + expanded_sym = "".join( + [f"{el}{amt}" for el, amt in unit_sym_dict.items()] + ) + expanded_formula = formula.replace(m.group(), expanded_sym) + return parse_formula(expanded_formula) + return _get_sym_dict(formula, 1) + + +def _get_sym_dict(formula: str, factor: float) -> dict[str, float]: + sym_dict: dict[str, float] = defaultdict(float) + regex = r"([A-Z][a-z]*)\s*([-*\.e\d]*)" + r = re.compile(regex) + for m in re.finditer(r, formula): + el = m.group(1) + amt = 1.0 + if m.group(2).strip() != "": + amt = float(m.group(2)) + sym_dict[el] += amt * factor + formula = formula.replace(m.group(), "", 1) + if formula.strip(): + msg = f"{formula} is an invalid formula" + raise ValueError(msg) + + return sym_dict + + +def comp_maker( + smact_filter_output: tuple[str, int, int] | tuple[str, int] +) -> Composition: + """Convert an item in the output of smact.screening.smact_filter into a Pymatgen Composition.
+ + Args: + smact_filter_output (tuple[str, int, int]|tuple[str, int]): An item in the list returned from smact_filter + + Returns: + composition (pymatgen.core.Composition): An instance of the Composition class + """ + if len(smact_filter_output) == 2: + form = [] + for el, ammt in zip(smact_filter_output[0], smact_filter_output[-1]): + form.append(el) + form.append(ammt) + form = "".join(str(e) for e in form) + else: + form = { + unparse_spec((el, ox)): ammt + for el, ox, ammt in zip( + smact_filter_output[0], + smact_filter_output[1], + smact_filter_output[2], + ) + } + return Composition(form) + + +def formula_maker( + smact_filter_output: tuple[str, int, int] | tuple[str, int] +) -> str: + """Convert an item in the output of smact.screening.smact_filter into a chemical formula. + + Args: + smact_filter_output (tuple[str, int, int]|tuple[str, int]): An item in the list returned from smact_filter + + Returns: + formula (str): A formula + + """ + return comp_maker(smact_filter_output).reduced_formula