Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX] Update values of oxidation states #202

Merged
merged 4 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/source/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -831,9 +831,9 @@ @misc{NIST-CH-WB
@misc{enwiki:1102394064,
author = {{Wikipedia contributors}},
title = {Oxidation state --- {Wikipedia}{,} The Free Encyclopedia},
year = {2022},
url = {https://en.wikipedia.org/w/index.php?title=Oxidation_state&oldid=1102394064},
note = {[Online; accessed 28-September-2022]}
year = {2024},
url = {https://en.wikipedia.org/w/index.php?title=Oxidation_state&oldid=1253639950},
note = {[Online; accessed 4-November-2024]}
}
@book{haynes2016crc,
title = {CRC Handbook of Chemistry and Physics},
Expand Down
Binary file modified mendeleev/elements.db
Binary file not shown.
112 changes: 112 additions & 0 deletions mendeleev/interfaces/wiki_oxidation_states.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import requests
import pandas as pd
import polars as pl
from bs4 import BeautifulSoup
from bs4.element import Tag

from mendeleev.models import OxidationState
from mendeleev.db import get_session


def fetch_oxidation_states_table() -> "Tag | None":
    """
    Fetch the table with oxidation states from wikipedia.

    Downloads the "Oxidation state" article and extracts the first element
    matching the ``table.wikitable.sortable`` CSS selector.

    Returns:
        The matching table tag, or ``None`` when the page could not be
        retrieved (non-200 status code) or when the selector matched
        nothing. A diagnostic message is printed in either case.
    """
    url = "https://en.wikipedia.org/wiki/Oxidation_state"
    # Without an explicit timeout requests may wait indefinitely on an
    # unresponsive server.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        print(f"Failed to retrieve page. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    table_selector = "table.wikitable.sortable"
    table = soup.select_one(table_selector)
    if table is None:
        print("Table not found with the specified CSS selector.")
        return None

    print("Table found and extracted!")
    return table


def parse_oxidation_states_table(table: Tag) -> list[dict]:
    """
    Parse oxidation states from a table into a list of row dictionaries.

    Args:
        table (bs4.element.Tag): The BeautifulSoup Tag object for the table.

    Returns:
        list[dict]: One dictionary per element row. The key ``"Z"`` holds the
        atomic number and the integer keys -5 to 9 hold ``True`` when the
        corresponding cell is non-empty and contains a ``<b>`` tag, ``False``
        when it is non-empty without one, and ``None`` when the cell is empty
        or missing.
    """
    parsed_rows = []

    for row in table.find_all("tr")[4:]:  # Skip the header rows
        cells = row.find_all("td")
        # Rows without any <td> cell (e.g. header-only rows) carry no data;
        # skip them instead of failing on the index lookup below.
        if not cells:
            continue

        # Extract the atomic number (Z value) from the first cell
        z_text = cells[0].text.strip()
        if not z_text.isdigit():
            continue

        oxidation_states = {"Z": int(z_text)}
        oxidation_states.update(dict.fromkeys(range(-5, 10)))

        # the 4th to the 18th cell map onto the oxidation states -5 ... 9
        for state, cell in enumerate(cells[3:18], start=-5):
            if cell.text.strip():
                oxidation_states[state] = cell.find("b") is not None
        parsed_rows.append(oxidation_states)
    return parsed_rows


def create_pandas(data: list[dict]) -> pd.DataFrame:
    """
    Create a long-format pandas dataframe of oxidation states.

    Args:
        data: Row dictionaries with a ``"Z"`` key and the integer keys
            -5 to 9 whose values are ``True``, ``False`` or ``None``.

    Returns:
        pd.DataFrame: One row per non-null entry with the columns
        ``atomic_number``, ``oxidation_state`` and ``category``, where
        ``True`` is mapped to ``"main"`` and ``False`` to ``"extended"``.
        Rows are ordered by atomic number and then by oxidation state.
        An empty input produces an empty frame with the same columns.
    """
    if not data:
        # melt would fail on a frame without a "Z" column
        return pd.DataFrame(
            columns=["atomic_number", "oxidation_state", "category"]
        )

    wide = pd.DataFrame(data)
    long = (
        wide.melt(id_vars=["Z"], value_vars=list(range(-5, 10)))
        .dropna(axis="rows", subset=["value"])
        # sort on both keys so the order within an element is deterministic
        .sort_values(by=["Z", "variable"])
        .reset_index(drop=True)
    )
    long["category"] = long["value"].map({True: "main", False: "extended"})
    return long.rename(
        columns={"Z": "atomic_number", "variable": "oxidation_state"}
    ).drop(columns=["value"])


def create_polars(data: list[dict]) -> pl.DataFrame:
    """
    Create a long-format polars dataframe of oxidation states.

    Args:
        data: Row dictionaries with a ``"Z"`` key and integer oxidation
            state keys whose values are ``True``, ``False`` or ``None``.

    Returns:
        pl.DataFrame: One row per non-null entry with the columns
        ``atomic_number``, ``oxidation_state`` (cast to ``Int32``) and
        ``category``, where ``True`` becomes ``"main"`` and ``False``
        becomes ``"extended"``.
    """
    # the integer keys are stringified before the frame is built
    records = [{str(key): value for key, value in row.items()} for row in data]
    wide = pl.from_dicts(records)
    # NOTE(review): newer polars releases rename ``melt`` to ``unpivot``;
    # switch once the minimum supported polars version allows it.
    return (
        wide.melt(id_vars=["Z"])
        .drop_nulls(subset=["value"])
        .rename(
            {"Z": "atomic_number", "variable": "oxidation_state", "value": "category"}
        )
        .cast({"oxidation_state": pl.Int32})
        .with_columns(
            # ``Expr.map_dict`` is not available in current polars releases;
            # nulls were dropped above, so a two-way branch on the boolean
            # column yields the same labels.
            pl.when(pl.col("category"))
            .then(pl.lit("main"))
            .otherwise(pl.lit("extended"))
            .alias("category")
        )
    )


if __name__ == "__main__":
    # Scrape the oxidation states and, when ``update`` is enabled below,
    # store them as OxidationState records in the database.
    table = fetch_oxidation_states_table()
    if table is None:
        # The fetch helper already printed the reason; stop here instead of
        # failing later inside the parser with an AttributeError.
        raise SystemExit("Could not obtain the oxidation states table.")
    parsed = parse_oxidation_states_table(table)
    ox = create_pandas(parsed)

    # Safety switch: set to True to write the scraped values to the database.
    update = False
    if update:
        session = get_session(read_only=False)
        try:
            objects = [
                OxidationState(
                    # iterrows may hand back NumPy scalars; pass plain ints
                    atomic_number=int(row["atomic_number"]),
                    oxidation_state=int(row["oxidation_state"]),
                    category=row["category"],
                )
                for _, row in ox.iterrows()
            ]
            session.add_all(objects)
            session.commit()
        finally:
            # release the session even when the insert or commit fails
            session.close()
14 changes: 8 additions & 6 deletions mendeleev/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,13 +428,15 @@ def oxidation_states(self, category: str = "main") -> List[int]:
)

if category == "all":
return [o.oxidation_state for o in self._oxidation_states]
return sorted([o.oxidation_state for o in self._oxidation_states])
else:
return [
o.oxidation_state
for o in self._oxidation_states
if o.category == category
]
return sorted(
[
o.oxidation_state
for o in self._oxidation_states
if o.category == category
]
)

def zeff(
self, n: int = None, o: str = None, method: str = "slater", alle: bool = False
Expand Down
2 changes: 1 addition & 1 deletion tests/test_fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
("ionizationenergies", 5847),
("groups", 18),
("series", 10),
("oxidationstates", 579),
("oxidationstates", 601),
("phasetransitions", 108),
]

Expand Down
Loading