From d2237a8fdaa1e5ba652cf69f82c3786901b4afa8 Mon Sep 17 00:00:00 2001
From: Alexander Bonkowski <57258530+ab5424@users.noreply.github.com>
Date: Tue, 28 May 2024 23:11:36 +0200
Subject: [PATCH] `pandas.read_csv`: replace deprecated `delim_whitespace=True` with `sep="\s+"` (#3846)

* Replace deprecated `delim_whitespace=True` with `sep="\s+"`

* pre-commit auto-fixes

* sio->str_io

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Janosh Riebesell
---
 pymatgen/io/lammps/data.py    | 12 ++++++------
 pymatgen/io/lammps/outputs.py |  4 ++--
 pymatgen/io/xyz.py            |  4 ++--
 pymatgen/util/provenance.py   |  4 ++--
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/pymatgen/io/lammps/data.py b/pymatgen/io/lammps/data.py
index 3f7e3b88240..0119341be15 100644
--- a/pymatgen/io/lammps/data.py
+++ b/pymatgen/io/lammps/data.py
@@ -664,17 +664,17 @@ def from_file(cls, filename: str, atom_style: str = "full", sort_id: bool = Fals
         def parse_section(sec_lines) -> tuple[str, pd.DataFrame]:
             title_info = sec_lines[0].split("#", 1)
             kw = title_info[0].strip()
-            sio = StringIO("".join(sec_lines[2:]))  # skip the 2nd line
+            str_io = StringIO("".join(sec_lines[2:]))  # skip the 2nd line
             if kw.endswith("Coeffs") and not kw.startswith("PairIJ"):
                 df_list = [
-                    pd.read_csv(StringIO(line), header=None, comment="#", delim_whitespace=True)
+                    pd.read_csv(StringIO(line), header=None, comment="#", sep=r"\s+")
                     for line in sec_lines[2:]
                     if line.strip()
                 ]
                 df = pd.concat(df_list, ignore_index=True)
                 names = ["id"] + [f"coeff{i}" for i in range(1, df.shape[1])]
             else:
-                df = pd.read_csv(sio, header=None, comment="#", delim_whitespace=True)
+                df = pd.read_csv(str_io, header=None, comment="#", sep=r"\s+")
                 if kw == "PairIJ Coeffs":
                     names = ["id1", "id2"] + [f"coeff{i}" for i in range(1, df.shape[1] - 1)]
                     df.index.name = None
@@ -1381,12 +1381,12 @@ def parse_xyz(cls, filename: str | Path) -> pd.DataFrame:
         with zopen(filename, mode="rt") as file:
             lines = file.readlines()

-        sio = StringIO("".join(lines[2:]))  # skip the 2nd line
+        str_io = StringIO("".join(lines[2:]))  # skip the 2nd line
         df = pd.read_csv(
-            sio,
+            str_io,
             header=None,
             comment="#",
-            delim_whitespace=True,
+            sep=r"\s+",
             names=["atom", "x", "y", "z"],
         )
         df.index += 1
diff --git a/pymatgen/io/lammps/outputs.py b/pymatgen/io/lammps/outputs.py
index 30757f6614d..033a7d58b4a 100644
--- a/pymatgen/io/lammps/outputs.py
+++ b/pymatgen/io/lammps/outputs.py
@@ -69,7 +69,7 @@ def from_str(cls, string: str) -> Self:
             bounds -= np.array([[min(x), max(x)], [min(y), max(y)], [0, 0]])
         box = LammpsBox(bounds, tilt)
         data_head = lines[8].replace("ITEM: ATOMS", "").split()
-        data = pd.read_csv(StringIO("\n".join(lines[9:])), names=data_head, delim_whitespace=True)
+        data = pd.read_csv(StringIO("\n".join(lines[9:])), names=data_head, sep=r"\s+")
         return cls(time_step, n_atoms, box, data)

     @classmethod
@@ -180,7 +180,7 @@ def _parse_thermo(lines: list[str]) -> pd.DataFrame:
             df = df[columns]
         # one line thermo data
         else:
-            df = pd.read_csv(StringIO("".join(lines)), delim_whitespace=True)
+            df = pd.read_csv(StringIO("".join(lines)), sep=r"\s+")
         return df

     runs = []
diff --git a/pymatgen/io/xyz.py b/pymatgen/io/xyz.py
index aefaa24f00a..595be4d7b14 100644
--- a/pymatgen/io/xyz.py
+++ b/pymatgen/io/xyz.py
@@ -118,9 +118,9 @@ def as_dataframe(self):
             pandas.DataFrame
         """
         lines = str(self)
-        sio = StringIO(lines)
+        str_io = StringIO(lines)
         df_xyz = pd.read_csv(
names=("atom", "x", "y", "z") + str_io, header=None, skiprows=(0, 1), comment="#", sep=r"\s+", names=("atom", "x", "y", "z") ) df_xyz.index += 1 return df_xyz diff --git a/pymatgen/util/provenance.py b/pymatgen/util/provenance.py index f1763eeaa0b..4402653b9b5 100644 --- a/pymatgen/util/provenance.py +++ b/pymatgen/util/provenance.py @@ -46,10 +46,10 @@ def is_valid_bibtex(reference: str) -> bool: """ # str is necessary since pybtex seems to have an issue with unicode. The # filter expression removes all non-ASCII characters. - sio = StringIO(reference.encode("ascii", "ignore").decode("ascii")) + str_io = StringIO(reference.encode("ascii", "ignore").decode("ascii")) parser = bibtex.Parser() errors.set_strict_mode(enable=False) - bib_data = parser.parse_stream(sio) + bib_data = parser.parse_stream(str_io) return len(bib_data.entries) > 0