From d2237a8fdaa1e5ba652cf69f82c3786901b4afa8 Mon Sep 17 00:00:00 2001
From: Alexander Bonkowski <57258530+ab5424@users.noreply.github.com>
Date: Tue, 28 May 2024 23:11:36 +0200
Subject: [PATCH] `pandas.read_csv`: replace deprecated `delim_whitespace=True` with `sep="\s+"` (#3846)

* Replace deprecated `delim_whitespace=True` with `sep="\s+"`

* pre-commit auto-fixes

* sio->str_io

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Janosh Riebesell
---
 pymatgen/io/lammps/data.py    | 12 ++++++------
 pymatgen/io/lammps/outputs.py |  4 ++--
 pymatgen/io/xyz.py            |  4 ++--
 pymatgen/util/provenance.py   |  4 ++--
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/pymatgen/io/lammps/data.py b/pymatgen/io/lammps/data.py
index 3f7e3b88240..0119341be15 100644
--- a/pymatgen/io/lammps/data.py
+++ b/pymatgen/io/lammps/data.py
@@ -664,17 +664,17 @@ def from_file(cls, filename: str, atom_style: str = "full", sort_id: bool = Fals
         def parse_section(sec_lines) -> tuple[str, pd.DataFrame]:
             title_info = sec_lines[0].split("#", 1)
             kw = title_info[0].strip()
-            sio = StringIO("".join(sec_lines[2:]))  # skip the 2nd line
+            str_io = StringIO("".join(sec_lines[2:]))  # skip the 2nd line
             if kw.endswith("Coeffs") and not kw.startswith("PairIJ"):
                 df_list = [
-                    pd.read_csv(StringIO(line), header=None, comment="#", delim_whitespace=True)
+                    pd.read_csv(StringIO(line), header=None, comment="#", sep=r"\s+")
                     for line in sec_lines[2:]
                     if line.strip()
                 ]
                 df = pd.concat(df_list, ignore_index=True)
                 names = ["id"] + [f"coeff{i}" for i in range(1, df.shape[1])]
             else:
-                df = pd.read_csv(sio, header=None, comment="#", delim_whitespace=True)
+                df = pd.read_csv(str_io, header=None, comment="#", sep=r"\s+")
                 if kw == "PairIJ Coeffs":
                     names = ["id1", "id2"] + [f"coeff{i}" for i in range(1, df.shape[1] - 1)]
                     df.index.name = None
@@ -1381,12 +1381,12 @@ def parse_xyz(cls, filename: str | Path) -> pd.DataFrame:
         with zopen(filename, mode="rt") as file:
             lines = file.readlines()

-        sio = StringIO("".join(lines[2:]))  # skip the 2nd line
+        str_io = StringIO("".join(lines[2:]))  # skip the 2nd line
         df = pd.read_csv(
-            sio,
+            str_io,
             header=None,
             comment="#",
-            delim_whitespace=True,
+            sep=r"\s+",
             names=["atom", "x", "y", "z"],
         )
         df.index += 1
diff --git a/pymatgen/io/lammps/outputs.py b/pymatgen/io/lammps/outputs.py
index 30757f6614d..033a7d58b4a 100644
--- a/pymatgen/io/lammps/outputs.py
+++ b/pymatgen/io/lammps/outputs.py
@@ -69,7 +69,7 @@ def from_str(cls, string: str) -> Self:
             bounds -= np.array([[min(x), max(x)], [min(y), max(y)], [0, 0]])
         box = LammpsBox(bounds, tilt)
         data_head = lines[8].replace("ITEM: ATOMS", "").split()
-        data = pd.read_csv(StringIO("\n".join(lines[9:])), names=data_head, delim_whitespace=True)
+        data = pd.read_csv(StringIO("\n".join(lines[9:])), names=data_head, sep=r"\s+")
         return cls(time_step, n_atoms, box, data)

     @classmethod
@@ -180,7 +180,7 @@ def _parse_thermo(lines: list[str]) -> pd.DataFrame:
             df = df[columns]
         # one line thermo data
         else:
-            df = pd.read_csv(StringIO("".join(lines)), delim_whitespace=True)
+            df = pd.read_csv(StringIO("".join(lines)), sep=r"\s+")
         return df

     runs = []
diff --git a/pymatgen/io/xyz.py b/pymatgen/io/xyz.py
index aefaa24f00a..595be4d7b14 100644
--- a/pymatgen/io/xyz.py
+++ b/pymatgen/io/xyz.py
@@ -118,9 +118,9 @@ def as_dataframe(self):
             pandas.DataFrame
         """
         lines = str(self)
-        sio = StringIO(lines)
+        str_io = StringIO(lines)
         df_xyz = pd.read_csv(
names=("atom", "x", "y", "z") + str_io, header=None, skiprows=(0, 1), comment="#", sep=r"\s+", names=("atom", "x", "y", "z") ) df_xyz.index += 1 return df_xyz diff --git a/pymatgen/util/provenance.py b/pymatgen/util/provenance.py index f1763eeaa0b..4402653b9b5 100644 --- a/pymatgen/util/provenance.py +++ b/pymatgen/util/provenance.py @@ -46,10 +46,10 @@ def is_valid_bibtex(reference: str) -> bool: """ # str is necessary since pybtex seems to have an issue with unicode. The # filter expression removes all non-ASCII characters. - sio = StringIO(reference.encode("ascii", "ignore").decode("ascii")) + str_io = StringIO(reference.encode("ascii", "ignore").decode("ascii")) parser = bibtex.Parser() errors.set_strict_mode(enable=False) - bib_data = parser.parse_stream(sio) + bib_data = parser.parse_stream(str_io) return len(bib_data.entries) > 0