Add archive method and PyBaMM simulation importer #154

Merged
32 commits, merged Oct 21, 2024

Commits
98d4704
fix dma returning array objects in its dataframe
tomjholland Oct 9, 2024
f7a0502
Strip back readme processor, output now a single dict
tomjholland Sep 27, 2024
4ddabed
Persistent step_descriptions dataframe on RawData objects to create p…
tomjholland Sep 27, 2024
aa49a53
Move cycle column calculation into filtering step to allow for nested…
tomjholland Sep 27, 2024
93857c5
Fix missing Cycle column in cycling analysis module
tomjholland Sep 27, 2024
380ce72
Fix errors in pulsing tests
tomjholland Sep 27, 2024
9072777
Remove Cycle from expected columns in imported dataframes
tomjholland Sep 27, 2024
2d29de9
Replace checking for Cycle column with verifying cycle_info list
tomjholland Sep 27, 2024
cb77d0e
Make utils module, move flatten function into utils
tomjholland Sep 28, 2024
e44b3ee
Add back implicit readme support
tomjholland Sep 28, 2024
fb77274
Restore functionality to process readme containing only total steps
tomjholland Sep 28, 2024
06f5a25
Test that nans are in place where no step descriptions are given
tomjholland Sep 28, 2024
2f1ac1e
Remove cycle column in cell module tests
tomjholland Sep 29, 2024
ba21c61
Fix dashboard errors looking for procedure.titles and "Cycle" column …
tomjholland Oct 2, 2024
385c1a3
Add cycle method back into Procedure class
tomjholland Oct 2, 2024
a65b6e6
Add basic pybamm solution reading
tomjholland Oct 2, 2024
8d73134
Add support for separate pybamm experiments within a procedure
tomjholland Oct 2, 2024
39cdc23
Fix error with empty initialised lazyframe
tomjholland Oct 3, 2024
d2f2f2b
Remove Cycle column mention for pybamm imports as Cycle column not al…
tomjholland Oct 5, 2024
8813b5f
Correct pybamm current column to pyprobe convention of negative for d…
tomjholland Oct 8, 2024
ddb5da2
Add version number property and test against pyproject
tomjholland Oct 4, 2024
ae5aede
Store step descriptions in dict rather than lazyframe
tomjholland Oct 5, 2024
4916bc0
Save and load a cell object to a folder
tomjholland Oct 5, 2024
e83ce14
Add warning if loading an archive saved with a different version
tomjholland Oct 5, 2024
07f3150
Allow saved archive to be packaged into a zip file
tomjholland Oct 5, 2024
fbb7ebf
Add feature to remove an experiment from a procedure
tomjholland Oct 5, 2024
c772201
Experiment removal now in-place
tomjholland Oct 5, 2024
df5add3
Fix version number check in loading archive, get __version__ directly…
tomjholland Oct 5, 2024
b6fc15e
Fix test failures caused by edge case passing incorrect type to analy…
tomjholland Oct 18, 2024
7afe620
Move versioning into cell module and hardcode
tomjholland Oct 21, 2024
98941fd
Remove cycle references in data importing and tests
tomjholland Oct 21, 2024
f0ff9a0
Remove cycle column manipulation in performance example
tomjholland Oct 21, 2024
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -22,7 +22,7 @@ repos:
rev: v1.5.1
hooks:
- id: mypy
additional_dependencies: [types-beautifulsoup4, types-decorator, types-PyYAML, pydantic]
additional_dependencies: [types-beautifulsoup4, types-decorator, types-PyYAML, pydantic, types-toml]
- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.35.0
hooks:
10 changes: 7 additions & 3 deletions docs/source/examples/comparing-pyprobe-performance.ipynb
@@ -103,6 +103,12 @@
" csv_time[repeat]= timeit.default_timer() - start_time\n",
" start_time = timeit.default_timer()\n",
" df = pd.read_parquet(data_directory + '/' + file)\n",
" # Add a column to identify the cycle number\n",
" df['Cycle'] = (\n",
" (df['Step'].astype(int) - df['Step'].astype(int).shift() < 0)\n",
" .fillna(0)\n",
" .cumsum()\n",
" )\n",
" cumulative_time[0, repeat] = timeit.default_timer() - start_time\n",
"\n",
" experiment = df[df['Step'].isin([4, 5, 6, 7])]\n",
@@ -240,20 +246,18 @@
"repeated_data = pl.concat([data] * n_repeats)\n",
"\n",
"# Repeat the 'Cycle' and 'Event' columns to match the length of the repeated data\n",
"cycle_repeated = pl.concat([data['Cycle']] * n_repeats)\n",
"event_repeated = pl.concat([data['Event']] * n_repeats)\n",
"step_repeated = pl.concat([data['Step']] * n_repeats)\n",
"time_repeated = pl.concat([data['Time [s]']]* n_repeats)\n",
"\n",
"# Increment the 'Cycle' and 'Event' columns\n",
"cycle_increment = data['Cycle'].max() + 1\n",
"event_increment = data['Event'].max() + 1\n",
"step_increment = data['Step'].max() + 1\n",
"time_increment = data['Time [s]'].max()\n",
"\n",
"\n",
"repeated_data = repeated_data.with_columns([\n",
" (pl.arange(0, len(repeated_data)) // len(data) * cycle_increment + cycle_repeated).alias('Cycle'),\n",
" # (pl.arange(0, len(repeated_data)) // len(data) * cycle_increment + cycle_repeated).alias('Cycle'),\n",
" (pl.arange(0, len(repeated_data)) // len(data) * event_increment + event_repeated).alias('Event'),\n",
" (pl.arange(0, len(repeated_data)) // len(data) * event_increment + step_repeated).alias('Step'),\n",
" (pl.arange(0, len(repeated_data)) // len(data) * time_increment + time_repeated).alias('Time [s]'),\n",
2 changes: 1 addition & 1 deletion pyprobe/__init__.py
@@ -1,5 +1,5 @@
"""The PyProBE package."""
from .cell import Cell, make_cell_list # noqa: F401
from .cell import Cell, __version__, load_archive, make_cell_list # noqa: F401
from .dashboard import launch_dashboard # noqa: F401
from .plot import Plot # noqa: F401
from .result import Result # noqa: F401
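
With this change, load_archive and __version__ are importable from the package root alongside Cell and make_cell_list. A minimal usage sketch (the archive path is a placeholder):

import pyprobe

print(pyprobe.__version__)                  # version string hardcoded in pyprobe/cell.py
cell = pyprobe.load_archive("cell_01.zip")  # restore a previously archived Cell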
3 changes: 2 additions & 1 deletion pyprobe/analysis/cycling.py
@@ -5,7 +5,7 @@
from pydantic import BaseModel

from pyprobe.analysis.utils import AnalysisValidator
from pyprobe.filters import Experiment
from pyprobe.filters import Experiment, get_cycle_column
from pyprobe.result import Result


@@ -42,6 +42,7 @@ def summary(self, dchg_before_chg: bool = True) -> Result:
AnalysisValidator(
input_data=self.input_data, required_columns=["Capacity [Ah]", "Time [s]"]
)
self.input_data.base_dataframe = get_cycle_column(self.input_data)

self._create_capacity_throughput()
lf_capacity_throughput = self.input_data.base_dataframe.group_by(
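
Because cyclers no longer write a Cycle column at import time, summary() now derives it on demand through get_cycle_column. That helper's body is not shown in this excerpt; judging from the cycle logic removed from basecycler.py further down, a minimal polars sketch of the idea might be:

import polars as pl

def get_cycle_column(input_data):
    # Sketch only: start a new cycle whenever the step number decreases,
    # mirroring the property removed from BaseCycler below.
    return input_data.base_dataframe.with_columns(
        (pl.col("Step").cast(pl.Int64) - pl.col("Step").cast(pl.Int64).shift() < 0)
        .fill_null(strategy="zero")
        .cum_sum()
        .cast(pl.Int64)
        .alias("Cycle")
    )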
8 changes: 4 additions & 4 deletions pyprobe/analysis/degradation_mode_analysis.py
@@ -224,10 +224,10 @@ def quantify_degradation_modes(
self.dma_result = electrode_capacity_results[0].clean_copy(
pl.DataFrame(
{
"SOH": SOH,
"LAM_pe": LAM_pe,
"LAM_ne": LAM_ne,
"LLI": LLI,
"SOH": SOH[:, 0],
"LAM_pe": LAM_pe[:, 0],
"LAM_ne": LAM_ne[:, 0],
"LLI": LLI[:, 0],
}
)
)
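
The [:, 0] indexing addresses the "fix dma returning array objects in its dataframe" commit: the degradation-mode quantities apparently come back as (n, 1) column vectors, and storing those directly yields one array object per row instead of a scalar column. A shape-only illustration of the fix (values are arbitrary):

import numpy as np

SOH = np.linspace(1.0, 0.9, 5).reshape(-1, 1)  # (5, 1): each row holds a length-1 array
print(SOH.shape)        # (5, 1)
print(SOH[:, 0].shape)  # (5,) -> one scalar per DataFrame row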
239 changes: 235 additions & 4 deletions pyprobe/cell.py
@@ -1,17 +1,23 @@
"""Module for the Cell class."""
import json
import os
import shutil
import time
import warnings
import zipfile
from typing import Callable, Dict, List, Optional

import distinctipy
import polars as pl
import pybamm.solvers.solution
from pydantic import BaseModel, Field, field_validator, validate_call

from pyprobe.cyclers import arbin, basecycler, basytec, biologic, maccor, neware
from pyprobe.filters import Procedure
from pyprobe.readme_processor import process_readme

__version__ = "1.0.3"


class Cell(BaseModel):
"""A class for a cell in a battery experiment."""
@@ -193,12 +199,9 @@ def add_procedure(
readme = process_readme(readme_path)

self.procedure[procedure_name] = Procedure(
titles=readme.titles,
steps_idx=readme.step_numbers,
readme_dict=readme.experiment_dict,
base_dataframe=base_dataframe,
info=self.info,
pybamm_experiment=readme.pybamm_experiment,
pybamm_experiment_list=readme.pybamm_experiment_list,
)

@staticmethod
@@ -287,6 +290,234 @@ def _get_data_paths(
data_path = os.path.join(folder_path, filename_str)
return data_path

def import_pybamm_solution(
self,
procedure_name: str,
experiment_names: List[str] | str,
pybamm_solutions: List[pybamm.solvers.solution] | pybamm.solvers.solution,
output_data_path: Optional[str] = None,
optional_variables: Optional[List[str]] = None,
) -> None:
"""Import a PyBaMM solution object into a procedure of the cell.

Filtering a PyBaMM solution object by cycle and step reflects the behaviour of
the :code:`cycles` and :code:`steps` dictionaries of the PyBaMM solution object.

Multiple experiments can be imported into the same procedure. This is achieved
by providing multiple solution objects and experiment names.

This method optionally writes the data to a parquet file, if a data path is
provided.

Args:
procedure_name (str):
A name to give the procedure. This will be used when calling
:code:`cell.procedure[procedure_name]`.
pybamm_solutions (list or pybamm_solution):
A list of PyBaMM solution objects or a single PyBaMM solution object.
experiment_names (list or str):
A list of experiment names or a single experiment name to assign to the
PyBaMM solution object.
output_data_path (str, optional):
The path to write the parquet file. Defaults to None.
optional_variables (list, optional):
A list of variables to import from the PyBaMM solution object in
addition to the PyProBE required variables. Defaults to None.
"""
# the minimum required variables to import from the PyBaMM solution object
required_variables = [
"Time [s]",
"Current [A]",
"Terminal voltage [V]",
"Discharge capacity [A.h]",
]

# get the list of variables to import from the PyBaMM solution object
if optional_variables is not None:
import_variables = required_variables + optional_variables
else:
import_variables = required_variables

# check if the experiment names and PyBaMM solutions are lists
if isinstance(experiment_names, list) and isinstance(pybamm_solutions, list):
if len(experiment_names) != len(pybamm_solutions):
raise ValueError(
"The number of experiment names and PyBaMM solutions must be equal."
)
elif isinstance(experiment_names, list) != isinstance(pybamm_solutions, list):
if isinstance(experiment_names, list):
raise ValueError(
"A list of experiment names must be provided with a list of PyBaMM"
" solutions."
)
else:
raise ValueError(
"A single experiment name must be provided with a single PyBaMM"
" solution."
)
else:
experiment_names = [str(experiment_names)]
pybamm_solutions = [pybamm_solutions]

lazyframe_created = False
for experiment_name, pybamm_solution in zip(experiment_names, pybamm_solutions):
# get the data from the PyBaMM solution object
pybamm_data = pybamm_solution.get_data_dict(import_variables)
# convert the PyBaMM data to a polars dataframe and add the experiment name
# as a column
solution_data = pl.LazyFrame(pybamm_data).with_columns(
pl.lit(experiment_name).alias("Experiment")
)
if lazyframe_created is False:
all_solution_data = solution_data
lazyframe_created = True
else:
# join the new solution data with the existing solution data, a right
# join is used to keep all the data
all_solution_data = all_solution_data.join(
solution_data, on=import_variables + ["Step"], how="right"
)
# fill null values where the experiment has been extended with the newly
# joined experiment name
all_solution_data = all_solution_data.with_columns(
pl.col("Experiment").fill_null(pl.col("Experiment_right"))
)
# get the maximum step number for each experiment
max_steps = (
all_solution_data.group_by("Experiment")
.agg(pl.max("Step").alias("Max Step"))
.sort("Experiment")
.with_columns(pl.col("Max Step").cum_sum().shift())
)
# add the maximum step number from the previous experiment to the step number
all_solution_data = all_solution_data.join(
max_steps, on="Experiment", how="left"
).with_columns(
(pl.col("Step") + pl.col("Max Step").fill_null(-1) + 1).alias("Step")
)
# get the range of step values for each experiment
step_ranges = all_solution_data.group_by("Experiment").agg(
pl.arange(pl.col("Step").min(), pl.col("Step").max() + 1).alias(
"Step Range"
)
)

# create a dictionary of the experiment names and the step ranges
experiment_dict = {}
for row in step_ranges.collect().iter_rows():
experiment = row[0]
experiment_dict[experiment] = {"Steps": row[1]}
experiment_dict[experiment]["Step Descriptions"] = []

# reformat the data to the PyProBE format
base_dataframe = all_solution_data.select(
[
pl.col("Time [s]"),
pl.col("Current [A]") * -1,
pl.col("Terminal voltage [V]").alias("Voltage [V]"),
(pl.col("Discharge capacity [A.h]") * -1).alias("Capacity [Ah]"),
pl.col("Step"),
(
(
pl.col("Step").cast(pl.Int64)
- pl.col("Step").cast(pl.Int64).shift()
!= 0
)
.fill_null(strategy="zero")
.cum_sum()
.alias("Event")
),
]
)
# create the procedure object
self.procedure[procedure_name] = Procedure(
base_dataframe=base_dataframe, info=self.info, readme_dict=experiment_dict
)

# write the data to a parquet file if a path is provided
if output_data_path is not None:
if not output_data_path.endswith(".parquet"):
output_data_path += ".parquet"
base_dataframe.collect().write_parquet(output_data_path)
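# Usage sketch for import_pybamm_solution (illustrative only; the model choice,
# procedure name and experiment name below are placeholders, not taken from this PR):
#
#     import pybamm
#     from pyprobe import Cell
#
#     sim = pybamm.Simulation(pybamm.lithium_ion.DFN())
#     sol = sim.solve([0, 3600])
#
#     cell = Cell(info={"Name": "Simulated cell"})
#     cell.import_pybamm_solution(
#         procedure_name="Simulation",
#         experiment_names="Discharge",
#         pybamm_solutions=sol,
#     )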

def archive(self, path: str) -> None:
"""Archive the cell object.

Args:
path (str): The path to the archive directory or zip file.
"""
if path.endswith(".zip"):
zip = True
path = path[:-4]
else:
zip = False
if not os.path.exists(path):
os.makedirs(path)
metadata = self.dict()
metadata["PyProBE Version"] = __version__
for procedure_name, procedure in self.procedure.items():
if isinstance(procedure.base_dataframe, pl.LazyFrame):
df = procedure.base_dataframe.collect()
else:
df = procedure.base_dataframe
# write the dataframe to a parquet file
filename = procedure_name + ".parquet"
filepath = os.path.join(path, filename)
df.write_parquet(filepath)
# update the metadata with the filename
metadata["procedure"][procedure_name]["base_dataframe"] = filename
with open(os.path.join(path, "metadata.json"), "w") as f:
json.dump(metadata, f)

if zip:
with zipfile.ZipFile(path + ".zip", "w") as zipf:
for root, _, files in os.walk(path):
for file in files:
file_path = os.path.join(root, file)
arcname = os.path.relpath(file_path, path)
zipf.write(file_path, arcname)
# Delete the original directory
shutil.rmtree(path)


def load_archive(path: str) -> Cell:
"""Load a cell object from an archive.

Args:
path (str): The path to the archive directory.

Returns:
Cell: The cell object.
"""
if path.endswith(".zip"):
extract_path = path[:-4]
with zipfile.ZipFile(path, "r") as zipf:
zipf.extractall(extract_path)
# Delete the original zip file
os.remove(path)
archive_path = extract_path
else:
archive_path = path

with open(os.path.join(archive_path, "metadata.json"), "r") as f:
metadata = json.load(f)
if metadata["PyProBE Version"] != __version__:
warnings.warn(
f"The PyProBE version used to archive the cell was "
f"{metadata['PyProBE Version']}, the current version is "
f"{__version__}. There may be compatibility"
f" issues."
)
metadata.pop("PyProBE Version")
for procedure in metadata["procedure"].values():
procedure["base_dataframe"] = os.path.join(
archive_path, procedure["base_dataframe"]
)
cell = Cell(**metadata)

return cell


def make_cell_list(
record_filepath: str,
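
Together, Cell.archive and load_archive give a save/load round trip for a populated cell object. A minimal usage sketch, assuming a cell with at least one procedure already loaded (the path is a placeholder):

from pyprobe import load_archive

cell.archive("cell_01.zip")             # one parquet per procedure plus metadata.json, packed into a zip
restored = load_archive("cell_01.zip")  # warns if the archive was written by a different PyProBE version
print(list(restored.procedure))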
20 changes: 0 additions & 20 deletions pyprobe/cyclers/basecycler.py
@@ -302,7 +302,6 @@ def _assign_instructions(self) -> None:
"Capacity": self.capacity,
"Temperature": self.temperature,
"Step": self.step,
"Cycle": self.cycle,
"Event": self.event,
}
for quantity in self._column_map.keys():
@@ -353,7 +352,6 @@ def pyprobe_dataframe(self) -> pl.DataFrame:
required_columns = [
self.date if "Date" in self._column_map.keys() else None,
self.time,
self.cycle,
self.step,
self.event,
self.current,
@@ -480,24 +478,6 @@ def step(self) -> pl.Expr:
"""
return pl.col("Step")

@property
def cycle(self) -> pl.Expr:
"""Identify the cycle number.

Cycles are defined by repetition of steps. They are identified by a decrease
in the step number.

Returns:
pl.Expr: A polars expression for the cycle number.
"""
return (
(pl.col("Step").cast(pl.Int64) - pl.col("Step").cast(pl.Int64).shift() < 0)
.fill_null(strategy="zero")
.cum_sum()
.alias("Cycle")
.cast(pl.Int64)
)

@property
def event(self) -> pl.Expr:
"""Identify the event number.