Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Convert between Excel file and Table #233

Merged
merged 21 commits into from
May 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
cdfd832
created methods for table from_excel_file() and to_excel_file()
PhilipGutberlet Apr 21, 2023
ac71a9f
replaced os.remove() with Path.unlink()
PhilipGutberlet Apr 21, 2023
b896252
style: apply automated linter fixes
megalinter-bot Apr 21, 2023
6b72525
style: apply automated linter fixes
megalinter-bot Apr 21, 2023
c17106b
Merge branch 'main' of https://github.com/Safe-DS/Stdlib into 138-cre…
patrikguempel Apr 28, 2023
2ec39a2
- to_excel_file() and from_excel_file() now also accept a path object
patrikguempel Apr 28, 2023
95e8d29
resolved further comments
patrikguempel May 5, 2023
ef63bb7
Merge branch 'main' into 138-create-a-table-from-an-excel-file
PhilipGutberlet May 5, 2023
e38e447
fixed type annotation for test_to_excel_file.py and test_from_excel_f…
PhilipGutberlet May 5, 2023
7305f53
style: apply automated linter fixes
megalinter-bot May 5, 2023
15f9a50
removed unused imports
PhilipGutberlet May 5, 2023
1d51383
Merge remote-tracking branch 'origin/138-create-a-table-from-an-excel…
PhilipGutberlet May 5, 2023
8c61330
fixed keywords
patrikguempel May 5, 2023
30ad2be
Merge remote-tracking branch 'origin/138-create-a-table-from-an-excel…
patrikguempel May 5, 2023
9d93c13
build: recreate lock file
lars-reimann May 5, 2023
c7e801f
Merge branch 'main' into 138-create-a-table-from-an-excel-file
lars-reimann May 5, 2023
06f9e87
test: compare entire tables
lars-reimann May 5, 2023
466c854
docs: add comma
lars-reimann May 5, 2023
e3ef5d1
test: same structure as for other to_XY_file methods
lars-reimann May 5, 2023
4df05d2
style: apply automated linter fixes
megalinter-bot May 5, 2023
e14f9b9
test: rename test
lars-reimann May 5, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pandas = "^2.0.0"
pillow = "^9.5.0"
scikit-learn = "^1.2.0"
seaborn = "^0.12.2"
openpyxl = "^3.1.2"

[tool.poetry.group.dev.dependencies]
pytest = "^7.2.1"
Expand Down
49 changes: 49 additions & 0 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import matplotlib.pyplot as plt
import numpy as np
import openpyxl
import pandas as pd
import seaborn as sns
from pandas import DataFrame
Expand Down Expand Up @@ -84,6 +85,33 @@ def from_csv_file(path: str | Path) -> Table:
except FileNotFoundError as exception:
raise FileNotFoundError(f'File "{path}" does not exist') from exception

@staticmethod
def from_excel_file(path: str | Path) -> Table:
    """
    Read data from an Excel file into a table.

    Columns whose header label contains "Unnamed" are skipped. pandas assigns labels of the form "Unnamed: <n>" to
    columns that have no header cell, e.g. an index column that was written without a header.

    Parameters
    ----------
    path : str | Path
        The path to the Excel file.

    Returns
    -------
    table : Table
        The table created from the Excel file.

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.
    ValueError
        If the file could not be read.
    """
    try:
        return Table(
            pd.read_excel(
                path,
                engine="openpyxl",
                # Header labels are not guaranteed to be strings (a numeric header cell yields a number), so
                # convert before the substring check instead of failing with a TypeError.
                usecols=lambda colname: "Unnamed" not in str(colname),
            ),
        )
    except FileNotFoundError as exception:
        raise FileNotFoundError(f'File "{path}" does not exist') from exception

@staticmethod
def from_json_file(path: str | Path) -> Table:
"""
Expand Down Expand Up @@ -1242,6 +1270,27 @@ def to_csv_file(self, path: str | Path) -> None:
data_to_csv.columns = self._schema.column_names
data_to_csv.to_csv(path, index=False)

def to_excel_file(self, path: str | Path) -> None:
    """
    Write the data from the table into an Excel file.

    If the file and/or the directories do not exist, they will be created. If the file already exists, it will be
    overwritten.

    Parameters
    ----------
    path : str | Path
        The path to the output file.
    """
    # The parent directories must exist before anything is written to `path`; creating them only after the
    # first save would make that save fail for a path inside a missing directory.
    Path(path).parent.mkdir(parents=True, exist_ok=True)

    # Create Excel metadata in the file
    tmp_table_file = openpyxl.Workbook()
    tmp_table_file.save(path)

    # Work on a copy so that relabelling the columns does not touch the table's own data.
    data_to_excel = self._data.copy()
    data_to_excel.columns = self._schema.column_names
    # NOTE: the DataFrame index is written too (pandas default), which produces a leading column without a header.
    data_to_excel.to_excel(path)

def to_json_file(self, path: str | Path) -> None:
"""
Write the data from the table into a JSON file.
Expand Down
Binary file added tests/resources/dummy_excel_file.xlsx
Binary file not shown.
PhilipGutberlet marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from pathlib import Path

import pytest
from safeds.data.tabular.containers import Table

from tests.helpers import resolve_resource_path


@pytest.mark.parametrize(
    ("path", "expected"),
    [
        pytest.param(
            resolve_resource_path("./dummy_excel_file.xlsx"),
            Table.from_dict({"A": [1], "B": [2]}),
            id="string path",
        ),
        pytest.param(
            Path(resolve_resource_path("./dummy_excel_file.xlsx")),
            Table.from_dict({"A": [1], "B": [2]}),
            id="object path",
        ),
    ],
)
def test_should_create_table_from_excel_file(path: str | Path, expected: Table) -> None:
    """
    Check that reading the dummy Excel resource yields the expected table.

    The resource is addressed once by the value returned from `resolve_resource_path` and once by that value
    wrapped in a `Path`.
    """
    loaded = Table.from_excel_file(path)
    assert loaded == expected


def test_should_raise_if_file_not_found() -> None:
    """Check that reading from a resource path that does not exist raises `FileNotFoundError`."""
    with pytest.raises(FileNotFoundError):
        # Resolved inside the context manager so the expectation covers exactly the same calls as before.
        missing_file = resolve_resource_path("test_table_from_excel_file_invalid.xls")
        Table.from_excel_file(missing_file)
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from pathlib import Path
from tempfile import NamedTemporaryFile

from safeds.data.tabular.containers import Table


def test_should_create_csv_file_from_table_by_str() -> None:
    """
    Check that a table written to an Excel file and read back is unchanged, passing the path as a `str`.

    Despite "csv" in the name, this test exercises `to_excel_file` and `from_excel_file`.
    """
    table = Table.from_dict({"col1": ["col1_1"], "col2": ["col2_1"]})

    # Only reserve a unique ".xlsx" path here. `delete=False` keeps the file after the handle is closed, so no
    # handle is open while the workbook is written and read, and the cleanup below is explicit.
    with NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp_table_file:
        tmp_path = Path(tmp_table_file.name)

    try:
        table.to_excel_file(str(tmp_path))
        table_r = Table.from_excel_file(str(tmp_path))
    finally:
        # Remove the file even if writing or reading failed, so the test leaves nothing behind.
        tmp_path.unlink(missing_ok=True)

    assert table == table_r


def test_should_create_csv_file_from_table_by_path() -> None:
    """
    Check that a table written to an Excel file and read back is unchanged, passing the path as a `Path`.

    Despite "csv" in the name, this test exercises `to_excel_file` and `from_excel_file`.
    """
    table = Table.from_dict({"col1": ["col1_1"], "col2": ["col2_1"]})

    # Only reserve a unique ".xlsx" path here. `delete=False` keeps the file after the handle is closed, so no
    # handle is open while the workbook is written and read, and the cleanup below is explicit.
    with NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp_table_file:
        tmp_path = Path(tmp_table_file.name)

    try:
        table.to_excel_file(tmp_path)
        table_r = Table.from_excel_file(tmp_path)
    finally:
        # Remove the file even if writing or reading failed, so the test leaves nothing behind.
        tmp_path.unlink(missing_ok=True)

    assert table == table_r