Skip to content

Commit

Permalink
Merge pull request pandas-dev#3 from dimastbk/issue-50395
Browse files Browse the repository at this point in the history
some improves
  • Loading branch information
kostyafarber authored Jan 26, 2023
2 parents bebfec5 + 08a5616 commit 677a224
Show file tree
Hide file tree
Showing 2 changed files with 191 additions and 53 deletions.
65 changes: 47 additions & 18 deletions pandas/io/excel/_calaminereader.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
from __future__ import annotations

from io import (
BufferedReader,
BytesIO,
from datetime import (
date,
datetime,
time,
)
from pathlib import PurePath
from tempfile import NamedTemporaryFile
from typing import Union

from pandas._typing import (
FilePath,
ReadBuffer,
Scalar,
StorageOptions,
)
from pandas.compat._optional import import_optional_dependency

import pandas as pd

from pandas.io.common import stringify_path
from pandas.io.excel._base import (
BaseExcelReader,
inspect_excel_format,
)

ValueT = Union[int, float, str, bool, time, date, datetime]


class __calamine__:
pass
Expand All @@ -28,7 +36,9 @@ class CalamineExcelReader(BaseExcelReader):
_sheet_names: list[str] | None = None

def __init__(
self, filepath_or_buffer, storage_options: StorageOptions = None
self,
filepath_or_buffer: FilePath | ReadBuffer[bytes],
storage_options: StorageOptions = None,
) -> None:
import_optional_dependency("python_calamine")
super().__init__(filepath_or_buffer, storage_options=storage_options)
Expand All @@ -37,20 +47,15 @@ def __init__(
def _workbook_class(self) -> type[__calamine__]:
return __calamine__

def load_workbook(
self, filepath_or_buffer: str | PurePath | BufferedReader | BytesIO
) -> str:
if isinstance(filepath_or_buffer, BufferedReader):
filepath_or_buffer = filepath_or_buffer.name

elif isinstance(filepath_or_buffer, BytesIO):
def load_workbook(self, filepath_or_buffer) -> str:
if hasattr(filepath_or_buffer, "read") and hasattr(filepath_or_buffer, "seek"):
ext = inspect_excel_format(filepath_or_buffer)
with NamedTemporaryFile(suffix=f".{ext}", delete=False) as tmp_file:
tmp_file.write(filepath_or_buffer.getvalue())
filepath_or_buffer.seek(0)
tmp_file.write(filepath_or_buffer.read())
filepath_or_buffer = tmp_file.name

elif isinstance(filepath_or_buffer, PurePath):
filepath_or_buffer = filepath_or_buffer.as_posix()
else:
filepath_or_buffer = stringify_path(filepath_or_buffer)

assert isinstance(filepath_or_buffer, str)

Expand All @@ -75,7 +80,31 @@ def get_sheet_by_index(self, index: int) -> int:
self.raise_if_bad_sheet_by_index(index)
return index

def get_sheet_data(self, sheet: int, convert_float: bool) -> list[list[Scalar]]:
def get_sheet_data(
self, sheet: int, file_rows_needed: int | None = None
) -> list[list[Scalar]]:
def _convert_cell(value: ValueT) -> Scalar:
if isinstance(value, float):
val = int(value)
if val == value:
return val
else:
return value
elif isinstance(value, date):
return pd.Timestamp(value)
elif isinstance(value, time):
return value.isoformat()

return value

from python_calamine import get_sheet_data

return get_sheet_data(self.book, sheet)
rows = get_sheet_data(self.book, sheet)
data: list[list[Scalar]] = []

for row in rows:
data.append([_convert_cell(cell) for cell in row])
if file_rows_needed is not None and len(data) >= file_rows_needed:
break

return data
Loading

0 comments on commit 677a224

Please sign in to comment.