Monkey-Market · J1Loop · Nov 1, 2024 · Nov 1, 2024 · Nov 1, 2024 · Nov 1, 2024
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -16,9 +16,12 @@ weasyprint = "62.3"
 paramiko = "3.4.1,<3.5.0"
 boto3 = "^1.35.34"
 pandas = "^2.2.3"
+polars = "^1.12.0"
 psycopg = "^3.2.3"
 psycopg-pool = "^3.2.3"
 openpyxl = "^3.1.5"
+XlsxWriter = "^3.2.0"
+fastexcel = "^0.12.0"
 
 [tool.poetry.group.dev.dependencies]
 pylint = "^3.3.1"

diff --git a/quipus/data_sources/csv_data_source.py b/quipus/data_sources/csv_data_source.py
@@ -1,60 +1,71 @@
-from typing import Optional, List
+from pathlib import Path
+from typing import Union, Optional, List
 
-import pandas as pd
+import polars as pl
 
 
 class CSVDataSource:
     """
     CSV DataSource class to manage data retrieval from CSV files.
 
     Attributes:
-        file_path (str): Path to the CSV file.
+        file_path (Union[Path, str]): Path to the CSV file.
         delimiter (str): Delimiter used in the CSV file.
         encoding (str): Encoding of the CSV file.
-        dataframe (Optional[pd.DataFrame]): Loaded data as a pandas DataFrame.
+        dataframe (Optional[pl.DataFrame]): Loaded data as a polars DataFrame.
     """
 
-    def __init__(self, file_path: str, delimiter: str = ",", encoding: str = "utf-8"):
+    def __init__(
+            self,
+            file_path: Union[Path, str],
+            delimiter: str = ",",
+            encoding: str = "utf8"
+    ):
         self.file_path = file_path
         self.delimiter = delimiter
         self.encoding = encoding
-        self.dataframe: Optional[pd.DataFrame] = None
+        self.dataframe: Optional[pl.DataFrame] = None
         self.__load_data()
 
     def __load_data(self) -> None:
         """
-        Load data from the CSV file into a pandas DataFrame.
+        Load data from the CSV file into a polars DataFrame.
         """
-        self.dataframe = pd.read_csv(
-            self.file_path, delimiter=self.delimiter, encoding=self.encoding
+        self.dataframe = pl.read_csv(
+            source=self.file_path,
+            separator=self.delimiter,
+            encoding=self.encoding
         )
 
     @property
-    def file_path(self) -> str:
+    def file_path(self) -> Union[Path, str]:
         """
         Get the path to the CSV file.
 
         Returns:
-            str: Path to the CSV file.
+            Union[Path, str]: Path to the CSV file.
         """
         return self.__file_path
 
     @file_path.setter
-    def file_path(self, file_path: str) -> None:
+    def file_path(self, file_path: Union[Path, str]) -> None:
         """
         Set the path to the CSV file.
 
         Args:
-            file_path (str): Path to the CSV file.
+            file_path (Union[Path, str]): Path to the CSV file.
 
         Raises:
             TypeError: If 'file_path' is not a string.
             ValueError: If 'file_path' is an empty string.
         """
-        if not isinstance(file_path, str):
-            raise TypeError("'file_path' must be a string.")
-        if not file_path.strip():
-            raise ValueError("'file_path' cannot be an empty string.")
+        if not isinstance(file_path, (Path, str)):
+            raise TypeError("'file_path' must be either a string or 'Path' object.")
+
+        # Ensure that the path exists and is not a directory
+        file_path = Path(file_path) if isinstance(file_path, str) else file_path
+        if not file_path.exists() or file_path.is_dir():
+            raise FileNotFoundError(f"'{file_path}' does not exist.")
         self.__file_path = file_path
 
     @property
@@ -98,12 +109,12 @@ def encoding(self, encoding: str) -> None:
             raise TypeError("'encoding' must be a string.")
         self.__encoding = encoding
 
-    def fetch_data(self) -> pd.DataFrame:
+    def fetch_data(self) -> pl.DataFrame:
         """
-        Fetch all data from the CSV file as a pandas DataFrame.
+        Fetch all data from the CSV file as a polars DataFrame.
 
         Returns:
-            pd.DataFrame: Data loaded from the CSV file.
+            pl.DataFrame: Data loaded from the CSV file.
         """
         if self.dataframe is None:
             raise RuntimeError("No data loaded from the CSV file.")
@@ -120,15 +131,15 @@ def get_columns(self) -> List[str]:
             raise RuntimeError("No data loaded from the CSV file.")
         return list(self.dataframe.columns)
 
-    def filter_data(self, query: str) -> pd.DataFrame:
+    def filter_data(self, query: str) -> pl.DataFrame:
         """
-        Filter the CSV data using a pandas query string.
+        Filter the CSV data using a polars SQL query (the table is named "self").
 
         Args:
             query (str): Query string to filter the data.
 
         Returns:
-            pd.DataFrame: Filtered data based on the query.
+            pl.DataFrame: Filtered data based on the query.
 
         Raises:
             RuntimeError: If no data is loaded.
@@ -138,7 +149,7 @@ def filter_data(self, query: str) -> pd.DataFrame:
             raise RuntimeError("No data loaded from the CSV file.")
 
         try:
-            return self.dataframe.query(query)
+            return self.dataframe.sql(query=query)
         except Exception as e:
             raise ValueError(f"Invalid query: {query}") from e
 

diff --git a/quipus/data_sources/dataframe_data_source.py b/quipus/data_sources/dataframe_data_source.py
@@ -1,50 +1,50 @@
 from typing import List
 
-import pandas as pd
+import polars as pl
 
 
 class DataFrameDataSource:
     """
-    Pandas DataFrame DataSource to manage data retrieval from DataFrames.
+    Polars DataFrame DataSource to manage data retrieval from DataFrames.
 
     Attributes:
-        dataframe (pd.DataFrame): DataFrame containing the data.
+        dataframe (pl.DataFrame): DataFrame containing the data.
     """
 
-    def __init__(self, dataframe: pd.DataFrame):
+    def __init__(self, dataframe: pl.DataFrame):
         self.dataframe = dataframe
 
     @property
-    def dataframe(self) -> pd.DataFrame:
+    def dataframe(self) -> pl.DataFrame:
         """
         Get the DataFrame containing the data.
 
         Returns:
-            pd.DataFrame: DataFrame containing the data.
+            pl.DataFrame: DataFrame containing the data.
         """
         return self.__dataframe
 
     @dataframe.setter
-    def dataframe(self, dataframe: pd.DataFrame) -> None:
+    def dataframe(self, dataframe: pl.DataFrame) -> None:
         """
         Set the DataFrame containing the data.
 
         Args:
-            dataframe (pd.DataFrame): DataFrame containing the data.
+            dataframe (pl.DataFrame): DataFrame containing the data.
 
         Raises:
-            TypeError: If 'dataframe' is not a pandas DataFrame.
+            TypeError: If 'dataframe' is not a polars DataFrame.
         """
-        if not isinstance(dataframe, pd.DataFrame):
-            raise TypeError("'dataframe' must be a pandas DataFrame.")
+        if not isinstance(dataframe, pl.DataFrame):
+            raise TypeError("'dataframe' must be a polars DataFrame.")
         self.__dataframe = dataframe
 
-    def fetch_data(self) -> pd.DataFrame:
+    def fetch_data(self) -> pl.DataFrame:
         """
         Fetch data from the DataFrame.
 
         Returns:
-            pd.DataFrame: DataFrame containing the data.
+            pl.DataFrame: DataFrame containing the data.
         """
         if self.dataframe is None:
             raise RuntimeError("No data loaded in the DataFrame.")
@@ -61,15 +61,15 @@ def get_columns(self) -> List[str]:
             raise RuntimeError("No data loaded in the DataFrame.")
         return list(self.dataframe.columns)
 
-    def filter_data(self, query: str) -> pd.DataFrame:
+    def filter_data(self, query: str) -> pl.DataFrame:
         """
         Filter the data in the DataFrame using a query.
 
         Args:
             query (str): Query to filter the data.
 
         Returns:
-            pd.DataFrame: Filtered DataFrame.
+            pl.DataFrame: Filtered DataFrame.
 
         Raises:
             RuntimeError: If no data is loaded in the DataFrame.
@@ -86,7 +86,7 @@ def filter_data(self, query: str) -> pd.DataFrame:
         if query.strip() == "":
             raise ValueError("Query cannot be an empty string.")
 
-        return self.dataframe.query(query)
+        return self.dataframe.sql(query)
 
     def __str__(self) -> str:
         """

diff --git a/quipus/data_sources/xlsx_data_source.py b/quipus/data_sources/xlsx_data_source.py
@@ -1,42 +1,43 @@
-from typing import Optional, List
+from pathlib import Path
+from typing import Union, Optional, List
 
-import pandas as pd
+import polars as pl
 
 
 class XLSXDataSource:
     """
     XLSX DataSource class to manage data retrieval from Excel (.xlsx) files.
 
     Attributes:
-        file_path (str): Path to the Excel file.
+        file_path (Union[Path, str]): Path to the Excel file.
         sheet_name (str): Name of the sheet to load from the Excel file.
-        dataframe (Optional[pd.DataFrame]): Loaded data as a pandas DataFrame.
+        dataframe (Optional[pl.DataFrame]): Loaded data as a polars DataFrame.
     """
 
-    def __init__(self, file_path: str, sheet_name: str):
+    def __init__(self, file_path: Union[Path, str], sheet_name: str):
         self.file_path = file_path
         self.sheet_name = sheet_name
-        self.dataframe: Optional[pd.DataFrame] = None
+        self.dataframe: Optional[pl.DataFrame] = None
         self.__load_data()
 
     def __load_data(self) -> None:
         """
-        Load data from the Excel file into a pandas DataFrame.
+        Load data from the Excel file into a polars DataFrame.
         """
-        self.dataframe = pd.read_excel(self.file_path, sheet_name=self.sheet_name)
+        self.dataframe = pl.read_excel(self.file_path, sheet_name=self.sheet_name)
 
     @property
-    def file_path(self) -> str:
+    def file_path(self) -> Union[Path, str]:
         """
         Get the path to the Excel file.
 
         Returns:
-            str: Path to the Excel file.
+            Union[Path, str]: Path to the Excel file.
         """
         return self.__file_path
 
     @file_path.setter
-    def file_path(self, file_path: str) -> None:
+    def file_path(self, file_path: Union[Path, str]) -> None:
         """
         Set the path to the Excel file.
 
@@ -47,11 +48,14 @@ def file_path(self, file_path: str) -> None:
             TypeError: If 'file_path' is not a string.
             ValueError: If 'file_path' is an empty string.
         """
-        if not isinstance(file_path, str):
-            raise TypeError("'file_path' must be a string.")
-        if not file_path.strip():
-            raise ValueError("'file_path' cannot be an empty string.")
-        self.__file_path = file_path
+        if not isinstance(file_path, (Path, str)):
+            raise TypeError("'file_path' must be either a string or 'Path' object.")
+
+        # Ensure that the path exists and is not a directory
+        path = Path(file_path) if isinstance(file_path, str) else file_path
+        if not path.exists() or path.is_dir():
+            raise FileNotFoundError(f"'{file_path}' does not exist.")
+        self.__file_path = path
 
     @property
     def sheet_name(self) -> str:
@@ -77,12 +81,12 @@ def sheet_name(self, sheet_name: str) -> None:
             raise TypeError("'sheet_name' must be a string.")
         self.__sheet_name = sheet_name
 
-    def fetch_data(self) -> pd.DataFrame:
+    def fetch_data(self) -> pl.DataFrame:
         """
-        Fetch all data from the Excel sheet as a pandas DataFrame.
+        Fetch all data from the Excel sheet as a polars DataFrame.
 
         Returns:
-            pd.DataFrame: Data loaded from the Excel sheet.
+            pl.DataFrame: Data loaded from the Excel sheet.
         """
         if self.dataframe is None:
             raise RuntimeError("No data loaded from the Excel file.")
@@ -99,15 +103,15 @@ def get_columns(self) -> List[str]:
             raise RuntimeError("No data loaded from the Excel file.")
         return list(self.dataframe.columns)
 
-    def filter_data(self, query: str) -> pd.DataFrame:
+    def filter_data(self, query: str) -> pl.DataFrame:
         """
-        Filter the Excel data using a pandas query string.
+        Filter the Excel data using a polars SQL query (the table is named "self").
 
         Args:
             query (str): Query string to filter the data.
 
         Returns:
-            pd.DataFrame: Filtered data based on the query.
+            pl.DataFrame: Filtered data based on the query.
 
         Raises:
             RuntimeError: If no data is loaded.
@@ -117,7 +121,7 @@ def filter_data(self, query: str) -> pd.DataFrame:
             raise RuntimeError("No data loaded from the Excel file.")
 
         try:
-            return self.dataframe.query(query)
+            return self.dataframe.sql(query)
         except Exception:
             raise ValueError("Invalid query provided.")