Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for MariaDb as backend for EvaDB #1027

Merged
merged 9 commits into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/_toc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ parts:
- file: source/reference/databases/postgres
- file: source/reference/databases/sqlite
- file: source/reference/databases/mysql
- file: source/reference/databases/mariadb

- file: source/reference/ai/index
title: AI Engines
Expand Down
36 changes: 36 additions & 0 deletions docs/source/reference/databases/mariadb.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
MariaDB
==========

The connection to MariaDB is based on the `mariadb <https://mariadb-corporation.github.io/mariadb-connector-python/>`_ library.

Dependency
----------

* mariadb


Parameters
----------

Required:

* `user` is the username corresponding to the database
* `password` is the password for the above username for the database
* `database` is the database name
* `host` is the host name, IP address or the URL
* `port` is the port used to make the TCP/IP connection.


Create Connection
-----------------

.. code-block:: text

CREATE DATABASE mariadb_data WITH ENGINE = 'mariadb', PARAMETERS = {
"user" : "eva",
"password": "password",
"host": "127.0.0.1".
"port": "7567",
"database": "evadb"
xzdandy marked this conversation as resolved.
Show resolved Hide resolved
};

2 changes: 2 additions & 0 deletions evadb/third_party/databases/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ def get_database_handler(engine: str, **kwargs):
return mod.SQLiteHandler(engine, **kwargs)
elif engine == "mysql":
return mod.MysqlHandler(engine, **kwargs)
elif engine == "mariadb":
return mod.MariaDbHandler(engine, **kwargs)
else:
raise NotImplementedError(f"Engine {engine} is not supported")

Expand Down
15 changes: 15 additions & 0 deletions evadb/third_party/databases/mariadb/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""mariadb integrations"""
176 changes: 176 additions & 0 deletions evadb/third_party/databases/mariadb/mariadb_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import mariadb
xzdandy marked this conversation as resolved.
Show resolved Hide resolved
import pandas as pd

from evadb.third_party.databases.types import (
DBHandler,
DBHandlerResponse,
DBHandlerStatus,
)


class MariaDbHandler(DBHandler):

"""
Class for implementing the Maria DB handler as a backend store for
EvaDb.
"""

def __init__(self, name: str, **kwargs):
"""
Initialize the handler.
Args:
name (str): name of the DB handler instance
**kwargs: arbitrary keyword arguments for establishing the connection.
"""
super().__init__(name)
self.host = kwargs.get("host")
self.port = kwargs.get("port")
self.user = kwargs.get("user")
self.password = kwargs.get("password")
self.database = kwargs.get("database")

def connect(self):
"""
Establish connection to the database.
Returns:
DBHandlerStatus
"""
try:
self.connection = mariadb.connect(
host=self.host,
port=self.port,
user=self.user,
password=self.password,
database=self.database,
)
# Auto commit is off by default.
self.connection.autocommit = True
return DBHandlerStatus(status=True)
except mariadb.Error as e:
return DBHandlerStatus(status=False, error=str(e))

def disconnect(self):
"""
Disconnect from the database.
"""
if self.connection:
self.connection.close()

def check_connection(self) -> DBHandlerStatus:
"""
Method for checking the status of database connection.
Returns:
DBHandlerStatus
"""
if self.connection:
return DBHandlerStatus(status=True)
else:
return DBHandlerStatus(status=False, error="Not connected to the database.")

def get_tables(self) -> DBHandlerResponse:
"""
Method to get the list of tables from database.
Returns:
DBHandlerStatus
"""
if not self.connection:
return DBHandlerResponse(data=None, error="Not connected to the database.")

try:
query = f"SELECT table_name as 'table_name' FROM information_schema.tables WHERE table_schema='{self.database}'"
tables_df = pd.read_sql_query(query, self.connection)
return DBHandlerResponse(data=tables_df)
except mariadb.Error as e:
return DBHandlerResponse(data=None, error=str(e))

def get_columns(self, table_name: str) -> DBHandlerResponse:
"""
Method to retrieve the columns of the specified table from the database.
Args:
table_name (str): name of the table whose columns are to be retrieved.
Returns:
DBHandlerStatus
"""
if not self.connection:
return DBHandlerResponse(data=None, error="Not connected to the database.")

try:
query = f"SELECT column_name as 'name', data_type as 'dtype' FROM information_schema.columns WHERE table_name='{table_name}'"
columns_df = pd.read_sql_query(query, self.connection)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the mapping function

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. Just double-check, does your integration pass when you run locally?

columns_df["dtype"] = columns_df["dtype"].apply(
self._mariadb_to_python_types
)
return DBHandlerResponse(data=columns_df)
except mariadb.Error as e:
return DBHandlerResponse(data=None, error=str(e))

def _fetch_results_as_df(self, cursor):
"""
Fetch results from the cursor for the executed query and return the
query results as dataframe.
"""
try:
res = cursor.fetchall()
res_df = pd.DataFrame(res, columns=[desc[0] for desc in cursor.description])
return res_df
except mariadb.ProgrammingError as e:
if str(e) == "no results to fetch":
return pd.DataFrame({"status": ["success"]})
raise e

def execute_native_query(self, query_string: str) -> DBHandlerResponse:
"""
Executes the native query on the database.
Args:
query_string (str): query in native format
Returns:
DBHandlerResponse
"""
if not self.connection:
return DBHandlerResponse(data=None, error="Not connected to the database.")

try:
cursor = self.connection.cursor()
cursor.execute(query_string)
return DBHandlerResponse(data=self._fetch_results_as_df(cursor))
except mariadb.Error as e:
return DBHandlerResponse(data=None, error=str(e))

def _mariadb_to_python_types(self, mariadb_type: str):
mapping = {
"tinyint": int,
"smallint": int,
"mediumint": int,
"bigint": int,
"int": int,
"decimal": float,
"float": float,
"double": float,
"text": str,
"string literals": str,
"char": str,
"varchar": str,
"boolean": bool,
# Add more mappings as needed
}

if mariadb_type in mapping:
return mapping[mariadb_type]
else:
raise Exception(
f"Unsupported column {mariadb_type} encountered in the MariaDB. Please raise a feature request!"
)
1 change: 1 addition & 0 deletions evadb/third_party/databases/mariadb/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mariadb
16 changes: 16 additions & 0 deletions test/third_party_tests/test_native_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,22 @@ def test_should_run_query_in_postgres(self):
self._raise_error_on_multiple_creation()
self._raise_error_on_invalid_connection()

def test_should_run_query_in_mariadb(self):
# Create database.
params = {
"user": "eva",
"password": "password",
"database": "evadb",
xzdandy marked this conversation as resolved.
Show resolved Hide resolved
}
query = f"""CREATE DATABASE test_data_source
WITH ENGINE = "mariadb",
PARAMETERS = {params};"""
execute_query_fetch_all(self.evadb, query)

# Test executions.
self._execute_native_query()
self._execute_evadb_query()

def test_should_run_query_in_sqlite(self):
# Create database.
params = {
Expand Down
Loading