Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add read_sql in the dbc package #3434

Merged
merged 6 commits into from
Mar 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Integrations/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# test-oriented local docker compose file to run redpanda and apicurio for testing
# test-oriented local docker compose file to run redpanda and postgres for testing

version: "3.4"

Expand Down
4 changes: 2 additions & 2 deletions docker/registry/cpp-client-base/gradle.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
io.deephaven.project.ProjectType=DOCKER_REGISTRY
deephaven.registry.imageName=ghcr.io/deephaven/cpp-client-base:latest
deephaven.registry.imageId=ghcr.io/deephaven/cpp-client-base@sha256:5b6e1749437170c0b0a3ebe5755c38fdbe3645b7c409cf8a72bc6a6acf50eebe
deephaven.registry.imageId=ghcr.io/deephaven/cpp-client-base@sha256:be329d3f792ddbf51b0d53a26f7a923a1b6e7990a9fffe8725cf7ba3f3e0da4a
# TODO(deephaven-base-images#54): arm64 native image for cpp-client-base
deephaven.registry.platform=linux/amd64
deephaven.registry.platform=linux/amd64
2 changes: 1 addition & 1 deletion docker/registry/go/gradle.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
io.deephaven.project.ProjectType=DOCKER_REGISTRY
deephaven.registry.imageName=golang:1
deephaven.registry.imageId=golang@sha256:6e835db45c7d88e12b057c0638814c2b266f69143437e4110b8bec5cfc7fa53b
deephaven.registry.imageId=golang@sha256:2edf6aab2d57644f3fe7407132a0d1770846867465a39c2083770cf62734b05d
2 changes: 1 addition & 1 deletion docker/registry/nginx-noroot-base/gradle.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
io.deephaven.project.ProjectType=DOCKER_REGISTRY
deephaven.registry.imageName=ghcr.io/deephaven/nginx-noroot-base:latest
deephaven.registry.imageId=ghcr.io/deephaven/nginx-noroot-base@sha256:e24ada348d0dd23b27a4edfe325677b4c2bd4edc6503f0a0e2b100f8e4164ac4
deephaven.registry.imageId=ghcr.io/deephaven/nginx-noroot-base@sha256:f10668ac0ab53c2a0005497679fa960c8e0dc7247c56ae0dda96a8c3ebcbfb9a
2 changes: 1 addition & 1 deletion docker/registry/node/gradle.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
io.deephaven.project.ProjectType=DOCKER_REGISTRY
deephaven.registry.imageName=node:14
deephaven.registry.imageId=node@sha256:eb709cd9ccbc70f194353d7f4227c52406a9dc6714d798666252d14e344422b2
deephaven.registry.imageId=node@sha256:1b5300317e95ed8bb2a1c25003f57e52400ce7af1e2e1efd9f52407293f88317
4 changes: 2 additions & 2 deletions docker/registry/protoc-base/gradle.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
io.deephaven.project.ProjectType=DOCKER_REGISTRY
deephaven.registry.imageName=ghcr.io/deephaven/protoc-base:latest
deephaven.registry.imageId=ghcr.io/deephaven/protoc-base@sha256:dcbde68539a04be744963d1f3c90c30be1c7f75d291635d2f1efc3ee0c0167ed
deephaven.registry.imageId=ghcr.io/deephaven/protoc-base@sha256:161d15724ae82ca0b4be788b4efc36d655361bd62b6c6b414def36d7f71c6150
# TODO(deephaven-base-images#55): arm64 native image for protoc-base
deephaven.registry.platform=linux/amd64
deephaven.registry.platform=linux/amd64
2 changes: 1 addition & 1 deletion docker/registry/python/gradle.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
io.deephaven.project.ProjectType=DOCKER_REGISTRY
deephaven.registry.imageName=python:3.7
deephaven.registry.imageId=python@sha256:50cb2d7edb60980bbf62438c3eb7fb7f3cd6656d05f30863fd8cc3058e068d79
deephaven.registry.imageId=python@sha256:1eacb4f802a52a73371facb2b69ee12aecf7b4cac69210ef1e41f44da2a02b27
2 changes: 1 addition & 1 deletion docker/registry/server-base/gradle.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
io.deephaven.project.ProjectType=DOCKER_REGISTRY
deephaven.registry.imageName=ghcr.io/deephaven/server-base:edge
deephaven.registry.imageId=ghcr.io/deephaven/server-base@sha256:be6261a716ea276958f0eec4c29c2ad1d4eaf6a8d90fc6b77fcd071a39db1d74
deephaven.registry.imageId=ghcr.io/deephaven/server-base@sha256:a9cc78389af95643a55592bd422972879d56cbf7bd2ee91bdd3c676e168747c1
2 changes: 1 addition & 1 deletion docker/registry/slim-base/gradle.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
io.deephaven.project.ProjectType=DOCKER_REGISTRY
deephaven.registry.imageName=ghcr.io/deephaven/server-slim-base:edge
deephaven.registry.imageId=ghcr.io/deephaven/server-slim-base@sha256:80545a6acde04839fbbcee9632b95a2a363b71f4eacf69b6174ee5cb613a43bc
deephaven.registry.imageId=ghcr.io/deephaven/server-slim-base@sha256:7c182292fcd51298e662bbc96d05c679e939cfabf4c8d1365e2eee05615ce950
7 changes: 4 additions & 3 deletions docker/server-jetty/src/main/server-jetty/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
adbc-driver-manager==0.1.0
adbc-driver-postgresql==0.1.0
adbc-driver-manager==0.2.0
adbc-driver-postgresql==0.2.0
connectorx==0.3.1; platform.machine == 'x86_64'
jmao-denver marked this conversation as resolved.
Show resolved Hide resolved
deephaven-plugin==0.3.0
java-utilities==0.2.0
jedi==0.18.2
Expand All @@ -13,4 +14,4 @@ pyarrow==11.0.0
python-dateutil==2.8.2
pytz==2022.7.1
six==1.16.0
turbodbc==4.5.6
turbodbc==4.5.10
7 changes: 4 additions & 3 deletions docker/server/src/main/server-netty/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
adbc-driver-manager==0.1.0
adbc-driver-postgresql==0.1.0
adbc-driver-manager==0.2.0
adbc-driver-postgresql==0.2.0
connectorx==0.3.1; platform.machine == 'x86_64'
jmao-denver marked this conversation as resolved.
Show resolved Hide resolved
deephaven-plugin==0.3.0
java-utilities==0.2.0
jedi==0.18.2
Expand All @@ -13,4 +14,4 @@ pyarrow==11.0.0
python-dateutil==2.8.2
pytz==2022.7.1
six==1.16.0
turbodbc==4.5.6
turbodbc==4.5.10
1 change: 1 addition & 0 deletions py/server/deephaven/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@
from .table_factory import empty_table, time_table, merge, merge_sorted, new_table, DynamicTableWriter, input_table
from .replay import TableReplayer
from ._gc import garbage_collect
from .dbc import read_sql
84 changes: 83 additions & 1 deletion py/server/deephaven/dbc/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,87 @@
#
# Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending
#
"""The dbc package includes the modules and functions for using external databases with Deephaven."""
from typing import Any

import deephaven.arrow as dharrow
from deephaven import DHError
from deephaven.table import Table


def read_sql(conn: Any, query: str, driver: str = "connectorx") -> Table:
"""Executes the provided SQL query via a supported driver and returns a Deephaven table.

Args:
conn (Any): must either be a connection string for the given driver or a Turbodbc/ADBC DBAPI Connection
object; when it is a Connection object, the driver argument will be ignored.
query (str): SQL query statement
driver: (str): the driver to use, supported drivers are "odbc", "adbc", "connectorx", default is "connectorx"

Returns:
a new Table

Raises:
DHError
"""
if isinstance(conn, str):
if driver == "connectorx":
try:
import connectorx as cx
except ImportError:
raise DHError(message="import connectorx failed, please install it first.")

try:
pa_table = cx.read_sql(conn=conn, query=query, return_type="arrow")
return dharrow.to_table(pa_table)
except Exception as e:
raise DHError(e, message="failed to get a Arrow table from ConnectorX.") from e
elif driver == "odbc":
from deephaven.dbc.odbc import read_cursor
import turbodbc
jmao-denver marked this conversation as resolved.
Show resolved Hide resolved
with turbodbc.connect(connection_string=conn) as conn:
with conn.cursor() as cursor:
cursor.execute(query)
return read_cursor(cursor)
elif driver == "adbc":
from deephaven.dbc.adbc import read_cursor
if not conn:
import adbc_driver_sqlite.dbapi as dbapi
elif conn.strip().startswith("postgresql:"):
import adbc_driver_postgresql.dbapi as dbapi
else:
raise DHError(message=f"unsupported ADBC connection string {conn}")

with dbapi.connect(conn) as dbconn:
with dbconn.cursor() as cursor:
cursor.execute(query)
return read_cursor(cursor)
else:
raise DHError(message=f"unsupported driver {driver}")
else:
jmao-denver marked this conversation as resolved.
Show resolved Hide resolved
try:
import adbc_driver_manager.dbapi as dbapi
if isinstance(conn, dbapi.Connection):
from deephaven.dbc.adbc import read_cursor
with conn.cursor() as cursor:
cursor.execute(query)
return read_cursor(cursor)
except ImportError:
pass

try:
import turbodbc.connection
if isinstance(conn, turbodbc.connection.Connection):
from deephaven.dbc.odbc import read_cursor
with conn.cursor() as cursor:
cursor.execute(query)
return read_cursor(cursor)
except ImportError:
pass

raise DHError(message=f"invalid conn argument {conn}")





"""The dbc package includes the modules for using external databases with Deephaven."""
10 changes: 4 additions & 6 deletions py/server/deephaven/dbc/adbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@
from databases that only support ODBC/JDBC. By relying on ADBC, Deephaven is able to ingest data efficiently from a
wide variety of data sources. """

from typing import Any

from deephaven.table import Table
from deephaven import arrow as dharrow
from deephaven import DHError
from deephaven import arrow as dharrow
from deephaven.table import Table

try:
import adbc_driver_manager.dbapi
Expand All @@ -26,8 +24,8 @@ def read_cursor(cursor: adbc_driver_manager.dbapi.Cursor) -> Table:
"""Converts the result set of the provided cursor into a Deephaven table.

Args:
cursor (Any): an ADBC DB-API cursor. Prior to it being passed in, its execute() method must be called to
run a query operation that produces an Arrow table
cursor (adbc_driver_manager.dbapi.Cursor): an ADBC DB-API cursor. Prior to it being passed in, its execute()
method must be called to run a query operation that produces an Arrow table

Returns:
a new Table
Expand Down
6 changes: 2 additions & 4 deletions py/server/deephaven/dbc/odbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
importantly it has optimized, built-in Apache Arrow support when fetching ODBC result sets. This enables Deephaven to
achieve maximum efficiency when ingesting relational data. """

from typing import Any

from deephaven import DHError
from deephaven import arrow as dharrow
from deephaven.table import Table
Expand All @@ -25,8 +23,8 @@ def read_cursor(cursor: turbodbc.cursor.Cursor) -> Table:
"""Converts the result set of the provided cursor into a Deephaven table.

Args:
cursor (Any): a Turbodbc cursor. Prior to it being passed in, its execute() method must be called to run a query
operation that produces a result set
cursor (turbodbc.cursor.Cursor): a Turbodbc cursor. Prior to it being passed in, its execute() method must be
called to run a query operation that produces a result set

Returns:
a new Table
Expand Down
53 changes: 53 additions & 0 deletions py/server/tests/test_dbc.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
#
# Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending
#
import platform
import unittest

import turbodbc

from deephaven import DHError, read_sql
from deephaven.dbc import odbc as dhodbc, adbc as dhadbc
from tests.testbase import BaseTestCase

Expand Down Expand Up @@ -32,6 +34,57 @@ def test_read_adbc(self):
# instead of the actual size
self.assertEqual(table.size, 10)

@unittest.skipIf(platform.machine() != "x86_64", reason="connectorx not available on Linux/Arm64"
"https://github.com/sfu-db/connector-x/issues/386")
jmao-denver marked this conversation as resolved.
Show resolved Hide resolved
def test_read_sql_connectorx(self):
jmao-denver marked this conversation as resolved.
Show resolved Hide resolved
query = "SELECT t_ts, t_id, t_instrument, t_exchange, t_price, t_size FROM CRYPTO_TRADES LIMIT 10"
postgres_url = "postgresql://test:test@postgres:5432/test"
dh_table = read_sql(conn=postgres_url, query=query)
self.assertEqual(len(dh_table.columns), 6)
self.assertEqual(dh_table.size, 10)

with self.assertRaises(DHError) as cm:
dh_table = read_sql(conn="garbage", query=query)

def test_read_sql(self):
query = "SELECT t_ts, t_id, t_instrument, t_exchange, t_price, t_size FROM CRYPTO_TRADES LIMIT 10"

with self.subTest("odbc"):
connection_string = 'Driver={PostgreSQL};Server=postgres;Port=5432;Database=test;Uid=test;Pwd=test;'
dh_table = read_sql(conn=connection_string, query=query, driver="odbc")
self.assertEqual(len(dh_table.columns), 6)
self.assertEqual(dh_table.size, 10)

with self.subTest("adbc"):
uri = "postgresql://postgres:5432/test?user=test&password=test"
dh_table = read_sql(conn=uri, query=query, driver="adbc")
self.assertEqual(len(dh_table.columns), 6)
self.assertEqual(dh_table.size, 10)

with self.subTest("odbc-connection"):
connection_string = 'Driver={PostgreSQL};Server=postgres;Port=5432;Database=test;Uid=test;Pwd=test;'
with turbodbc.connect(connection_string=connection_string) as conn:
dh_table = read_sql(conn=conn, query=query, driver="odbc")
self.assertEqual(len(dh_table.columns), 6)
self.assertEqual(dh_table.size, 10)

with self.subTest("adbc-connection"):
import adbc_driver_postgresql.dbapi
uri = "postgresql://postgres:5432/test?user=test&password=test"
with adbc_driver_postgresql.dbapi.connect(uri) as conn:
dh_table = read_sql(conn=conn, query=query, driver="adbc")
self.assertEqual(len(dh_table.columns), 6)
self.assertEqual(dh_table.size, 10)

with self.assertRaises(DHError) as cm:
dh_table = read_sql(conn=[None], query=query, driver="adbc")

with self.assertRaises(DHError) as cm:
dh_table = read_sql(conn="garbage", query=query, driver="adbc")

with self.assertRaises(Exception) as cm:
dh_table = read_sql(conn="garbage", query=query, driver="odbc")


if __name__ == '__main__':
unittest.main()